GPUShader: Add GPU_SHADER_2D_IMAGE_MULTISAMPLE_2/4/8/16

This shader is used instead of blitting back and forth to a single sample
buffer.

This means it resolves the color and depth samples and outputs a fragment
which can be depth tested and blended on top of an existing framebuffer.

We do static shader variation with manual loop unrolling for performance
reason. In my test I get 25% more perf with intel integrated gpu and 75%
performance gain with dedicated nvidia card compared to a single shader
with a uniform for sample count.
This commit is contained in:
Clément Foucault 2018-04-23 21:08:11 +02:00
parent 2602198485
commit 12570c7373
4 changed files with 89 additions and 0 deletions

View File

@ -158,6 +158,7 @@ data_to_c_simple(shaders/gpu_shader_image_varying_color_frag.glsl SRC)
data_to_c_simple(shaders/gpu_shader_image_depth_linear_frag.glsl SRC)
data_to_c_simple(shaders/gpu_shader_image_depth_copy_frag.glsl SRC)
data_to_c_simple(shaders/gpu_shader_image_interlace_frag.glsl SRC)
data_to_c_simple(shaders/gpu_shader_image_multisample_resolve_frag.glsl SRC)
data_to_c_simple(shaders/gpu_shader_3D_image_vert.glsl SRC)
data_to_c_simple(shaders/gpu_shader_3D_vert.glsl SRC)
data_to_c_simple(shaders/gpu_shader_3D_normal_vert.glsl SRC)

View File

@ -119,6 +119,10 @@ typedef enum GPUBuiltinShader {
GPU_SHADER_2D_IMAGE_ALPHA,
GPU_SHADER_2D_IMAGE_RECT_COLOR,
GPU_SHADER_2D_IMAGE_MULTI_RECT_COLOR,
GPU_SHADER_2D_IMAGE_MULTISAMPLE_2,
GPU_SHADER_2D_IMAGE_MULTISAMPLE_4,
GPU_SHADER_2D_IMAGE_MULTISAMPLE_8,
GPU_SHADER_2D_IMAGE_MULTISAMPLE_16,
GPU_SHADER_2D_CHECKER,
GPU_SHADER_2D_DIAG_STRIPES,
/* for simple 3D drawing */

View File

@ -90,6 +90,7 @@ extern char datatoc_gpu_shader_image_mask_uniform_color_frag_glsl[];
extern char datatoc_gpu_shader_image_modulate_alpha_frag_glsl[];
extern char datatoc_gpu_shader_image_depth_linear_frag_glsl[];
extern char datatoc_gpu_shader_image_depth_copy_frag_glsl[];
extern char datatoc_gpu_shader_image_multisample_resolve_frag_glsl[];
extern char datatoc_gpu_shader_3D_vert_glsl[];
extern char datatoc_gpu_shader_3D_normal_vert_glsl[];
extern char datatoc_gpu_shader_3D_flat_color_vert_glsl[];
@ -690,6 +691,10 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader)
datatoc_gpu_shader_image_depth_linear_frag_glsl },
[GPU_SHADER_3D_IMAGE_DEPTH_COPY] = { datatoc_gpu_shader_3D_image_vert_glsl,
datatoc_gpu_shader_image_depth_copy_frag_glsl },
[GPU_SHADER_2D_IMAGE_MULTISAMPLE_2] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl },
[GPU_SHADER_2D_IMAGE_MULTISAMPLE_4] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl },
[GPU_SHADER_2D_IMAGE_MULTISAMPLE_8] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl },
[GPU_SHADER_2D_IMAGE_MULTISAMPLE_16] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl },
[GPU_SHADER_2D_IMAGE_INTERLACE] = { datatoc_gpu_shader_2D_image_vert_glsl,
datatoc_gpu_shader_image_interlace_frag_glsl },
@ -830,6 +835,18 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader)
/* just a few special cases */
const char *defines = NULL;
switch (shader) {
case GPU_SHADER_2D_IMAGE_MULTISAMPLE_2:
defines = "#define SAMPLES 2\n";
break;
case GPU_SHADER_2D_IMAGE_MULTISAMPLE_4:
defines = "#define SAMPLES 4\n";
break;
case GPU_SHADER_2D_IMAGE_MULTISAMPLE_8:
defines = "#define SAMPLES 8\n";
break;
case GPU_SHADER_2D_IMAGE_MULTISAMPLE_16:
defines = "#define SAMPLES 16\n";
break;
case GPU_SHADER_2D_WIDGET_BASE_INST:
case GPU_SHADER_2D_NODELINK_INST:
defines = "#define USE_INSTANCE\n";

View File

@ -0,0 +1,67 @@
uniform sampler2DMS depthMulti;
uniform sampler2DMS colorMulti;
out vec4 fragColor;
#if SAMPLES > 16
#error "Too many samples"
#endif
void main()
{
ivec2 texel = ivec2(gl_FragCoord.xy);
float depth = 1.0;
depth = min(depth, texelFetch(depthMulti, texel, 0).r);
depth = min(depth, texelFetch(depthMulti, texel, 1).r);
#if SAMPLES > 2
depth = min(depth, texelFetch(depthMulti, texel, 2).r);
depth = min(depth, texelFetch(depthMulti, texel, 3).r);
#endif
#if SAMPLES > 4
depth = min(depth, texelFetch(depthMulti, texel, 4).r);
depth = min(depth, texelFetch(depthMulti, texel, 5).r);
depth = min(depth, texelFetch(depthMulti, texel, 6).r);
depth = min(depth, texelFetch(depthMulti, texel, 7).r);
#endif
#if SAMPLES > 8
depth = min(depth, texelFetch(depthMulti, texel, 8).r);
depth = min(depth, texelFetch(depthMulti, texel, 9).r);
depth = min(depth, texelFetch(depthMulti, texel, 10).r);
depth = min(depth, texelFetch(depthMulti, texel, 11).r);
depth = min(depth, texelFetch(depthMulti, texel, 12).r);
depth = min(depth, texelFetch(depthMulti, texel, 13).r);
depth = min(depth, texelFetch(depthMulti, texel, 14).r);
depth = min(depth, texelFetch(depthMulti, texel, 15).r);
#endif
vec4 color = vec4(0.0);
color += texelFetch(colorMulti, texel, 0);
color += texelFetch(colorMulti, texel, 1);
#if SAMPLES > 2
color += texelFetch(colorMulti, texel, 2);
color += texelFetch(colorMulti, texel, 3);
#endif
#if SAMPLES > 4
color += texelFetch(colorMulti, texel, 4);
color += texelFetch(colorMulti, texel, 5);
color += texelFetch(colorMulti, texel, 6);
color += texelFetch(colorMulti, texel, 7);
#endif
#if SAMPLES > 8
color += texelFetch(colorMulti, texel, 8);
color += texelFetch(colorMulti, texel, 9);
color += texelFetch(colorMulti, texel, 10);
color += texelFetch(colorMulti, texel, 11);
color += texelFetch(colorMulti, texel, 12);
color += texelFetch(colorMulti, texel, 13);
color += texelFetch(colorMulti, texel, 14);
color += texelFetch(colorMulti, texel, 15);
#endif
const float inv_samples = 1.0 / float(SAMPLES);
fragColor = color * inv_samples;
gl_FragDepth = depth;
}