GPUShader: Optimize Multisample resolve shader.

Group all fetches together, without interleaved ALU instructions, to let the compiler optimize.

Also do the color samples only if needed.

The 16-sample resolve pass went from 3.86 ms to between 1.11 ms and 2.22 ms (min-max)
on my NVIDIA card.
This commit is contained in:
Clément Foucault 2018-04-23 23:07:58 +02:00
parent 1fff3e02c2
commit 6d8e308eae
1 changed files with 93 additions and 36 deletions

View File

@ -8,60 +8,117 @@ out vec4 fragColor;
#error "Too many samples"
#endif
// #define USE_DEPTH_WEIGHTING
/* Resolve one texel of the multisampled depth/color pair.
 *
 * Outputs:
 *   gl_FragDepth - minimum depth over all SAMPLES depth samples.
 *   fragColor    - sum of the fetched color samples divided by SAMPLES.
 *
 * Layout: every depth fetch is issued first, then the color fetches, and only
 * afterwards the arithmetic, so that no ALU work sits between texture fetches.
 *
 * Color samples are fetched in groups of up to four. A group is skipped when
 * every depth in it equals 1.0; the skipped samples then contribute vec4(0.0)
 * to the sum.
 * NOTE(review): this presumably relies on depth == 1.0 meaning "nothing was
 * drawn" and on the color there being zero - confirm against the clear values
 * used by the caller. */
void main()
{
    ivec2 texel = ivec2(gl_FragCoord.xy);

    bvec4 b1, b2, b3, b4; /* Per-sample "depth != 1.0" flags. */
    vec4 w1, w2, w3, w4;  /* Same flags as 0.0 / 1.0 weights. */
    vec4 d1, d2, d3, d4;  /* Depth samples, four per vector. */
    vec4 c1, c2, c3, c4, c5, c6, c7, c8;
    vec4 c9, c10, c11, c12, c13, c14, c15, c16;

    /* Defaults for the lanes that are never fetched: a depth of 1.0 does not
     * affect the min() reduction, and zero colors/weights do not affect the sum. */
    d1 = d2 = d3 = d4 = vec4(1.0);
    w1 = w2 = w3 = w4 = vec4(0.0);
    c1 = c2 = c3 = c4 = c5 = c6 = c7 = c8 = vec4(0.0);
    c9 = c10 = c11 = c12 = c13 = c14 = c15 = c16 = vec4(0.0);

    /* Depth */
    d1.x = texelFetch(depthMulti, texel, 0).r;
    d1.y = texelFetch(depthMulti, texel, 1).r;
#if SAMPLES > 2
    d1.z = texelFetch(depthMulti, texel, 2).r;
    d1.w = texelFetch(depthMulti, texel, 3).r;
#endif
#if SAMPLES > 4
    d2.x = texelFetch(depthMulti, texel, 4).r;
    d2.y = texelFetch(depthMulti, texel, 5).r;
    d2.z = texelFetch(depthMulti, texel, 6).r;
    d2.w = texelFetch(depthMulti, texel, 7).r;
#endif
#if SAMPLES > 8
    d3.x = texelFetch(depthMulti, texel, 8).r;
    d3.y = texelFetch(depthMulti, texel, 9).r;
    d3.z = texelFetch(depthMulti, texel, 10).r;
    d3.w = texelFetch(depthMulti, texel, 11).r;
    d4.x = texelFetch(depthMulti, texel, 12).r;
    d4.y = texelFetch(depthMulti, texel, 13).r;
    d4.z = texelFetch(depthMulti, texel, 14).r;
    d4.w = texelFetch(depthMulti, texel, 15).r;
#endif

    /* COLOR: fetch a group only if at least one of its depths differs from 1.0. */
    b1 = notEqual(d1, vec4(1.0));
    if (any(b1)) {
        c1 = texelFetch(colorMulti, texel, 0);
        c2 = texelFetch(colorMulti, texel, 1);
#if SAMPLES > 2
        c3 = texelFetch(colorMulti, texel, 2);
        c4 = texelFetch(colorMulti, texel, 3);
#endif
        w1 = vec4(b1);
    }
#if SAMPLES > 4
    b2 = notEqual(d2, vec4(1.0));
    if (any(b2)) {
        c5 = texelFetch(colorMulti, texel, 4);
        c6 = texelFetch(colorMulti, texel, 5);
        c7 = texelFetch(colorMulti, texel, 6);
        c8 = texelFetch(colorMulti, texel, 7);
        w2 = vec4(b2);
    }
#endif
#if SAMPLES > 8
    b3 = notEqual(d3, vec4(1.0));
    if (any(b3)) {
        c9 = texelFetch(colorMulti, texel, 8);
        c10 = texelFetch(colorMulti, texel, 9);
        c11 = texelFetch(colorMulti, texel, 10);
        c12 = texelFetch(colorMulti, texel, 11);
        w3 = vec4(b3);
    }
    b4 = notEqual(d4, vec4(1.0));
    if (any(b4)) {
        c13 = texelFetch(colorMulti, texel, 12);
        c14 = texelFetch(colorMulti, texel, 13);
        c15 = texelFetch(colorMulti, texel, 14);
        c16 = texelFetch(colorMulti, texel, 15);
        w4 = vec4(b4);
    }
#endif

    /* Reduce the depth vectors down to a single minimum. */
#if SAMPLES > 8
    d1 = min(d1, min(d3, d4));
#endif
#if SAMPLES > 4
    d1 = min(d1, d2);
#endif
#if SAMPLES > 2
    d1.xy = min(d1.xy, d1.zw);
#endif
    gl_FragDepth = min(d1.x, d1.y);

#ifdef USE_DEPTH_WEIGHTING
    /* Zero out every sample whose depth equals 1.0, even inside a fetched group. */
    c1 *= w1.x; c2 *= w1.y; c3 *= w1.z; c4 *= w1.w;
    c5 *= w2.x; c6 *= w2.y; c7 *= w2.z; c8 *= w2.w;
    c9 *= w3.x; c10 *= w3.y; c11 *= w3.z; c12 *= w3.w;
    c13 *= w4.x; c14 *= w4.y; c15 *= w4.z; c16 *= w4.w;
#endif

    /* Accumulate the color samples into c1. */
    c1 = c1 + c2;
#if SAMPLES > 2
    c1 += c3 + c4;
#endif
#if SAMPLES > 4
    c1 += c5 + c6 + c7 + c8;
#endif
#if SAMPLES > 8
    c1 += c9 + c10 + c11 + c12 + c13 + c14 + c15 + c16;
#endif

    /* The divisor is always SAMPLES, also when some samples were skipped or
     * weighted to zero. */
    const float inv_samples = 1.0 / float(SAMPLES);
    fragColor = c1 * inv_samples;
}