GPUShader: Optimize Multisample resolve shader.
Group all fetches together, without interleaved ALU instructions, to let the compiler optimize them. Also fetch the color samples only if needed. The 16-sample resolve pass went from 3.86 ms to 1.11–2.22 ms (min–max) on my NVIDIA card.
This commit is contained in:
parent
1fff3e02c2
commit
6d8e308eae
|
@ -8,60 +8,117 @@ out vec4 fragColor;
|
|||
#error "Too many samples"
|
||||
#endif
|
||||
|
||||
// #define USE_DEPTH_WEIGHTING
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 texel = ivec2(gl_FragCoord.xy);
|
||||
|
||||
float depth = 1.0;
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 0).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 1).r);
|
||||
bvec4 b1, b2, b3, b4;
|
||||
vec4 w1, w2, w3, w4;
|
||||
vec4 d1, d2, d3, d4;
|
||||
vec4 c1, c2, c3, c4, c5, c6, c7, c8;
|
||||
vec4 c9, c10, c11, c12, c13, c14, c15, c16;
|
||||
d1 = d2 = d3 = d4 = vec4(1.0);
|
||||
w1 = w2 = w3 = w4 = vec4(0.0);
|
||||
c1 = c2 = c3 = c4 = c5 = c6 = c7 = c8 = vec4(0.0);
|
||||
c9 = c10 = c11 = c12 = c13 = c14 = c15 = c16 = vec4(0.0);
|
||||
|
||||
/* Depth */
|
||||
|
||||
d1.x = texelFetch(depthMulti, texel, 0).r;
|
||||
d1.y = texelFetch(depthMulti, texel, 1).r;
|
||||
#if SAMPLES > 2
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 2).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 3).r);
|
||||
d1.z = texelFetch(depthMulti, texel, 2).r;
|
||||
d1.w = texelFetch(depthMulti, texel, 3).r;
|
||||
#endif
|
||||
#if SAMPLES > 4
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 4).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 5).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 6).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 7).r);
|
||||
d2.x = texelFetch(depthMulti, texel, 4).r;
|
||||
d2.y = texelFetch(depthMulti, texel, 5).r;
|
||||
d2.z = texelFetch(depthMulti, texel, 6).r;
|
||||
d2.w = texelFetch(depthMulti, texel, 7).r;
|
||||
#endif
|
||||
#if SAMPLES > 8
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 8).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 9).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 10).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 11).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 12).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 13).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 14).r);
|
||||
depth = min(depth, texelFetch(depthMulti, texel, 15).r);
|
||||
d3.x = texelFetch(depthMulti, texel, 8).r;
|
||||
d3.y = texelFetch(depthMulti, texel, 9).r;
|
||||
d3.z = texelFetch(depthMulti, texel, 10).r;
|
||||
d3.w = texelFetch(depthMulti, texel, 11).r;
|
||||
d4.x = texelFetch(depthMulti, texel, 12).r;
|
||||
d4.y = texelFetch(depthMulti, texel, 13).r;
|
||||
d4.z = texelFetch(depthMulti, texel, 14).r;
|
||||
d4.w = texelFetch(depthMulti, texel, 15).r;
|
||||
#endif
|
||||
|
||||
vec4 color = vec4(0.0);
|
||||
color += texelFetch(colorMulti, texel, 0);
|
||||
color += texelFetch(colorMulti, texel, 1);
|
||||
/* COLOR */
|
||||
b1 = notEqual(d1, vec4(1.0));
|
||||
if (any(b1)) {
|
||||
c1 = texelFetch(colorMulti, texel, 0);
|
||||
c2 = texelFetch(colorMulti, texel, 1);
|
||||
#if SAMPLES > 2
|
||||
color += texelFetch(colorMulti, texel, 2);
|
||||
color += texelFetch(colorMulti, texel, 3);
|
||||
c3 = texelFetch(colorMulti, texel, 2);
|
||||
c4 = texelFetch(colorMulti, texel, 3);
|
||||
#endif
|
||||
w1 = vec4(b1);
|
||||
}
|
||||
#if SAMPLES > 4
|
||||
color += texelFetch(colorMulti, texel, 4);
|
||||
color += texelFetch(colorMulti, texel, 5);
|
||||
color += texelFetch(colorMulti, texel, 6);
|
||||
color += texelFetch(colorMulti, texel, 7);
|
||||
b2 = notEqual(d2, vec4(1.0));
|
||||
if (any(b2)) {
|
||||
c5 = texelFetch(colorMulti, texel, 4);
|
||||
c6 = texelFetch(colorMulti, texel, 5);
|
||||
c7 = texelFetch(colorMulti, texel, 6);
|
||||
c8 = texelFetch(colorMulti, texel, 7);
|
||||
w2 = vec4(b2);
|
||||
}
|
||||
#endif
|
||||
#if SAMPLES > 8
|
||||
color += texelFetch(colorMulti, texel, 8);
|
||||
color += texelFetch(colorMulti, texel, 9);
|
||||
color += texelFetch(colorMulti, texel, 10);
|
||||
color += texelFetch(colorMulti, texel, 11);
|
||||
color += texelFetch(colorMulti, texel, 12);
|
||||
color += texelFetch(colorMulti, texel, 13);
|
||||
color += texelFetch(colorMulti, texel, 14);
|
||||
color += texelFetch(colorMulti, texel, 15);
|
||||
b3 = notEqual(d3, vec4(1.0));
|
||||
if (any(b3)) {
|
||||
c9 = texelFetch(colorMulti, texel, 8);
|
||||
c10 = texelFetch(colorMulti, texel, 9);
|
||||
c11 = texelFetch(colorMulti, texel, 10);
|
||||
c12 = texelFetch(colorMulti, texel, 11);
|
||||
w3 = vec4(b3);
|
||||
}
|
||||
b4 = notEqual(d4, vec4(1.0));
|
||||
if (any(b4)) {
|
||||
c13 = texelFetch(colorMulti, texel, 12);
|
||||
c14 = texelFetch(colorMulti, texel, 13);
|
||||
c15 = texelFetch(colorMulti, texel, 14);
|
||||
c16 = texelFetch(colorMulti, texel, 15);
|
||||
w4 = vec4(b4);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if SAMPLES > 8
|
||||
d1 = min(d1, min(d3, d4));
|
||||
#endif
|
||||
#if SAMPLES > 4
|
||||
d1 = min(d1, d2);
|
||||
#endif
|
||||
#if SAMPLES > 2
|
||||
d1.xy = min(d1.xy, d1.zw);
|
||||
#endif
|
||||
gl_FragDepth = min(d1.x, d1.y);
|
||||
|
||||
#ifdef USE_DEPTH_WEIGHTING
|
||||
c1 *= w1.x; c2 *= w1.y; c3 *= w1.z; c4 *= w1.w;
|
||||
c5 *= w2.x; c6 *= w2.y; c7 *= w2.z; c8 *= w2.w;
|
||||
c9 *= w3.x; c10 *= w3.y; c11 *= w3.z; c12 *= w3.w;
|
||||
c13 *= w4.x; c14 *= w4.y; c15 *= w4.z; c16 *= w4.w;
|
||||
#endif
|
||||
|
||||
c1 = c1 + c2;
|
||||
#if SAMPLES > 2
|
||||
c1 += c3 + c4;
|
||||
#endif
|
||||
#if SAMPLES > 4
|
||||
c1 += c5 + c6 + c7 + c8;
|
||||
#endif
|
||||
#if SAMPLES > 8
|
||||
c1 += c9 + c10 + c11 + c12 + c13 + c14 + c15 + c16;
|
||||
#endif
|
||||
|
||||
const float inv_samples = 1.0 / float(SAMPLES);
|
||||
|
||||
fragColor = color * inv_samples;
|
||||
gl_FragDepth = depth;
|
||||
fragColor = c1 * inv_samples;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue