Eevee: Merge shadow map processing into one drawcall per light

This removes a lot of framebuffer configuration and binding.
This commit is contained in:
Clément Foucault 2019-06-04 18:26:36 +02:00
parent ff7401b02c
commit 942a748d5d
9 changed files with 254 additions and 161 deletions

View File

@ -200,6 +200,8 @@ data_to_c_simple(engines/eevee/shaders/prepass_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/prepass_vert.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_vert.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_process_vert.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_process_geom.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_store_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/shadow_copy_frag.glsl SRC)
data_to_c_simple(engines/eevee/shaders/bsdf_lut_frag.glsl SRC)

View File

@ -38,8 +38,10 @@ void EEVEE_view_layer_data_free(void *storage)
DRW_UBO_FREE_SAFE(sldata->shadow_render_ubo);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cube_target_fb);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cube_store_fb);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cube_copy_fb);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cascade_target_fb);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cascade_store_fb);
GPU_FRAMEBUFFER_FREE_SAFE(sldata->shadow_cascade_copy_fb);
DRW_TEXTURE_FREE_SAFE(sldata->shadow_cube_target);
DRW_TEXTURE_FREE_SAFE(sldata->shadow_cube_blur);
DRW_TEXTURE_FREE_SAFE(sldata->shadow_cube_pool);

View File

@ -49,6 +49,8 @@ static struct {
extern char datatoc_shadow_vert_glsl[];
extern char datatoc_shadow_frag_glsl[];
extern char datatoc_shadow_process_vert_glsl[];
extern char datatoc_shadow_process_geom_glsl[];
extern char datatoc_shadow_store_frag_glsl[];
extern char datatoc_shadow_copy_frag_glsl[];
extern char datatoc_concentric_samples_lib_glsl[];
@ -185,26 +187,30 @@ void EEVEE_lights_init(EEVEE_ViewLayerData *sldata)
/* only compile the ones needed. reduce startup time. */
if ((sh_method == SHADOW_ESM) && !e_data.shadow_copy_cube_sh[SHADOW_ESM]) {
e_data.shadow_copy_cube_sh[SHADOW_ESM] = DRW_shader_create_fullscreen(
datatoc_shadow_copy_frag_glsl,
"#define ESM\n"
"#define COPY\n");
e_data.shadow_copy_cascade_sh[SHADOW_ESM] = DRW_shader_create_fullscreen(
datatoc_shadow_copy_frag_glsl,
"#define ESM\n"
"#define COPY\n"
"#define CSM\n");
e_data.shadow_copy_cube_sh[SHADOW_ESM] = DRW_shader_create(datatoc_shadow_process_vert_glsl,
datatoc_shadow_process_geom_glsl,
datatoc_shadow_copy_frag_glsl,
"#define ESM\n"
"#define COPY\n");
e_data.shadow_copy_cascade_sh[SHADOW_ESM] = DRW_shader_create(datatoc_shadow_process_vert_glsl,
datatoc_shadow_process_geom_glsl,
datatoc_shadow_copy_frag_glsl,
"#define ESM\n"
"#define COPY\n"
"#define CSM\n");
}
else if ((sh_method == SHADOW_VSM) && !e_data.shadow_copy_cube_sh[SHADOW_VSM]) {
e_data.shadow_copy_cube_sh[SHADOW_VSM] = DRW_shader_create_fullscreen(
datatoc_shadow_copy_frag_glsl,
"#define VSM\n"
"#define COPY\n");
e_data.shadow_copy_cascade_sh[SHADOW_VSM] = DRW_shader_create_fullscreen(
datatoc_shadow_copy_frag_glsl,
"#define VSM\n"
"#define COPY\n"
"#define CSM\n");
e_data.shadow_copy_cube_sh[SHADOW_VSM] = DRW_shader_create(datatoc_shadow_process_vert_glsl,
datatoc_shadow_process_geom_glsl,
datatoc_shadow_copy_frag_glsl,
"#define VSM\n"
"#define COPY\n");
e_data.shadow_copy_cascade_sh[SHADOW_VSM] = DRW_shader_create(datatoc_shadow_process_vert_glsl,
datatoc_shadow_process_geom_glsl,
datatoc_shadow_copy_frag_glsl,
"#define VSM\n"
"#define COPY\n"
"#define CSM\n");
}
}
@ -239,7 +245,10 @@ static GPUShader *eevee_lights_get_store_sh(int shadow_method, bool high_blur, b
char *define_str = BLI_dynstr_get_cstring(ds_frag);
BLI_dynstr_free(ds_frag);
*shader = DRW_shader_create_fullscreen(store_shadow_shader_str, define_str);
*shader = DRW_shader_create(datatoc_shadow_process_vert_glsl,
datatoc_shadow_process_geom_glsl,
store_shadow_shader_str,
define_str);
MEM_freeN(store_shadow_shader_str);
MEM_freeN(define_str);
@ -256,14 +265,13 @@ static DRWPass *eevee_lights_cube_store_pass_get(EEVEE_PassList *psl,
bool high_blur = shadow_samples_len > 16;
DRWPass **pass = (high_blur) ? &psl->shadow_cube_store_pass : &psl->shadow_cube_store_high_pass;
if (*pass == NULL) {
EEVEE_LightsInfo *linfo = sldata->lights;
*pass = DRW_pass_create("Shadow Cube Storage Pass", DRW_STATE_WRITE_COLOR);
GPUShader *shader = eevee_lights_get_store_sh(shadow_method, high_blur, false);
DRWShadingGroup *grp = DRW_shgroup_create(shader, *pass);
DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cube_blur);
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1);
DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL);
DRW_shgroup_call_procedural_triangles(grp, NULL, 6);
}
return *pass;
}
@ -277,15 +285,13 @@ static DRWPass *eevee_lights_cascade_store_pass_get(EEVEE_PassList *psl,
DRWPass **pass = (high_blur) ? &psl->shadow_cascade_store_pass :
&psl->shadow_cascade_store_high_pass;
if (*pass == NULL) {
EEVEE_LightsInfo *linfo = sldata->lights;
*pass = DRW_pass_create("Shadow Cascade Storage Pass", DRW_STATE_WRITE_COLOR);
GPUShader *shader = eevee_lights_get_store_sh(shadow_method, high_blur, true);
DRWShadingGroup *grp = DRW_shgroup_create(shader, *pass);
DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cascade_blur);
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
DRW_shgroup_uniform_int(grp, "cascadeId", &linfo->current_shadow_cascade, 1);
DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1);
DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL);
DRW_shgroup_call_procedural_triangles(grp, NULL, MAX_CASCADE_NUM);
}
return *pass;
}
@ -325,9 +331,8 @@ void EEVEE_lights_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
psl->shadow_cube_copy_pass);
DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cube_target);
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1);
DRW_shgroup_uniform_int(grp, "faceId", &linfo->current_shadow_face, 1);
DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL);
DRW_shgroup_call_procedural_triangles(grp, NULL, 6);
}
{
@ -337,9 +342,8 @@ void EEVEE_lights_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
psl->shadow_cascade_copy_pass);
DRW_shgroup_uniform_texture_ref(grp, "shadowTexture", &sldata->shadow_cascade_target);
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
DRW_shgroup_uniform_float(grp, "shadowFilterSize", &linfo->filter_size, 1);
DRW_shgroup_uniform_int(grp, "cascadeId", &linfo->current_shadow_cascade, 1);
DRW_shgroup_call(grp, DRW_cache_fullscreen_quad_get(), NULL);
DRW_shgroup_call_procedural_triangles(grp, NULL, MAX_CASCADE_NUM);
}
{
@ -589,6 +593,10 @@ void EEVEE_lights_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
sldata->shadow_cube_blur = DRW_texture_create_cube(
linfo->shadow_cube_size, shadow_pool_format, DRW_TEX_FILTER, NULL);
}
GPU_framebuffer_ensure_config(
&sldata->shadow_cube_copy_fb,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(sldata->shadow_cube_blur)});
if (!sldata->shadow_cube_pool) {
sldata->shadow_cube_pool = DRW_texture_create_2d_array(linfo->shadow_cube_store_size,
linfo->shadow_cube_store_size,
@ -618,6 +626,10 @@ void EEVEE_lights_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_TEX_FILTER,
NULL);
}
GPU_framebuffer_ensure_config(
&sldata->shadow_cascade_copy_fb,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(sldata->shadow_cascade_blur)});
if (!sldata->shadow_cascade_pool) {
sldata->shadow_cascade_pool = DRW_texture_create_2d_array(linfo->shadow_cascade_size,
linfo->shadow_cascade_size,
@ -1382,11 +1394,9 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView
srd->clip_far = light_attenuation_radius_get(la, light_threshold);
srd->stored_texel_size = 1.0 / (float)linfo->shadow_cube_store_size;
srd->exponent = la->bleedexp;
copy_v3_v3(srd->position, cube_data->position);
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
eevee_ensure_cube_views(srd->clip_near, srd->clip_far, srd->position, g_data->cube_views);
eevee_ensure_cube_views(
srd->clip_near, srd->clip_far, cube_data->position, g_data->cube_views);
/* Render shadow cube */
/* Render 6 faces separately: seems to be faster for the general case.
@ -1406,51 +1416,48 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView
*/
float filter_texture_size = la->soft * 0.001f;
float filter_pixel_size = ceil(filter_texture_size / srd->cube_texel_size);
linfo->filter_size = srd->cube_texel_size * ((filter_pixel_size > 1.0f) ? 1.5f : 0.0f);
/* TODO: OPTI: Filter all faces in one/two draw call */
/* TODO: OPTI: Don't do this intermediate step if no filter is needed. */
for (linfo->current_shadow_face = 0; linfo->current_shadow_face < 6;
linfo->current_shadow_face++) {
{
srd->filter_size[0] = srd->cube_texel_size * ((filter_pixel_size > 1.0f) ? 1.5f : 0.0f);
srd->view_count = 6;
srd->base_id = 0;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
/* Copy using a small 3x3 box filter */
GPU_framebuffer_texture_cubeface_attach(sldata->shadow_cube_store_fb,
sldata->shadow_cube_blur,
0,
linfo->current_shadow_face,
0);
GPU_framebuffer_bind(sldata->shadow_cube_store_fb);
GPU_framebuffer_bind(sldata->shadow_cube_copy_fb);
DRW_draw_pass(psl->shadow_cube_copy_pass);
}
/* Push it to shadowmap array */
{
/* Adjust constants if concentric samples change. */
const float max_filter_size = 7.5f;
const float magic = 4.5f; /* Dunno why but that works. */
const int max_sample = 256;
/* Adjust constants if concentric samples change. */
const float max_filter_size = 7.5f;
const float magic = 4.5f; /* Dunno why but that works. */
const int max_sample = 256;
if (filter_pixel_size > 2.0f) {
srd->filter_size[0] = srd->cube_texel_size * max_filter_size * magic;
filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f);
/* Compute number of concentric samples. Depends directly on filter size. */
float pix_size_sqr = filter_pixel_size * filter_pixel_size;
srd->shadow_samples_len[0] = min_ii(
max_sample, 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr));
}
else {
srd->filter_size[0] = 0.0f;
srd->shadow_samples_len[0] = 4;
}
srd->view_count = 1;
srd->base_id = evscd->layer_id;
srd->shadow_samples_len_inv[0] = 1.0f / (float)srd->shadow_samples_len[0];
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
if (filter_pixel_size > 2.0f) {
linfo->filter_size = srd->cube_texel_size * max_filter_size * magic;
filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f);
/* Compute number of concentric samples. Depends directly on filter size. */
float pix_size_sqr = filter_pixel_size * filter_pixel_size;
srd->shadow_samples_len = min_ii(max_sample,
4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr));
DRWPass *store_pass = eevee_lights_cube_store_pass_get(
psl, sldata, linfo->shadow_method, srd->shadow_samples_len[0]);
GPU_framebuffer_bind(sldata->shadow_cube_store_fb);
DRW_draw_pass(store_pass);
}
else {
linfo->filter_size = 0.0f;
srd->shadow_samples_len = 4;
}
srd->shadow_samples_len_inv = 1.0f / (float)srd->shadow_samples_len;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
GPU_framebuffer_texture_layer_attach(
sldata->shadow_cube_store_fb, sldata->shadow_cube_pool, 0, evscd->layer_id, 0);
GPU_framebuffer_bind(sldata->shadow_cube_store_fb);
DRWPass *store_pass = eevee_lights_cube_store_pass_get(
psl, sldata, linfo->shadow_method, srd->shadow_samples_len);
DRW_draw_pass(store_pass);
if (linfo->soft_shadows == false) {
led->need_update = false;
@ -1477,6 +1484,7 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView
srd->clip_near = la->clipsta;
srd->clip_far = la->clipend;
srd->view_count = la->cascade_count;
srd->stored_texel_size = 1.0 / (float)linfo->shadow_cascade_size;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, &linfo->shadow_render_data);
@ -1500,54 +1508,52 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata, DRWView
GPU_framebuffer_clear_depth(sldata->shadow_cascade_target_fb, 1.0f);
DRW_draw_pass(psl->shadow_pass);
}
/* TODO: OPTI: Filter all cascade in one/two draw call */
for (linfo->current_shadow_cascade = 0; linfo->current_shadow_cascade < la->cascade_count;
++linfo->current_shadow_cascade) {
/* 0.01f factor to convert to percentage */
float filter_texture_size = la->soft * 0.01f / evscd->radius[linfo->current_shadow_cascade];
float filter_pixel_size = ceil(linfo->shadow_cascade_size * filter_texture_size);
/* Copy using a small 3x3 box filter */
/* Copy using a small 3x3 box filter */
{
/* NOTE: We always do it in the case of CSM because of artifacts in the farthest cascade. */
linfo->filter_size = srd->stored_texel_size;
GPU_framebuffer_texture_layer_attach(sldata->shadow_cascade_store_fb,
sldata->shadow_cascade_blur,
0,
linfo->current_shadow_cascade,
0);
GPU_framebuffer_bind(sldata->shadow_cascade_store_fb);
copy_v4_fl(srd->filter_size, srd->stored_texel_size);
srd->base_id = 0;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
GPU_framebuffer_bind(sldata->shadow_cascade_copy_fb);
DRW_draw_pass(psl->shadow_cascade_copy_pass);
}
/* Push it to shadowmap array and blur more */
{
int max_pass_sample = 0;
/* Push it to shadowmap array and blur more */
for (int j = 0; j < la->cascade_count; j++) {
/* 0.01f factor to convert to percentage */
float filter_texture_size = la->soft * 0.01f / evscd->radius[j];
float filter_pixel_size = ceil(linfo->shadow_cascade_size * filter_texture_size);
/* Adjust constants if concentric samples change. */
const float max_filter_size = 7.5f;
const float magic = 3.2f; /* Arbitrary: less banding */
const int max_sample = 256;
/* Adjust constants if concentric samples change. */
const float max_filter_size = 7.5f;
const float magic = 3.2f; /* Arbitrary: less banding */
const int max_sample = 256;
if (filter_pixel_size > 2.0f) {
linfo->filter_size = srd->stored_texel_size * max_filter_size * magic;
filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f);
/* Compute number of concentric samples. Depends directly on filter size. */
float pix_size_sqr = filter_pixel_size * filter_pixel_size;
srd->shadow_samples_len = min_ii(max_sample,
4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr));
if (filter_pixel_size > 2.0f) {
srd->filter_size[j] = srd->stored_texel_size * max_filter_size * magic;
filter_pixel_size = max_ff(0.0f, filter_pixel_size - 3.0f);
/* Compute number of concentric samples. Depends directly on filter size. */
float pix_size_sqr = filter_pixel_size * filter_pixel_size;
srd->shadow_samples_len[j] = min_ii(
max_sample, 4 + 8 * (int)filter_pixel_size + 4 * (int)(pix_size_sqr));
}
else {
srd->filter_size[j] = 0.0f;
srd->shadow_samples_len[j] = 4;
}
srd->shadow_samples_len_inv[j] = 1.0f / (float)srd->shadow_samples_len[j];
max_pass_sample = max_ii(max_pass_sample, srd->shadow_samples_len[j]);
}
else {
linfo->filter_size = 0.0f;
srd->shadow_samples_len = 4;
}
srd->shadow_samples_len_inv = 1.0f / (float)srd->shadow_samples_len;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, &linfo->shadow_render_data);
int layer = evscd->layer_id + linfo->current_shadow_cascade;
GPU_framebuffer_texture_layer_attach(
sldata->shadow_cascade_store_fb, sldata->shadow_cascade_pool, 0, layer, 0);
GPU_framebuffer_bind(sldata->shadow_cascade_store_fb);
srd->base_id = evscd->layer_id;
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
/* XXX(fclem) this creates drawcalls outside of cache generation. */
DRWPass *store_pass = eevee_lights_cascade_store_pass_get(
psl, sldata, linfo->shadow_method, srd->shadow_samples_len);
psl, sldata, linfo->shadow_method, max_pass_sample);
GPU_framebuffer_bind(sldata->shadow_cascade_store_fb);
DRW_draw_pass(store_pass);
}
}

View File

@ -393,15 +393,17 @@ typedef struct EEVEE_ShadowCascade {
} EEVEE_ShadowCascade;
typedef struct EEVEE_ShadowRender {
float position[3], pad;
int shadow_samples_len[MAX_CASCADE_NUM];
float shadow_samples_len_inv[MAX_CASCADE_NUM];
float filter_size[MAX_CASCADE_NUM];
int view_count;
int base_id;
float cube_texel_size;
float stored_texel_size;
float clip_near;
float clip_far;
int shadow_samples_len;
float shadow_samples_len_inv;
float exponent;
float pad2;
float pad;
} EEVEE_ShadowRender;
BLI_STATIC_ASSERT_ALIGN(EEVEE_Light, 16)
@ -439,10 +441,6 @@ typedef struct EEVEE_LightsInfo {
int shadow_cube_size, shadow_cascade_size, shadow_method;
bool shadow_high_bitdepth, soft_shadows;
int shadow_cube_store_size;
int current_shadow_cascade;
int current_shadow_face;
uint shadow_instance_count;
float filter_size;
/* List of lights in the scene. */
/* XXX This is fragile, can get out of sync quickly. */
struct Object *light_ref[MAX_LIGHT];
@ -716,8 +714,10 @@ typedef struct EEVEE_ViewLayerData {
struct GPUFrameBuffer *shadow_cube_target_fb;
struct GPUFrameBuffer *shadow_cube_store_fb;
struct GPUFrameBuffer *shadow_cube_copy_fb;
struct GPUFrameBuffer *shadow_cascade_target_fb;
struct GPUFrameBuffer *shadow_cascade_store_fb;
struct GPUFrameBuffer *shadow_cascade_copy_fb;
struct GPUTexture *shadow_cube_target;
struct GPUTexture *shadow_cube_blur;

View File

@ -177,7 +177,7 @@ float light_visibility(LightData ld,
vec3 W,
#ifndef VOLUMETRICS
vec3 viewPosition,
vec3 viewNormal,
vec3 vN,
#endif
vec4 l_vector)
{
@ -227,15 +227,17 @@ float light_visibility(LightData ld,
vec3 ray_ori = viewPosition;
if (dot(viewNormal, ray_dir) <= 0.0) {
return vis;
}
// vN = (gl_FrontFacing) ? vN : -vN;
float bias = 0.5; /* Constant Bias */
bias += 1.0 - abs(dot(viewNormal, ray_dir)); /* Angle dependent bias */
// if (dot(vN, ray_dir) <= 0.0) {
// return vis;
// }
float bias = 0.5; /* Constant Bias */
bias += 1.0 - abs(dot(vN, ray_dir)); /* Angle dependent bias */
bias *= gl_FrontFacing ? data.sh_contact_offset : -data.sh_contact_offset;
vec3 nor_bias = viewNormal * bias;
vec3 nor_bias = vN * bias;
ray_ori += nor_bias;
ray_dir *= trace_distance;

View File

@ -3,24 +3,32 @@
layout(std140) uniform shadow_render_block
{
vec4 lampPosition;
/* Use vectors to avoid alignment padding. */
ivec4 shadowSampleCount;
vec4 shadowInvSampleCount;
vec4 filterSize;
int viewCount;
int baseId;
float cubeTexelSize;
float storedTexelSize;
float nearClip;
float farClip;
int shadowSampleCount;
float shadowInvSampleCount;
float exponent;
};
#ifdef CSM
uniform sampler2DArray shadowTexture;
uniform int cascadeId;
#else
uniform samplerCube shadowTexture;
uniform int faceId;
#endif
uniform float shadowFilterSize;
flat in int layerID;
#ifdef CSM
# define cascadeID layerID
#else
# define cascadeID 0
#endif
out vec4 FragColor;
@ -92,7 +100,7 @@ void prefilter(vec4 depths, float ref, inout vec2 accum)
#ifdef CSM
vec3 get_texco(vec2 uvs, vec2 ofs)
{
return vec3(uvs + ofs, float(cascadeId));
return vec3(uvs + ofs, float(cascadeID));
}
#else /* CUBEMAP */
const vec3 minorAxisX[6] = vec3[6](vec3(0.0f, 0.0f, -1.0f),
@ -119,7 +127,7 @@ const vec3 majorAxis[6] = vec3[6](vec3(1.0f, 0.0f, 0.0f),
vec3 get_texco(vec2 uvs, vec2 ofs)
{
uvs += ofs;
return majorAxis[faceId] + uvs.x * minorAxisX[faceId] + uvs.y * minorAxisY[faceId];
return majorAxis[layerID] + uvs.x * minorAxisX[layerID] + uvs.y * minorAxisY[layerID];
}
#endif
@ -139,7 +147,7 @@ void main()
float depth = texture(shadowTexture, co).r;
depth = get_world_distance(depth, co);
if (shadowFilterSize == 0.0) {
if (filterSize[cascadeID] == 0.0) {
#ifdef ESM
FragColor = vec4(depth);
#else /* VSM */
@ -149,18 +157,14 @@ void main()
}
#ifdef ESM
float accum = 1.0;
float ref = depth;
float accum = 1.0;
#else /* VSM */
float ref = 0.0; /* UNUSED */
vec2 accum = vec2(depth, depth * depth) * SAMPLE_WEIGHT;
#endif
#ifdef CSM
vec3 ofs = vec3(1.0, 0.0, -1.0) * shadowFilterSize;
#else /* CUBEMAP */
vec3 ofs = vec3(1.0, 0.0, -1.0) * shadowFilterSize;
#endif
vec3 ofs = vec3(1.0, 0.0, -1.0) * filterSize[cascadeID];
vec3 cos[4];
cos[0] = get_texco(uvs, ofs.zz);

View File

@ -0,0 +1,36 @@
/* Geometry stage of the shadow processing passes: takes one triangle in and emits one
 * triangle out (pass-through), only selecting the destination layer. */
layout(triangles) in;
layout(triangle_strip, max_vertices = 3) out;
/* Parameters of the shadow currently being processed.
 * Bound under the name "shadow_render_block"; the C side uploads an EEVEE_ShadowRender
 * struct into this buffer, so the member order here has to follow that struct. */
layout(std140) uniform shadow_render_block
{
/* Use vectors to avoid alignment padding. */
/* The three vectors below are indexed per cascade by the fragment stages. */
ivec4 shadowSampleCount;
vec4 shadowInvSampleCount;
vec4 filterSize;
/* Number of layers to process; the vertex stage collapses triangles beyond this count. */
int viewCount;
/* Offset added to the per-triangle index to get the destination layer. */
int baseId;
float cubeTexelSize;
float storedTexelSize;
float nearClip;
float farClip;
float exponent;
};
/* Destination layer computed by the vertex stage (already offset by baseId). */
in int layerID_g[];
/* Layer index relative to baseId, forwarded to the fragment stage. */
flat out int layerID;
/* Pass-through geometry shader entry point.
 * Forwards the incoming triangle unchanged, routes it to the framebuffer layer chosen by
 * the vertex stage (layerID_g) and gives the fragment stage that layer relative to baseId. */
void main()
{
  /* The vertex stage assigns one layer per triangle (gl_VertexID / 3), so reading the
   * value from the first vertex is enough. */
  int target_layer = layerID_g[0];
  int relative_layer = target_layer - baseId;

  for (int i = 0; i < 3; i++) {
    /* Every output variable becomes undefined after EmitVertex(), and flat outputs as well
     * as the layer are usually taken from the provoking (by default the last) vertex.
     * Write all outputs again for each vertex instead of only before the first one. */
    gl_Layer = target_layer;
    layerID = relative_layer;
    gl_Position = gl_in[i].gl_Position;
    EmitVertex();
  }
  EndPrimitive();
}

View File

@ -0,0 +1,32 @@
/* Parameters of the shadow currently being processed.
 * Bound under the name "shadow_render_block"; the C side uploads an EEVEE_ShadowRender
 * struct into this buffer, so the member order here has to follow that struct. */
layout(std140) uniform shadow_render_block
{
/* Use vectors to avoid alignment padding. */
/* The three vectors below are indexed per cascade by the fragment stages. */
ivec4 shadowSampleCount;
vec4 shadowInvSampleCount;
vec4 filterSize;
/* Number of layers to process; triangles with an index >= viewCount are collapsed in main(). */
int viewCount;
/* Offset added to the per-triangle index to get the destination layer. */
int baseId;
float cubeTexelSize;
float storedTexelSize;
float nearClip;
float farClip;
float exponent;
};
/* Destination layer of the triangle this vertex belongs to; consumed by the geometry stage. */
out int layerID_g;
/* Procedural vertex stage: no vertex attributes are read.
 * Each group of three consecutive vertex ids forms one screen-covering triangle, and each
 * triangle targets one layer (triangle index + baseId). */
void main()
{
  int corner = gl_VertexID % 3;
  int view_id = gl_VertexID / 3;

  if (view_id < viewCount) {
    /* Corners are (-1,-1), (3,-1) and (-1,3): a single triangle covering the viewport. */
    float x = ((corner & 1) != 0) ? 3.0 : -1.0;
    float y = ((corner & 2) != 0) ? 3.0 : -1.0;
    gl_Position = vec4(x, y, 1.0, 1.0);
  }
  else {
    /* HACK avoid changing drawcall parameters. */
    /* Triangles past the active view count get all vertices at the same position. */
    gl_Position = vec4(0.0);
  }

  layerID_g = view_id + baseId;
}

View File

@ -1,23 +1,32 @@
layout(std140) uniform shadow_render_block
{
vec4 lampPosition;
/* Use vectors to avoid alignment padding. */
ivec4 shadowSampleCount;
vec4 shadowInvSampleCount;
vec4 filterSize;
int viewCount;
int baseId;
float cubeTexelSize;
float storedTexelSize;
float nearClip;
float farClip;
int shadowSampleCount;
float shadowInvSampleCount;
float exponent;
};
#ifdef CSM
uniform sampler2DArray shadowTexture;
uniform int cascadeId;
#else
uniform samplerCube shadowTexture;
#endif
uniform float shadowFilterSize;
flat in int layerID;
#ifdef CSM
# define cascadeID layerID
#else
# define cascadeID 0
#endif
out vec4 FragColor;
@ -39,12 +48,12 @@ vec3 octahedral_to_cubemap_proj(vec2 co)
/* http://advances.realtimerendering.com/s2009/SIGGRAPH%202009%20-%20Lighting%20Research%20at%20Bungie.pdf
* Slide 55. */
#define ln_space_prefilter_step(ref, sample) exp(sample - ref)
#define ln_space_prefilter_finalize(ref, sum) (ref + log(shadowInvSampleCount * sum))
#define ln_space_prefilter_finalize(ref, sum) (ref + log(shadowInvSampleCount[cascadeID] * sum))
#ifdef CSM
vec3 get_texco(vec3 cos, const vec2 ofs)
{
cos.xy += ofs * shadowFilterSize;
cos.xy += ofs * filterSize[cascadeID];
return cos;
}
#else /* CUBEMAP */
@ -109,7 +118,7 @@ void main()
cos.xy = gl_FragCoord.xy * storedTexelSize;
#ifdef CSM
cos.z = float(cascadeId);
cos.z = float(cascadeID);
#else /* CUBEMAP */
/* add a 2 pixel border to ensure filtering is correct */
cos.xy *= 1.0 + storedTexelSize * 2.0;
@ -132,8 +141,8 @@ void main()
cos = normalize(octahedral_to_cubemap_proj(cos.xy));
make_orthonormal_basis(cos);
T *= shadowFilterSize;
B *= shadowFilterSize;
T *= filterSize[cascadeID];
B *= filterSize[cascadeID];
#endif
#ifdef ESM
@ -159,7 +168,7 @@ void main()
* `const vec2 concentric[]` array with variable indices is extremely slow.
* The solution is to use constant indices to access the array.
*/
if (shadowSampleCount > 4) {
if (shadowSampleCount[cascadeID] > 4) {
grouped_samples_accum(
cos, concentric[4], concentric[5], concentric[6], concentric[7], ref, accum);
grouped_samples_accum(
@ -167,7 +176,7 @@ void main()
grouped_samples_accum(
cos, concentric[12], concentric[13], concentric[14], concentric[15], ref, accum);
}
if (shadowSampleCount > 16) {
if (shadowSampleCount[cascadeID] > 16) {
grouped_samples_accum(
cos, concentric[16], concentric[17], concentric[18], concentric[19], ref, accum);
grouped_samples_accum(
@ -180,7 +189,7 @@ void main()
cos, concentric[32], concentric[33], concentric[34], concentric[35], ref, accum);
}
#ifdef HIGH_BLUR
if (shadowSampleCount > 36) {
if (shadowSampleCount[cascadeID] > 36) {
grouped_samples_accum(
cos, concentric[36], concentric[37], concentric[38], concentric[39], ref, accum);
grouped_samples_accum(
@ -196,7 +205,7 @@ void main()
grouped_samples_accum(
cos, concentric[60], concentric[61], concentric[62], concentric[63], ref, accum);
}
if (shadowSampleCount > 64) {
if (shadowSampleCount[cascadeID] > 64) {
grouped_samples_accum(
cos, concentric[64], concentric[65], concentric[66], concentric[67], ref, accum);
grouped_samples_accum(
@ -216,7 +225,7 @@ void main()
grouped_samples_accum(
cos, concentric[96], concentric[97], concentric[98], concentric[99], ref, accum);
}
if (shadowSampleCount > 100) {
if (shadowSampleCount[cascadeID] > 100) {
grouped_samples_accum(
cos, concentric[100], concentric[101], concentric[102], concentric[103], ref, accum);
grouped_samples_accum(
@ -240,7 +249,7 @@ void main()
grouped_samples_accum(
cos, concentric[140], concentric[141], concentric[142], concentric[143], ref, accum);
}
if (shadowSampleCount > 144) {
if (shadowSampleCount[cascadeID] > 144) {
grouped_samples_accum(
cos, concentric[144], concentric[145], concentric[146], concentric[147], ref, accum);
grouped_samples_accum(
@ -268,7 +277,7 @@ void main()
grouped_samples_accum(
cos, concentric[192], concentric[193], concentric[194], concentric[195], ref, accum);
}
if (shadowSampleCount > 196) {
if (shadowSampleCount[cascadeID] > 196) {
grouped_samples_accum(
cos, concentric[196], concentric[197], concentric[198], concentric[199], ref, accum);
grouped_samples_accum(
@ -308,6 +317,6 @@ void main()
FragColor = accum.xxxx;
#else /* VSM */
FragColor = accum.xyxy * shadowInvSampleCount;
FragColor = accum.xyxy * shadowInvSampleCount[cascadeID];
#endif
}