Cycles: Set of fixes for delayed SSS ray tracing
There were multiple issues which are now solved: - It was possible that a ray wouldn't be bounced off the BSSRDF, for example when the PDF or shader eval is zero. In this case PathState might have been left in a pre-bounce state, which would have given incorrect shading results. This is solved by having a separate PathState for each of the hits. - Path radiance summing wasn't happening correctly either: indirect rays were using the wrong path radiance when more than one hit was recorded. This now uses a slightly trickier state machine which calculates path radiance for just SSS (both direct and indirect) and then sums it back into the final radiance. - The previous commit wasn't totally correct either; it was an induced bug caused by the wrong path state left over from the "un-happened" ray bounce. There should be no special case here: BSSRDFs will be replaced with diffuse ones due to the PATH_RAY_DIFFUSE_ANCESTOR flag. - Merged back the codebases for "delayed" and "immediate" indirect SSS ray tracing, hopefully making the code easier to maintain. Admittedly this change brings memory usage back up by about 4-5%, but overall it's still about a 2x memory reduction for the experimental kernel. Thanks Brecht for the review!
This commit is contained in:
parent
8919ed3a62
commit
1e43f0d742
|
@ -65,6 +65,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
|||
if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
|
||||
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
|
||||
SubsurfaceIndirectRays ss_indirect;
|
||||
ss_indirect.tracing = false;
|
||||
ss_indirect.num_rays = 0;
|
||||
if(kernel_path_subsurface_scatter(kg,
|
||||
sd,
|
||||
|
@ -75,14 +76,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
|||
&throughput,
|
||||
&ss_indirect))
|
||||
{
|
||||
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
while(ss_indirect.num_rays) {
|
||||
kernel_path_subsurface_setup_indirect(kg,
|
||||
&ss_indirect,
|
||||
&L_sample,
|
||||
&ray,
|
||||
&state,
|
||||
&ray,
|
||||
&ray,
|
||||
&L_sample,
|
||||
&throughput);
|
||||
kernel_path_indirect(kg,
|
||||
&rng,
|
||||
|
@ -91,8 +91,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
|
|||
state.num_samples,
|
||||
&state,
|
||||
&L_sample);
|
||||
kernel_path_subsurface_accum_indirect(&ss_indirect, &L_sample);
|
||||
}
|
||||
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
|
||||
is_sss_sample = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -448,21 +448,13 @@ ccl_device bool kernel_path_subsurface_scatter(
|
|||
|
||||
/* do bssrdf scatter step if we picked a bssrdf closure */
|
||||
if(sc) {
|
||||
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
|
||||
/* We should never have two consecutive BSSRDF bounces,
|
||||
* the second one should be converted to a diffuse BSDF to
|
||||
* avoid this.
|
||||
*/
|
||||
kernel_assert(!ss_indirect->tracing);
|
||||
|
||||
/* If indirect ray hits BSSRDF we replace it with diffuse BSDF. */
|
||||
if(ss_indirect->num_rays) {
|
||||
float bssrdf_u, bssrdf_v;
|
||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||
subsurface_scatter_step(kg,
|
||||
sd,
|
||||
state->flag,
|
||||
sc,
|
||||
&lcg_state,
|
||||
bssrdf_u, bssrdf_v,
|
||||
false);
|
||||
return false;
|
||||
}
|
||||
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
|
||||
|
||||
SubsurfaceIntersection ss_isect;
|
||||
float bssrdf_u, bssrdf_v;
|
||||
|
@ -493,9 +485,10 @@ ccl_device bool kernel_path_subsurface_scatter(
|
|||
sc,
|
||||
false);
|
||||
|
||||
PathState *hit_state = &ss_indirect->state;
|
||||
PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
|
||||
Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
*hit_state = *state;
|
||||
*hit_ray = *ray;
|
||||
|
@ -503,51 +496,25 @@ ccl_device bool kernel_path_subsurface_scatter(
|
|||
|
||||
hit_state->rng_offset += PRNG_BOUNCE_NUM;
|
||||
|
||||
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, L);
|
||||
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
|
||||
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
|
||||
|
||||
if(kernel_path_surface_bounce(kg,
|
||||
rng,
|
||||
sd,
|
||||
hit_tp,
|
||||
hit_state,
|
||||
L,
|
||||
hit_L,
|
||||
hit_ray))
|
||||
{
|
||||
#ifdef __LAMP_MIS__
|
||||
hit_state->ray_t = 0.0f;
|
||||
#endif
|
||||
|
||||
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
ss_indirect->num_rays++;
|
||||
#else
|
||||
# ifdef __VOLUME__
|
||||
if(ss_indirect->need_update_volume_stack) {
|
||||
Ray volume_ray = *ray;
|
||||
|
||||
/* Setup ray from previous surface point to the new one. */
|
||||
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
|
||||
&volume_ray.t);
|
||||
|
||||
kernel_volume_stack_update_for_subsurface(kg,
|
||||
&volume_ray,
|
||||
hit_state->volume_stack);
|
||||
}
|
||||
# endif /* __VOLUME__ */
|
||||
|
||||
kernel_path_indirect(kg,
|
||||
rng,
|
||||
hit_ray,
|
||||
*hit_tp,
|
||||
hit_state->num_samples,
|
||||
hit_state,
|
||||
L);
|
||||
|
||||
/* For render passes, sum and reset indirect light pass variables
|
||||
* for the next samples.
|
||||
*/
|
||||
path_radiance_sum_indirect(L);
|
||||
path_radiance_reset_indirect(L);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
path_radiance_accum_sample(L, hit_L, 1);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -555,23 +522,38 @@ ccl_device bool kernel_path_subsurface_scatter(
|
|||
return false;
|
||||
}
|
||||
|
||||
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
ccl_device void kernel_path_subsurface_accum_indirect(
|
||||
SubsurfaceIndirectRays *ss_indirect,
|
||||
PathRadiance *L)
|
||||
{
|
||||
if(ss_indirect->tracing) {
|
||||
path_radiance_sum_indirect(L);
|
||||
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
|
||||
if(ss_indirect->num_rays == 0) {
|
||||
*L = ss_indirect->direct_L;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void kernel_path_subsurface_setup_indirect(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIndirectRays *ss_indirect,
|
||||
PathRadiance *L,
|
||||
const Ray *orig_ray,
|
||||
PathState *state,
|
||||
Ray *orig_ray,
|
||||
Ray *ray,
|
||||
PathRadiance *L,
|
||||
float3 *throughput)
|
||||
{
|
||||
if(!ss_indirect->tracing) {
|
||||
ss_indirect->direct_L = *L;
|
||||
}
|
||||
ss_indirect->tracing = true;
|
||||
|
||||
/* Setup state, ray and throughput for indirect SSS rays. */
|
||||
ss_indirect->num_rays--;
|
||||
|
||||
Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
|
||||
*state = ss_indirect->state;
|
||||
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
#ifdef __VOLUME__
|
||||
if(ss_indirect->need_update_volume_stack) {
|
||||
|
@ -587,17 +569,15 @@ ccl_device void kernel_path_subsurface_setup_indirect(
|
|||
}
|
||||
#endif /* __VOLUME__ */
|
||||
|
||||
*state = ss_indirect->state[ss_indirect->num_rays];
|
||||
*ray = *indirect_ray;
|
||||
*L = *indirect_L;
|
||||
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
|
||||
/* For render passes, sum and reset indirect light pass variables
|
||||
* for the next samples.
|
||||
*/
|
||||
path_radiance_sum_indirect(L);
|
||||
path_radiance_reset_indirect(L);
|
||||
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
|
||||
}
|
||||
#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
|
||||
|
||||
#endif
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
|
||||
{
|
||||
|
@ -618,9 +598,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
|
|||
|
||||
#ifdef __SUBSURFACE__
|
||||
SubsurfaceIndirectRays ss_indirect;
|
||||
ss_indirect.tracing = false;
|
||||
ss_indirect.num_rays = 0;
|
||||
|
||||
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
/* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter
|
||||
* ray by storing an updated version of state in the ss_indirect which will
|
||||
* be updated to the new volume stack.
|
||||
|
@ -628,7 +608,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
|
|||
Ray ss_orig_ray;
|
||||
|
||||
for(;;) {
|
||||
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
|
||||
#endif
|
||||
|
||||
/* path iteration */
|
||||
|
@ -877,9 +856,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
|
|||
&throughput,
|
||||
&ss_indirect))
|
||||
{
|
||||
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
ss_orig_ray = ray;
|
||||
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -893,24 +870,26 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
|
|||
break;
|
||||
}
|
||||
|
||||
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
|
||||
#ifdef __SUBSURFACE__
|
||||
kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
|
||||
|
||||
/* Trace indirect subsurface rays by restarting the loop. this uses less
|
||||
* stack memory than invoking kernel_path_indirect.
|
||||
*/
|
||||
if(ss_indirect.num_rays) {
|
||||
kernel_path_subsurface_setup_indirect(kg,
|
||||
&ss_indirect,
|
||||
&L,
|
||||
&state,
|
||||
&ss_orig_ray,
|
||||
&state,
|
||||
&ray,
|
||||
&L,
|
||||
&throughput);
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
|
||||
|
||||
|
|
|
@ -87,7 +87,6 @@ CCL_NAMESPACE_BEGIN
|
|||
/* Experimental on GPU */
|
||||
#ifdef __KERNEL_EXPERIMENTAL__
|
||||
#define __SUBSURFACE__
|
||||
#define __SUBSURFACE_DELAYED_INDIRECT__
|
||||
#define __CMJ__
|
||||
#endif
|
||||
|
||||
|
@ -770,11 +769,14 @@ struct SubsurfaceIntersection
|
|||
struct SubsurfaceIndirectRays
|
||||
{
|
||||
bool need_update_volume_stack;
|
||||
PathState state;
|
||||
bool tracing;
|
||||
PathState state[BSSRDF_MAX_HITS];
|
||||
PathRadiance direct_L;
|
||||
|
||||
int num_rays;
|
||||
Ray rays[BSSRDF_MAX_HITS];
|
||||
float3 throughputs[BSSRDF_MAX_HITS];
|
||||
PathRadiance L[BSSRDF_MAX_HITS];
|
||||
};
|
||||
|
||||
/* Constant Kernel Data
|
||||
|
|
Loading…
Reference in New Issue