Cycles: Set of fixes for delayed SSS ray tracing

There were multiple issues which are solved now:

- It was possible that a ray wouldn't be bounced off the BSSRDF, for example
  when PDF or shader eval is zero. In this case PathState might have been
  left in a pre-bounce state, which would have given incorrect shading
  results.

  This is solved by having separate PathState for each of the hits.

- Path radiance summing wasn't happening correctly either: indirect rays
  were using the wrong path radiance in the case when there was more than
  one hit recorded.

  This is now using a bit trickier state machine which calculates path
  radiance for just SSS (both direct and indirect) and then sums it back
  to the final radiance.

- The previous commit wasn't totally correct either: it was addressing an
  induced bug caused by the wrong path state left from the "un-happened"
  ray bounce.

  There should be no special case happening here, BSSRDFs will be replaced
  with diffuse ones due to PATH_RAY_DIFFUSE_ANCESTOR flag.

- Merged back codebases for "delayed" and "immediate" indirect SSS ray
  tracing, hopefully making it easier to maintain the codebase.

Sure, this change brings memory usage back up by about 4-5%, but overall
it's still about a 2x memory reduction for the experimental kernel here.

Thanks Brecht for the review!
This commit is contained in:
Sergey Sharybin 2015-11-28 19:30:35 +05:00
parent 8919ed3a62
commit 1e43f0d742
3 changed files with 55 additions and 74 deletions

View File

@ -65,6 +65,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
SubsurfaceIndirectRays ss_indirect;
ss_indirect.tracing = false;
ss_indirect.num_rays = 0;
if(kernel_path_subsurface_scatter(kg,
sd,
@ -75,14 +76,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&throughput,
&ss_indirect))
{
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
while(ss_indirect.num_rays) {
kernel_path_subsurface_setup_indirect(kg,
&ss_indirect,
&L_sample,
&ray,
&state,
&ray,
&ray,
&L_sample,
&throughput);
kernel_path_indirect(kg,
&rng,
@ -91,8 +91,8 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
state.num_samples,
&state,
&L_sample);
kernel_path_subsurface_accum_indirect(&ss_indirect, &L_sample);
}
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
is_sss_sample = true;
}
}

View File

@ -448,21 +448,13 @@ ccl_device bool kernel_path_subsurface_scatter(
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
/* We should never have two consecutive BSSRDF bounces,
* the second one should be converted to a diffuse BSDF to
* avoid this.
*/
kernel_assert(!ss_indirect->tracing);
/* If indirect ray hits BSSRDF we replace it with diffuse BSDF. */
if(ss_indirect->num_rays) {
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg,
sd,
state->flag,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
return false;
}
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
@ -493,9 +485,10 @@ ccl_device bool kernel_path_subsurface_scatter(
sc,
false);
PathState *hit_state = &ss_indirect->state;
PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
*hit_state = *state;
*hit_ray = *ray;
@ -503,51 +496,25 @@ ccl_device bool kernel_path_subsurface_scatter(
hit_state->rng_offset += PRNG_BOUNCE_NUM;
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, L);
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg,
rng,
sd,
hit_tp,
hit_state,
L,
hit_L,
hit_ray))
{
#ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f;
#endif
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
ss_indirect->num_rays++;
#else
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(kg,
&volume_ray,
hit_state->volume_stack);
}
# endif /* __VOLUME__ */
kernel_path_indirect(kg,
rng,
hit_ray,
*hit_tp,
hit_state->num_samples,
hit_state,
L);
/* For render passes, sum and reset indirect light pass variables
* for the next samples.
*/
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
#endif
}
else {
path_radiance_accum_sample(L, hit_L, 1);
}
}
return true;
@ -555,23 +522,38 @@ ccl_device bool kernel_path_subsurface_scatter(
return false;
}
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
/* Accumulate the radiance gathered in L into the subsurface radiance stored
 * in ss_indirect->direct_L.
 *
 * Does nothing unless ss_indirect->tracing is set. Otherwise the indirect
 * light of L is summed first, L is then accumulated into
 * ss_indirect->direct_L, and once no indirect subsurface rays remain
 * (num_rays == 0) the accumulated direct_L is copied back into *L.
 */
ccl_device void kernel_path_subsurface_accum_indirect(
SubsurfaceIndirectRays *ss_indirect,
PathRadiance *L)
{
if(ss_indirect->tracing) {
path_radiance_sum_indirect(L);
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
/* No rays left to trace: hand the accumulated radiance back via L. */
if(ss_indirect->num_rays == 0) {
*L = ss_indirect->direct_L;
}
}
}
ccl_device void kernel_path_subsurface_setup_indirect(
KernelGlobals *kg,
SubsurfaceIndirectRays *ss_indirect,
PathRadiance *L,
const Ray *orig_ray,
PathState *state,
Ray *orig_ray,
Ray *ray,
PathRadiance *L,
float3 *throughput)
{
if(!ss_indirect->tracing) {
ss_indirect->direct_L = *L;
}
ss_indirect->tracing = true;
/* Setup state, ray and throughput for indirect SSS rays. */
ss_indirect->num_rays--;
Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
*state = ss_indirect->state;
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
#ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
@ -587,17 +569,15 @@ ccl_device void kernel_path_subsurface_setup_indirect(
}
#endif /* __VOLUME__ */
*state = ss_indirect->state[ss_indirect->num_rays];
*ray = *indirect_ray;
*L = *indirect_L;
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
/* For render passes, sum and reset indirect light pass variables
* for the next samples.
*/
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
}
#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
#endif
#endif /* __SUBSURFACE__ */
ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
{
@ -618,9 +598,9 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
#ifdef __SUBSURFACE__
SubsurfaceIndirectRays ss_indirect;
ss_indirect.tracing = false;
ss_indirect.num_rays = 0;
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
/* TODO(sergey): Avoid having explicit copy of the pre-subsurface scatter
* ray by storing an updated version of state in the ss_indirect which will
* be updated to the new volume stack.
@ -628,7 +608,6 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
Ray ss_orig_ray;
for(;;) {
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
#endif
/* path iteration */
@ -877,9 +856,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
&throughput,
&ss_indirect))
{
# ifdef __SUBSURFACE_DELAYED_INDIRECT__
ss_orig_ray = ray;
# endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
break;
}
}
@ -893,24 +870,26 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break;
}
#ifdef __SUBSURFACE_DELAYED_INDIRECT__
#ifdef __SUBSURFACE__
kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
/* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect.
*/
if(ss_indirect.num_rays) {
kernel_path_subsurface_setup_indirect(kg,
&ss_indirect,
&L,
&state,
&ss_orig_ray,
&state,
&ray,
&L,
&throughput);
}
else {
break;
}
}
#endif /* __SUBSURFACE_DELAYED_INDIRECT__ */
#endif /* __SUBSURFACE__ */
float3 L_sum = path_radiance_clamp_and_sum(kg, &L);

View File

@ -87,7 +87,6 @@ CCL_NAMESPACE_BEGIN
/* Experimental on GPU */
#ifdef __KERNEL_EXPERIMENTAL__
#define __SUBSURFACE__
#define __SUBSURFACE_DELAYED_INDIRECT__
#define __CMJ__
#endif
@ -770,11 +769,14 @@ struct SubsurfaceIntersection
struct SubsurfaceIndirectRays
{
bool need_update_volume_stack;
PathState state;
bool tracing;
PathState state[BSSRDF_MAX_HITS];
PathRadiance direct_L;
int num_rays;
Ray rays[BSSRDF_MAX_HITS];
float3 throughputs[BSSRDF_MAX_HITS];
PathRadiance L[BSSRDF_MAX_HITS];
};
/* Constant Kernel Data