Cycles: Delay shooting SSS indirect rays

The idea is to delay shooting indirect rays for the SSS sampling and
trace them after the main integration loop was finished.

This reduces GPU stack usage even further and brings it down to around
652MB (comparing to 722MB before the change and 946MB with previous
stable release).

This also solves the speed regression happened in the previous commit
and now simple SSS scene (SSS suzanne on the floor) renders in 0:50
(comparing to 1:16 with previous commit and 1:03 with official release).
This commit is contained in:
Sergey Sharybin 2015-11-22 15:48:33 +05:00
parent 8bca34fe32
commit 2a5c1fc9cc
4 changed files with 353 additions and 112 deletions

View File

@ -64,8 +64,19 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */
if((is_combined || is_sss_sample) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
if(kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
SubsurfaceIndirectRays ss_indirect;
if(kernel_path_subsurface_scatter(kg,
sd,
&L_sample,
&state,
&rng,
&ray,
&throughput,
&ss_indirect))
{
kernel_path_subsurface_scatter_indirect(kg, &L_sample, &state, &rng, &ray, &ss_indirect);
is_sss_sample = true;
}
}
#endif
@ -84,7 +95,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
state.ray_t = 0.0f;
#endif
/* compute indirect light */
kernel_path_indirect(kg, &rng, ray, throughput, 1, state, &L_sample);
kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);

View File

@ -52,47 +52,64 @@
CCL_NAMESPACE_BEGIN
ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
float3 throughput, int num_samples, PathState state, PathRadiance *L)
ccl_device void kernel_path_indirect(KernelGlobals *kg,
RNG *rng,
Ray *ray,
float3 throughput,
int num_samples,
PathState *state,
PathRadiance *L)
{
/* path iteration */
for(;;) {
/* intersect scene */
Intersection isect;
uint visibility = path_state_ray_visibility(kg, &state);
bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
uint visibility = path_state_ray_visibility(kg, state);
bool hit = scene_intersect(kg,
ray,
visibility,
&isect,
NULL,
0.0f, 0.0f);
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
light_ray.P = ray.P - state.ray_t*ray.D;
state.ray_t += isect.t;
light_ray.D = ray.D;
light_ray.t = state.ray_t;
light_ray.time = ray.time;
light_ray.dD = ray.dD;
light_ray.dP = ray.dP;
light_ray.P = ray->P - state->ray_t*ray->D;
state->ray_t += isect.t;
light_ray.D = ray->D;
light_ray.t = state->ray_t;
light_ray.time = ray->time;
light_ray.dD = ray->dD;
light_ray.dP = ray->dP;
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
path_radiance_accum_emission(L, throughput, emission, state.bounce);
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
path_radiance_accum_emission(L,
throughput,
emission,
state->bounce);
}
}
#endif
#ifdef __VOLUME__
/* volume attenuation, emission, scatter */
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
bool heterogeneous =
volume_stack_is_heterogeneous(kg,
state->volume_stack);
#ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
int sampling_method =
volume_stack_sampling_method(kg,
state->volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
if(decoupled) {
@ -100,15 +117,27 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous);
shader_setup_from_volume(kg,
&volume_sd,
&volume_ray,
state->bounce,
state->transparent_bounce);
kernel_volume_decoupled_record(kg,
state,
&volume_ray,
&volume_sd,
&volume_segment,
heterogeneous);
volume_segment.sampling_method = sampling_method;
/* emission */
if(volume_segment.closure_flag & SD_EMISSION)
path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state.bounce);
if(volume_segment.closure_flag & SD_EMISSION) {
path_radiance_accum_emission(L,
throughput,
volume_segment.accum_emission,
state->bounce);
}
/* scattering */
VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
@ -117,28 +146,51 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
bool all = kernel_data.integrator.sample_all_lights_indirect;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
throughput, &state, L, all, &volume_ray, &volume_segment);
kernel_branched_path_volume_connect_light(kg,
rng,
&volume_sd,
throughput,
state,
L,
all,
&volume_ray,
&volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
&state, &volume_ray, &volume_sd, &throughput,
rphase, rscatter, &volume_segment, NULL, true);
state,
&volume_ray,
&volume_sd,
&throughput,
rphase,
rscatter,
&volume_segment,
NULL,
true);
}
/* free cached steps */
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&throughput,
state,
L,
ray))
{
continue;
else
}
else {
break;
}
}
else {
throughput *= volume_segment.accum_transmittance;
@ -150,18 +202,32 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, L);
kernel_path_volume_connect_light(kg,
rng,
&volume_sd,
throughput,
state,
L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, L, &ray))
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&throughput,
state,
L,
ray))
{
continue;
else
}
else {
break;
}
}
#endif
}
@ -171,8 +237,11 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
if(!hit) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &state, &ray);
path_radiance_accum_background(L, throughput, L_background, state.bounce);
float3 L_background = indirect_background(kg, state, ray);
path_radiance_accum_background(L,
throughput,
L_background,
state->bounce);
#endif
break;
@ -180,9 +249,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
shader_setup_from_ray(kg,
&sd,
&isect,
ray,
state->bounce,
state->transparent_bounce);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
#ifdef __BRANCHED_PATH__
shader_merge_closures(&sd);
#endif
@ -190,7 +264,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
@ -201,21 +275,28 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
#ifdef __EMISSION__
/* emission */
if(sd.flag & SD_EMISSION) {
float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state.bounce);
float3 emission = indirect_primitive_emission(kg,
&sd,
isect.t,
state->flag,
state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate */
float probability = path_state_terminate_probability(kg, &state, throughput*num_samples);
float probability =
path_state_terminate_probability(kg,
state,
throughput*num_samples);
if(probability == 0.0f) {
break;
}
else if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
if(terminate >= probability)
break;
@ -227,7 +308,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
float bsdf_u, bsdf_v;
path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
float ao_factor = kernel_data.background.ao_factor;
float3 ao_N;
@ -251,8 +332,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
light_ray.dP = sd.dP;
light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
path_radiance_accum_ao(L,
throughput,
ao_alpha,
ao_bsdf,
ao_shadow,
state->bounce);
}
}
}
#endif
@ -269,11 +356,21 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(rng, &state, 0x68bc21eb);
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
path_state_rng_2D(kg,
rng,
state,
PRNG_BSDF_U,
&bssrdf_u, &bssrdf_v);
subsurface_scatter_step(kg,
&sd,
state->flag,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
}
}
#endif
@ -281,11 +378,18 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
if(kernel_data.integrator.use_direct_light) {
bool all = kernel_data.integrator.sample_all_lights_indirect;
kernel_branched_path_surface_connect_light(kg, rng, &sd, &state, throughput, 1.0f, L, all);
kernel_branched_path_surface_connect_light(kg,
rng,
&sd,
state,
throughput,
1.0f,
L,
all);
}
#endif
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, L, &ray))
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, state, L, ray))
break;
}
}
@ -326,7 +430,15 @@ ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *
#ifdef __SUBSURFACE__
ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput)
ccl_device bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
PathRadiance *L,
PathState *state,
RNG *rng,
Ray *ray,
float3 *throughput,
SubsurfaceIndirectRays *ss_indirect)
{
float bssrdf_probability;
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
@ -349,9 +461,9 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
bssrdf_u, bssrdf_v,
false);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
ss_indirect->need_update_volume_stack =
kernel_data.integrator.use_volumes &&
ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
#endif
/* compute lighting with the BSDF closure */
@ -367,47 +479,82 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
sc,
false);
float3 tp = *throughput;
PathState hit_state = *state;
Ray hit_ray = *ray;
PathState *hit_state = &ss_indirect->state;
Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
hit_state.rng_offset += PRNG_BOUNCE_NUM;
*hit_state = *state;
*hit_ray = *ray;
*hit_tp = *throughput;
kernel_path_surface_connect_light(kg, rng, sd, tp, state, L);
hit_state->rng_offset += PRNG_BOUNCE_NUM;
if(kernel_path_surface_bounce(kg, rng, sd, &tp, &hit_state, L, &hit_ray)) {
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, L);
if(kernel_path_surface_bounce(kg,
rng,
sd,
hit_tp,
hit_state,
L,
hit_ray))
{
#ifdef __LAMP_MIS__
hit_state.ray_t = 0.0f;
hit_state->ray_t = 0.0f;
#endif
#ifdef __VOLUME__
if(need_update_volume_stack) {
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(hit_ray.P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(
kg,
&volume_ray,
hit_state.volume_stack);
/* Move volume ray forward. */
volume_ray.P = hit_ray.P;
}
#endif
kernel_path_indirect(kg, rng, hit_ray, tp, state->num_samples, hit_state, L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
ss_indirect->num_rays++;
}
}
return true;
}
return false;
}
/* Trace subsurface indirect rays separately after the path loop, to reduce
* GPU stack memory usage. this way ShaderData and other data structures
* used during the loop are not needed during kernel_path_indirect.
*/
ccl_device void kernel_path_subsurface_scatter_indirect(
KernelGlobals *kg,
PathRadiance *L,
PathState *state,
RNG *rng,
Ray *ray,
SubsurfaceIndirectRays *ss_indirect)
{
for (int i = 0; i < ss_indirect->num_rays; i++) {
Ray *indirect_ray = &ss_indirect->rays[i];
float3 indirect_throughput = ss_indirect->throughputs[i];
*state = ss_indirect->state;
#ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
/* TODO(sergey): Single assignment per scatter. */
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(indirect_ray->P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(
kg,
&volume_ray,
state->volume_stack);
}
#endif
/* Note that this modifies state. */
kernel_path_indirect(kg, rng, indirect_ray, indirect_throughput, state->num_samples, state, L);
/* For render passes, sum and reset indirect light pass variables
* for the next samples.
*/
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
}
}
#endif
ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
@ -427,6 +574,11 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
debug_data_init(&debug_data);
#endif
#ifdef __SUBSURFACE__
SubsurfaceIndirectRays ss_indirect;
ss_indirect.num_rays = 0;
#endif
/* path iteration */
for(;;) {
/* intersect scene */
@ -664,8 +816,17 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
/* bssrdf scatter to a different location on the same object, replacing
* the closures with a diffuse BSDF */
if(sd.flag & SD_BSSRDF) {
if(kernel_path_subsurface_scatter(kg, &sd, &L, &state, rng, &ray, &throughput))
if(kernel_path_subsurface_scatter(kg,
&sd,
&L,
&state,
rng,
&ray,
&throughput,
&ss_indirect))
{
break;
}
}
#endif
@ -677,6 +838,20 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
break;
}
#ifdef __SUBSURFACE__
/* Trace indirect subsurface afterwards to reduce GPU stack size.
* note that this modifies state.
*/
if (ss_indirect.num_rays) {
kernel_path_subsurface_scatter_indirect(kg,
&L,
&state,
rng,
&ray,
&ss_indirect);
}
#endif
float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
kernel_write_light_passes(kg, buffer, &L, sample);

View File

@ -91,10 +91,27 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
float3 tp = throughput;
Ray bsdf_ray;
if(!kernel_branched_path_surface_bounce(kg, &bsdf_rng, sd, sc, j, num_samples, &tp, &ps, L, &bsdf_ray))
if(!kernel_branched_path_surface_bounce(kg,
&bsdf_rng,
sd,
sc,
j,
num_samples,
&tp,
&ps,
L,
&bsdf_ray))
{
continue;
}
kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
kernel_path_indirect(kg,
rng,
&bsdf_ray,
tp*num_samples_inv,
num_samples,
&ps,
L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@ -312,12 +329,25 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
(void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED);
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
rng,
&pray,
tp*num_samples_inv,
num_samples,
&ps,
&L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@ -353,15 +383,28 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */
kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &tp, &ps, &L, &pray)) {
kernel_path_indirect(kg, rng, pray, tp, num_samples, ps, &L);
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
rng,
&pray,
tp,
num_samples,
&ps,
&L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */

View File

@ -520,18 +520,6 @@ typedef ccl_addr_space struct Intersection {
#endif
} Intersection;
/* Subsurface Intersection result */
struct SubsurfaceIntersection
{
Ray ray;
float3 weight[BSSRDF_MAX_HITS];
int num_hits;
struct Intersection hits[BSSRDF_MAX_HITS];
float3 Ng[BSSRDF_MAX_HITS];
};
/* Primitives */
typedef enum PrimitiveType {
@ -764,6 +752,30 @@ typedef struct PathState {
#endif
} PathState;
/* Subsurface */
/* Struct to gather multiple SSS hits. */
struct SubsurfaceIntersection
{
Ray ray;
float3 weight[BSSRDF_MAX_HITS];
int num_hits;
Intersection hits[BSSRDF_MAX_HITS];
float3 Ng[BSSRDF_MAX_HITS];
};
/* Struct to gather SSS indirect rays and delay tracing them. */
struct SubsurfaceIndirectRays
{
bool need_update_volume_stack;
PathState state;
int num_rays;
Ray rays[BSSRDF_MAX_HITS];
float3 throughputs[BSSRDF_MAX_HITS];
};
/* Constant Kernel Data
*
* These structs are passed from CPU to various devices, and the struct layout