Cycles CUDA: reduce stack memory by reusing ShaderData.

57% less for path and 48% less for branched path.
This commit is contained in:
Brecht Van Lommel 2016-05-22 22:35:47 +02:00
parent af4a04eae0
commit 999d5a6785
Notes: blender-bot 2023-02-21 17:59:30 +01:00
Referenced by issue #48695, Significant slowdown when editing a mesh with shrinkwraps & slightly high poly
Referenced by issue #48547, Shrinkwrap not taking auxiliary target into account properly
Referenced by issue #48507, Symmetrize doesn't work properly on Bendy Bones
14 changed files with 196 additions and 169 deletions

View File

@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
/* emission shader data memory used by various functions */
ShaderData emission_sd;
ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng;
ray.t = FLT_MAX;
@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
/* init path state */
path_state_init(kg, &state, &rng, sample, NULL);
path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
/* evaluate surface shader */
float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
kernel_path_subsurface_init_indirect(&ss_indirect);
if(kernel_path_subsurface_scatter(kg,
sd,
&emission_sd,
&L_sample,
&state,
&rng,
@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
&L_sample,
&throughput);
kernel_path_indirect(kg,
&emission_sd,
&rng,
&ray,
throughput,
@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample light and BSDF */
if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
kernel_path_surface_connect_light(kg, &rng, sd, throughput, &state, &L_sample);
kernel_path_surface_connect_light(kg, &rng, sd, &emission_sd, throughput, &state, &L_sample);
if(kernel_path_surface_bounce(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
#ifdef __LAMP_MIS__
state.ray_t = 0.0f;
#endif
/* compute indirect light */
kernel_path_indirect(kg, &rng, &ray, throughput, 1, &state, &L_sample);
kernel_path_indirect(kg, &emission_sd, &rng, &ray, throughput, 1, &state, &L_sample);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);
@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample ambient occlusion */
if(pass_filter & BAKE_FILTER_AO) {
kernel_branched_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, &rng, throughput);
}
/* sample emission */
@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
/* sample subsurface scattering */
if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) {
/* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
kernel_branched_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, throughput);
kernel_branched_path_subsurface_scatter(kg, sd, &emission_sd, &L_sample, &state, &rng, &ray, throughput);
}
#endif
@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadian
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, &rng,
sd, &state, throughput, 1.0f, &L_sample, all);
sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, &rng,
sd, throughput, 1.0f, &state, &L_sample);
sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
}
}
#endif

View File

@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
float3 I,
@ -26,12 +27,6 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
float time)
{
/* setup shading at emitter */
#ifdef __SPLIT_KERNEL__
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;
#endif
float3 eval;
#ifdef __BACKGROUND_MIS__
@ -46,28 +41,28 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.dP = differential3_zero();
ray.dD = dI;
shader_setup_from_background(kg, sd, &ray);
shader_setup_from_background(kg, emission_sd, &ray);
path_state_modify_bounce(state, true);
eval = shader_eval_background(kg, sd, state, 0, SHADER_CONTEXT_EMISSION);
eval = shader_eval_background(kg, emission_sd, state, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
}
else
#endif
{
shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
ls->Ng = ccl_fetch(sd, Ng);
ls->Ng = ccl_fetch(emission_sd, Ng);
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
path_state_modify_bounce(state, true);
shader_eval_surface(kg, sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
shader_eval_surface(kg, emission_sd, state, 0.0f, 0, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
/* evaluate emissive closure */
if(ccl_fetch(sd, flag) & SD_EMISSION)
eval = shader_emissive_eval(kg, sd);
if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
eval = shader_emissive_eval(kg, emission_sd);
else
eval = make_float3(0.0f, 0.0f, 0.0f);
}
@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ccl_device_noinline bool direct_emission(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
LightSample *ls,
ccl_addr_space PathState *state,
Ray *ray,
@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
/* evaluate closure */
float3 light_eval = direct_emissive_eval(kg,
emission_sd,
ls,
state,
-ls->D,
@ -198,6 +195,7 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
ShaderData *emission_sd,
ccl_addr_space PathState *state,
Ray *ray,
float3 *emission)
@ -225,6 +223,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
#endif
float3 L = direct_emissive_eval(kg,
emission_sd,
&ls,
state,
-ray->D,
@ -238,7 +237,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
Ray volume_ray = *ray;
volume_ray.t = ls.t;
float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
kernel_volume_shadow(kg, state, &volume_ray, &volume_tp);
kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp);
L *= volume_tp;
}
#endif
@ -260,6 +259,7 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
/* Indirect Background */
ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
ShaderData *emission_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray)
{
@ -280,19 +280,14 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
/* evaluate background closure */
# ifdef __SPLIT_KERNEL__
Ray priv_ray = *ray;
shader_setup_from_background(kg, kg->sd_input, &priv_ray);
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
shader_setup_from_background(kg, emission_sd, &priv_ray);
# else
ShaderData sd;
shader_setup_from_background(kg, &sd, ray);
shader_setup_from_background(kg, emission_sd, ray);
# endif
path_state_modify_bounce(state, true);
float3 L = shader_eval_background(kg, &sd, state, state->flag, SHADER_CONTEXT_EMISSION);
float3 L = shader_eval_background(kg, emission_sd, state, state->flag, SHADER_CONTEXT_EMISSION);
path_state_modify_bounce(state, false);
# endif
#ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */

View File

@ -53,6 +53,7 @@
CCL_NAMESPACE_BEGIN
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *emission_sd,
RNG *rng,
Ray *ray,
float3 throughput,
@ -60,6 +61,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
PathState *state,
PathRadiance *L)
{
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* path iteration */
for(;;) {
/* intersect scene */
@ -87,7 +91,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
path_radiance_accum_emission(L,
throughput,
emission,
@ -115,15 +119,14 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg,
&volume_sd,
&sd,
&volume_ray);
kernel_volume_decoupled_record(kg,
state,
&volume_ray,
&volume_sd,
&sd,
&volume_segment,
heterogeneous);
@ -146,7 +149,8 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg,
rng,
&volume_sd,
&sd,
emission_sd,
throughput,
state,
L,
@ -163,7 +167,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
result = kernel_volume_decoupled_scatter(kg,
state,
&volume_ray,
&volume_sd,
&sd,
&throughput,
rphase,
rscatter,
@ -178,7 +182,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&sd,
&throughput,
state,
L,
@ -198,16 +202,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
# endif
{
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
kg, state, &sd, &volume_ray, L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg,
rng,
&volume_sd,
&sd,
emission_sd,
throughput,
state,
L);
@ -215,7 +219,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
/* indirect light bounce */
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&sd,
&throughput,
state,
L,
@ -235,7 +239,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(!hit) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, state, ray);
float3 L_background = indirect_background(kg, emission_sd, state, ray);
path_radiance_accum_background(L,
throughput,
L_background,
@ -246,7 +250,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg,
&sd,
&isect,
@ -328,7 +331,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
light_ray.dP = sd.dP;
light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
path_radiance_accum_ao(L,
throughput,
ao_alpha,
@ -378,6 +381,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
kernel_branched_path_surface_connect_light(kg,
rng,
&sd,
emission_sd,
state,
throughput,
1.0f,
@ -393,6 +397,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@ -425,7 +430,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
}
}
@ -435,6 +440,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ccl_device bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@ -503,7 +509,7 @@ ccl_device bool kernel_path_subsurface_scatter(
hit_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(hit_L, L);
kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg,
rng,
@ -526,6 +532,7 @@ ccl_device bool kernel_path_subsurface_scatter(
kernel_volume_stack_update_for_subsurface(
kg,
emission_sd,
&volume_ray,
hit_state->volume_stack);
}
@ -604,8 +611,13 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
path_radiance_init(&L, kernel_data.film.use_light_pass);
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* shader data used by emission, shadows, volume stacks */
ShaderData emission_sd;
PathState state;
path_state_init(kg, &state, rng, sample, &ray);
path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
@ -669,7 +681,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
#endif
@ -689,11 +701,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, &volume_sd, &volume_ray);
shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous);
&volume_ray, &sd, &volume_segment, heterogeneous);
volume_segment.sampling_method = sampling_method;
@ -708,8 +719,9 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
int all = false;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
throughput, &state, &L, all, &volume_ray, &volume_segment);
kernel_branched_path_volume_connect_light(kg, rng, &sd,
&emission_sd, throughput, &state, &L, all,
&volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
@ -718,7 +730,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
&state, &volume_ray, &volume_sd, &throughput,
&state, &volume_ray, &sd, &throughput,
rphase, rscatter, &volume_segment, NULL, true);
}
@ -726,7 +738,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue;
else
break;
@ -739,17 +751,16 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
# endif
{
/* integrate along volume segment with distance sampling */
ShaderData volume_sd;
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
continue;
else
break;
@ -772,7 +783,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &state, &ray);
float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@ -780,7 +791,6 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
}
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@ -848,7 +858,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
#endif
@ -858,6 +868,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
if(sd.flag & SD_BSSRDF) {
if(kernel_path_subsurface_scatter(kg,
&sd,
&emission_sd,
&L,
&state,
rng,
@ -871,7 +882,7 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#endif /* __SUBSURFACE__ */
/* direct lighting */
kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
/* compute direct lighting and next bounce */
if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))

View File

@ -18,7 +18,13 @@ CCL_NAMESPACE_BEGIN
#ifdef __BRANCHED_PATH__
ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
float3 throughput)
{
int num_samples = kernel_data.integrator.ao_samples;
float num_samples_inv = 1.0f/num_samples;
@ -49,7 +55,7 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
light_ray.dP = ccl_fetch(sd, dP);
light_ray.dD = differential3_zero();
if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
}
}
@ -58,8 +64,8 @@ ccl_device void kernel_branched_path_ao(KernelGlobals *kg, ShaderData *sd, PathR
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
RNG *rng, ShaderData *sd, float3 throughput, float num_samples_adjust,
PathState *state, PathRadiance *L)
RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput,
float num_samples_adjust, PathState *state, PathRadiance *L)
{
for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
@ -106,6 +112,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
}
kernel_path_indirect(kg,
emission_sd,
rng,
&bsdf_ray,
tp*num_samples_inv,
@ -124,6 +131,7 @@ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGloba
#ifdef __SUBSURFACE__
ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
@ -186,6 +194,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kernel_volume_stack_update_for_subsurface(
kg,
emission_sd,
&volume_ray,
hit_state.volume_stack);
}
@ -199,6 +208,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
emission_sd,
&hit_state,
throughput,
num_samples_inv,
@ -212,6 +222,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
kg,
rng,
&bssrdf_sd,
emission_sd,
throughput,
num_samples_inv,
&hit_state,
@ -231,8 +242,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_init(&L, kernel_data.film.use_light_pass);
/* shader data memory used for both volumes and surfaces, saves stack space */
ShaderData sd;
/* shader data used by emission, shadows, volume stacks */
ShaderData emission_sd;
PathState state;
path_state_init(kg, &state, rng, sample, &ray);
path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
#ifdef __KERNEL_DEBUG__
DebugData debug_data;
@ -287,11 +303,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
ShaderData volume_sd;
shader_setup_from_volume(kg, &volume_sd, &volume_ray);
shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, &state,
&volume_ray, &volume_sd, &volume_segment, heterogeneous);
&volume_ray, &sd, &volume_segment, heterogeneous);
/* direct light sampling */
if(volume_segment.closure_flag & SD_SCATTER) {
@ -299,8 +314,9 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
throughput, &state, &L, all, &volume_ray, &volume_segment);
kernel_branched_path_volume_connect_light(kg, rng, &sd,
&emission_sd, throughput, &state, &L, all,
&volume_ray, &volume_segment);
/* indirect light sampling */
int num_samples = kernel_data.integrator.volume_samples;
@ -326,20 +342,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
&ps, &pray, &volume_sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
&ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
(void)result;
kernel_assert(result == VOLUME_PATH_SCATTERED);
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
&emission_sd,
rng,
&pray,
tp*num_samples_inv,
@ -373,30 +390,30 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
for(int j = 0; j < num_samples; j++) {
PathState ps = state;
Ray pray = ray;
ShaderData volume_sd;
float3 tp = throughput * num_samples_inv;
/* branch RNG state */
path_state_branch(&ps, j, num_samples);
VolumeIntegrateResult result = kernel_volume_integrate(
kg, &ps, &volume_sd, &volume_ray, &L, &tp, rng, heterogeneous);
kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
#ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* todo: support equiangular, MIS and all light sampling.
* alternatively get decoupled ray marching working on the GPU */
kernel_path_volume_connect_light(kg, rng, &volume_sd, tp, &state, &L);
kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
if(kernel_path_volume_bounce(kg,
rng,
&volume_sd,
&sd,
&tp,
&ps,
&L,
&pray))
{
kernel_path_indirect(kg,
&emission_sd,
rng,
&pray,
tp,
@ -414,7 +431,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* todo: avoid this calculation using decoupled ray marching */
kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
#endif
}
#endif
@ -432,7 +449,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &state, &ray);
float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@ -440,7 +457,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
}
/* setup shading */
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &ray);
shader_eval_surface(kg, &sd, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(&sd);
@ -499,14 +515,14 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
kernel_branched_path_ao(kg, &sd, &L, &state, rng, throughput);
kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
}
#endif
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object */
if(sd.flag & SD_BSSRDF) {
kernel_branched_path_subsurface_scatter(kg, &sd, &L, &state,
kernel_branched_path_subsurface_scatter(kg, &sd, &emission_sd, &L, &state,
rng, &ray, throughput);
}
#endif
@ -519,13 +535,13 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng,
&sd, &hit_state, throughput, 1.0f, &L, all);
&sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
&sd, throughput, 1.0f, &hit_state, &L);
&sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
/* continue in case of transparency */
throughput *= shader_bsdf_transparency(kg, &sd);

View File

@ -16,7 +16,12 @@
CCL_NAMESPACE_BEGIN
ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathState *state, ccl_addr_space RNG *rng, int sample, ccl_addr_space Ray *ray)
ccl_device_inline void path_state_init(KernelGlobals *kg,
ShaderData *stack_sd,
ccl_addr_space PathState *state,
ccl_addr_space RNG *rng,
int sample,
ccl_addr_space Ray *ray)
{
state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP;
@ -41,7 +46,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, ccl_addr_space PathSta
if(kernel_data.integrator.use_volumes) {
/* initialize volume stack with volume we are inside of */
kernel_volume_stack_init(kg, ray, state->volume_stack);
kernel_volume_stack_init(kg, stack_sd, ray, state->volume_stack);
/* seed RNG for cases where we can't use stratified samples */
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
}

View File

@ -20,7 +20,8 @@ CCL_NAMESPACE_BEGIN
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, int sample_all_lights)
ShaderData *sd, ShaderData *emission_sd, PathState *state, float3 throughput,
float num_samples_adjust, PathRadiance *L, int sample_all_lights)
{
#ifdef __EMISSION__
/* sample illumination from lights to find path contribution */
@ -55,11 +56,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls;
lamp_light_sample(kg, i, light_u, light_v, ccl_fetch(sd, P), &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@ -87,11 +88,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@ -109,11 +110,11 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobal
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
/* sample random light */
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, state->bounce, is_lamp);
}
@ -184,7 +185,8 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
#ifndef __SPLIT_KERNEL__
/* path tracing: connect path directly to position on a light and add it to L */
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
ShaderData *sd, float3 throughput, ccl_addr_space PathState *state, PathRadiance *L)
ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
PathRadiance *L)
{
#ifdef __EMISSION__
if(!(kernel_data.integrator.use_direct_light && (ccl_fetch(sd, flag) & SD_BSDF_HAS_EVAL)))
@ -206,11 +208,11 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
LightSample ls;
light_sample(kg, light_t, light_u, light_v, ccl_fetch(sd, time), ccl_fetch(sd, P), state->bounce, &ls);
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}

View File

@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
#ifdef __VOLUME_SCATTER__
ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L)
ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
{
#ifdef __EMISSION__
if(!kernel_data.integrator.use_direct_light)
@ -44,11 +44,11 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
if(ls.pdf == 0.0f)
return;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}
@ -106,7 +106,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
}
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 throughput, PathState *state, PathRadiance *L,
ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
{
#ifdef __EMISSION__
@ -160,11 +160,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f)
continue;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@ -211,11 +211,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
if(ls.pdf == 0.0f)
continue;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
}
@ -251,11 +251,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
return;
/* sample random light */
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
if(!shadow_blocked(kg, emission_sd, state, &light_ray, &shadow)) {
/* accumulate */
path_radiance_accum_light(L, tp, &L_light, shadow, 1.0f, state->bounce, is_lamp);
}

View File

@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
#define STACK_MAX_HITS 64
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
@ -107,21 +107,20 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
ShaderData sd;
shader_setup_from_ray(kg, &sd, isect, ray);
shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
shader_eval_surface(kg, &sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
throughput *= shader_bsdf_transparency(kg, &sd);
throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
/* stop if all light is blocked */
@ -133,13 +132,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
}
/* move ray forward */
ray->P = sd.P;
ray->P = shadow_sd->P;
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@ -148,7 +147,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
kernel_volume_shadow(kg, &ps, ray, &throughput);
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow = throughput;
@ -164,7 +163,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
kernel_volume_shadow(kg, state, ray, shadow);
kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
@ -184,6 +183,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
* one extra ray cast for the cases were we do want transparency. */
ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input,
float3 *shadow)
@ -228,7 +228,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
if(ps.volume_stack[0].shader != SHADER_NONE)
kernel_volume_shadow(kg, &ps, ray, &throughput);
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
#endif
*shadow *= throughput;
@ -244,39 +244,33 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect->t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
}
#endif
/* setup shader data at surface */
#ifdef __SPLIT_KERNEL__
ShaderData *sd = kg->sd_input;
#else
ShaderData sd_object;
ShaderData *sd = &sd_object;
#endif
shader_setup_from_ray(kg, sd, isect, ray);
shader_setup_from_ray(kg, shadow_sd, isect, ray);
/* attenuation from transparent surface */
if(!(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME)) {
if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
path_state_modify_bounce(state, true);
shader_eval_surface(kg, sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
shader_eval_surface(kg, shadow_sd, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
path_state_modify_bounce(state, false);
throughput *= shader_bsdf_transparency(kg, sd);
throughput *= shader_bsdf_transparency(kg, shadow_sd);
}
if(is_zero(throughput))
return true;
/* move ray forward */
ray->P = ray_offset(ccl_fetch(sd, P), -ccl_fetch(sd, Ng));
ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
if(ray->t != FLT_MAX)
ray->D = normalize_len(Pend - ray->P, &ray->t);
#ifdef __VOLUME__
/* exit/enter volume */
kernel_volume_stack_enter_exit(kg, sd, ps.volume_stack);
kernel_volume_stack_enter_exit(kg, shadow_sd, ps.volume_stack);
#endif
bounce++;
@ -286,7 +280,7 @@ ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
#ifdef __VOLUME__
else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
kernel_volume_shadow(kg, state, ray, shadow);
kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
}
#endif
#endif

View File

@ -219,15 +219,14 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* get the volume attenuation over line segment defined by ray, with the
* assumption that there are no surfaces blocking light between the endpoints */
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
{
ShaderData sd;
shader_setup_from_volume(kg, &sd, ray);
shader_setup_from_volume(kg, shadow_sd, ray);
if(volume_stack_is_heterogeneous(kg, state->volume_stack))
kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput);
else
kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
}
/* Equi-angular sampling as in:
@ -1000,6 +999,7 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
* is inside of. */
ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray,
VolumeStack *stack)
{
@ -1040,28 +1040,27 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
ShaderData sd;
shader_setup_from_ray(kg, &sd, isect, &volume_ray);
if(sd.flag & SD_BACKFACING) {
shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
if(stack_sd->flag & SD_BACKFACING) {
bool need_add = true;
for(int i = 0; i < enclosed_index && need_add; ++i) {
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
if(enclosed_volumes[i] == sd.object) {
if(enclosed_volumes[i] == stack_sd->object) {
need_add = false;
}
}
for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
if(stack[i].object == sd.object) {
if(stack[i].object == stack_sd->object) {
need_add = false;
break;
}
}
if(need_add) {
stack[stack_index].object = sd.object;
stack[stack_index].shader = sd.shader;
stack[stack_index].object = stack_sd->object;
stack[stack_index].shader = stack_sd->shader;
++stack_index;
}
}
@ -1069,7 +1068,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit.
*/
enclosed_volumes[enclosed_index++] = sd.object;
enclosed_volumes[enclosed_index++] = stack_sd->object;
}
}
}
@ -1086,9 +1085,8 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
break;
}
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
if(sd.flag & SD_BACKFACING) {
shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
if(stack_sd->flag & SD_BACKFACING) {
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
@ -1097,20 +1095,20 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray exited the volume and never entered to that volume
* it means that camera is inside such a volume.
*/
if(enclosed_volumes[i] == sd.object) {
if(enclosed_volumes[i] == stack_sd->object) {
need_add = false;
}
}
for(int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
if(stack[i].object == sd.object) {
if(stack[i].object == stack_sd->object) {
need_add = false;
break;
}
}
if(need_add) {
stack[stack_index].object = sd.object;
stack[stack_index].shader = sd.shader;
stack[stack_index].object = stack_sd->object;
stack[stack_index].shader = stack_sd->shader;
++stack_index;
}
}
@ -1118,11 +1116,11 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
/* If ray from camera enters the volume, this volume shouldn't
* be added to the stack on exit.
*/
enclosed_volumes[enclosed_index++] = sd.object;
enclosed_volumes[enclosed_index++] = stack_sd->object;
}
/* Move ray forward. */
volume_ray.P = ray_offset(sd.P, -sd.Ng);
volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
++step;
}
#endif
@ -1190,6 +1188,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
#ifdef __SUBSURFACE__
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray,
VolumeStack *stack)
{
@ -1210,9 +1209,8 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
for(uint hit = 0; hit < num_hits; ++hit, ++isect) {
ShaderData sd;
shader_setup_from_ray(kg, &sd, isect, &volume_ray);
kernel_volume_stack_enter_exit(kg, &sd, stack);
shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
kernel_volume_stack_enter_exit(kg, stack_sd, stack);
}
}
# else
@ -1224,13 +1222,12 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
&isect,
PATH_RAY_ALL_VISIBILITY))
{
ShaderData sd;
shader_setup_from_ray(kg, &sd, &isect, &volume_ray);
kernel_volume_stack_enter_exit(kg, &sd, stack);
shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
kernel_volume_stack_enter_exit(kg, stack_sd, stack);
/* Move ray forward. */
volume_ray.P = ray_offset(sd.P, -sd.Ng);
volume_ray.t -= sd.ray_length;
volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
volume_ray.t -= stack_sd->ray_length;
++step;
}
# endif

View File

@ -157,7 +157,7 @@ ccl_device char kernel_background_buffer_update(
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, state, ray);
float3 L_background = indirect_background(kg, kg->sd_input, state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
@ -226,7 +226,7 @@ ccl_device char kernel_background_buffer_update(
*throughput = make_float3(1.0f, 1.0f, 1.0f);
*L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, state, rng, sample, ray);
path_state_init(kg, kg->sd_input, state, rng, sample, ray);
#ifdef __KERNEL_DEBUG__
debug_data_init(debug_data);
#endif

View File

@ -207,6 +207,7 @@ ccl_device void kernel_data_init(
L_transparent_coop[ray_index] = 0.0f;
path_radiance_init(&PathRadiance_coop[ray_index], kernel_data.film.use_light_pass);
path_state_init(kg,
kg->sd_input,
&PathState_coop[ray_index],
&rng_coop[ray_index],
my_sample,

View File

@ -88,7 +88,7 @@ ccl_device char kernel_direct_lighting(
BsdfEval L_light;
bool is_lamp;
if(direct_emission(kg, sd, &ls, state, &light_ray, &L_light, &is_lamp)) {
if(direct_emission(kg, sd, kg->sd_input, &ls, state, &light_ray, &L_light, &is_lamp)) {
/* Write intermediate data to global memory to access from
* the next kernel.
*/

View File

@ -74,7 +74,7 @@ ccl_device void kernel_lamp_emission(
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
if(indirect_lamp_emission(kg, kg->sd_input, state, &light_ray, &emission)) {
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
}

View File

@ -71,6 +71,7 @@ ccl_device void kernel_shadow_blocked(
float3 shadow;
update_path_radiance = !(shadow_blocked(kg,
kg->sd_input,
state,
light_ray_global,
&shadow));