Code cleanup: deduplicate some branched and split kernel code.

Benchmark performance on GTX 1080 and RX 480 on Linux is the same for
bmw27, classroom, pabellon, and about 2% faster on fishy_cat and koro.
This commit is contained in:
Brecht Van Lommel 2017-09-13 02:10:24 +02:00
parent c4c450045d
commit f77cdd1d59
Notes: blender-bot 2023-02-14 06:23:08 +01:00
Referenced by issue #53349, Cycles - difference between OpenCL and CUDA with AO simplify
11 changed files with 410 additions and 741 deletions

View File

@ -37,9 +37,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
ray.D = ls->D;
ray.P = ls->P;
ray.t = 1.0f;
# ifdef __OBJECT_MOTION__
ray.time = time;
# endif
ray.dP = differential3_zero();
ray.dD = dI;

View File

@ -225,7 +225,7 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
#endif /* __KERNEL_DEBUG__ */
ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L,
ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput)
ShaderData *sd, ccl_addr_space PathState *state, float3 throughput)
{
#ifdef __PASSES__
int path_flag = state->flag;
@ -243,6 +243,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
kernel_data.film.pass_alpha_threshold == 0.0f ||
average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
{
int sample = state->sample;
if(sample == 0) {
if(flag & PASS_DEPTH) {

View File

@ -50,6 +50,294 @@
CCL_NAMESPACE_BEGIN
/* Intersect the current path ray with the scene.
 *
 * The visibility mask is taken from the path state. When the path state
 * reports an AO bounce, the mask is replaced by PATH_RAY_SHADOW and ray->t is
 * overwritten with the AO distance before intersecting. This override is
 * applied in every build configuration; it must not be tied to __HAIR__,
 * otherwise builds without hair support silently ignore AO bounces.
 *
 * With __HAIR__ and curves present in the BVH, the curve extension
 * parameters and an LCG state are set up and handed to scene_intersect().
 * With __KERNEL_DEBUG__, BVH traversal counters from isect are accumulated
 * into L->debug_data (traversal counters for camera rays only, the bounce
 * counter for every call).
 *
 * kg:    kernel globals.
 * state: path state; read for flags, visibility and AO bounce decision.
 * ray:   ray to trace; ray->t is modified on AO bounces.
 * isect: intersection record passed on to scene_intersect().
 * L:     path radiance; only touched for debug counters.
 *
 * Returns the value returned by scene_intersect(). */
ccl_device_forceinline bool kernel_path_scene_intersect(
	KernelGlobals *kg,
	ccl_addr_space PathState *state,
	Ray *ray,
	Intersection *isect,
	PathRadiance *L)
{
	uint visibility = path_state_ray_visibility(kg, state);

	/* AO bounce: trace a shadow-visibility ray limited to the AO distance.
	 * Kept outside of the __HAIR__ conditional so that both code paths
	 * behave the same. */
	if(path_state_ao_bounce(kg, state)) {
		visibility = PATH_RAY_SHADOW;
		ray->t = kernel_data.background.ao_distance;
	}

#ifdef __HAIR__
	float difl = 0.0f, extmax = 0.0f;
	uint lcg_state = 0;

	if(kernel_data.bvh.have_curves) {
		/* Minimum width is only derived from ray differentials for camera
		 * rays when the camera resolution flag equals 1. */
		if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
			float3 pixdiff = ray->dD.dx + ray->dD.dy;
			difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
		}

		extmax = kernel_data.curve.maximum_width;
		lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
	}

	bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax);
#else
	bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f);
#endif  /* __HAIR__ */

#ifdef __KERNEL_DEBUG__
	if(state->flag & PATH_RAY_CAMERA) {
		L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes;
		L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances;
		L->debug_data.num_bvh_intersections += isect->num_intersections;
	}
	L->debug_data.num_ray_bounces++;
#endif  /* __KERNEL_DEBUG__ */

	return hit;
}
/* Add emission from lamps hit along the ray, used for lamp MIS.
 *
 * Does nothing unless __LAMP_MIS__ is compiled in, lamp MIS is enabled in
 * the integrator settings, and the ray is not a camera ray.
 *
 * The lamp ray starts at ray->P moved back along ray->D by the distance
 * stored in state->ray_t, and its length is state->ray_t after isect->t has
 * been added to it. Note that state->ray_t is modified as a side effect.
 *
 * Any emission found by indirect_lamp_emission() is accumulated into L,
 * weighted by throughput. */
ccl_device_forceinline void kernel_path_lamp_emission(
	KernelGlobals *kg,
	ccl_addr_space PathState *state,
	Ray *ray,
	float3 throughput,
	ccl_addr_space Intersection *isect,
	ShaderData *emission_sd,
	PathRadiance *L)
{
#ifdef __LAMP_MIS__
	/* Nothing to do for camera rays or when lamp MIS is turned off. */
	if(!kernel_data.integrator.use_lamp_mis || (state->flag & PATH_RAY_CAMERA)) {
		return;
	}

	/* Origin uses the distance accumulated so far, so it has to be computed
	 * before state->ray_t is advanced below. */
	Ray lamp_ray;
	lamp_ray.P = ray->P - state->ray_t*ray->D;
	lamp_ray.D = ray->D;
	lamp_ray.time = ray->time;
	lamp_ray.dP = ray->dP;
	lamp_ray.dD = ray->dD;

	/* Extend the accumulated distance by this segment and use it as length. */
	state->ray_t += isect->t;
	lamp_ray.t = state->ray_t;

	/* Intersect with lamps and accumulate whatever they emit. */
	float3 lamp_emission;
	if(indirect_lamp_emission(kg, emission_sd, state, &lamp_ray, &lamp_emission)) {
		path_radiance_accum_emission(L, throughput, lamp_emission, state->bounce);
	}
#endif  /* __LAMP_MIS__ */
}
/* Shade the background for a ray; the path loops call this when nothing
 * was hit.
 *
 * For camera rays with a transparent film background, the average
 * throughput is added to L->transparent. In that case the background shader
 * is only evaluated when __PASSES__ is compiled in and the background pass
 * is requested; otherwise the function returns right away.
 *
 * With __BACKGROUND__, the background shader result is accumulated into L. */
ccl_device_forceinline void kernel_path_background(
	KernelGlobals *kg,
	ccl_addr_space PathState *state,
	ccl_addr_space Ray *ray,
	float3 throughput,
	ShaderData *emission_sd,
	PathRadiance *L)
{
	const bool transparent_camera_ray = kernel_data.background.transparent &&
	                                    (state->flag & PATH_RAY_CAMERA);

	if(transparent_camera_ray) {
		L->transparent += average(throughput);

#ifdef __PASSES__
		/* Keep going only if a background pass has to be filled in. */
		if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) {
			return;
		}
#else
		return;
#endif  /* __PASSES__ */
	}

#ifdef __BACKGROUND__
	/* Evaluate the background shader and add its contribution. */
	float3 background_radiance = indirect_background(kg, emission_sd, state, ray);
	path_radiance_accum_background(L, state, throughput, background_radiance);
#endif  /* __BACKGROUND__ */
}
#ifndef __SPLIT_KERNEL__
/* Volume integration step of the path loop; only compiled when
 * __SPLIT_KERNEL__ is not defined.
 *
 * When the first entry of the volume stack holds a shader, the segment of
 * `ray` up to the hit (or up to FLT_MAX when nothing was hit) is integrated,
 * either with the decoupled method (__VOLUME_DECOUPLED__, when
 * kernel_volume_use_decoupled() says so) or with kernel_volume_integrate().
 *
 * throughput is passed by pointer and is updated by the integration.
 *
 * Return value:
 * - VOLUME_PATH_SCATTERED: a scatter event happened and
 *   kernel_path_volume_bounce() returned true.
 * - VOLUME_PATH_MISSED: a scatter event happened but
 *   kernel_path_volume_bounce() returned false.
 * - VOLUME_PATH_ATTENUATED: every other case, including builds without
 *   __VOLUME__ where the function body is empty.
 *
 * The callers visible in this diff `continue` the path loop on SCATTERED
 * and `break` out of it on MISSED. */
ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
KernelGlobals *kg,
ShaderData *sd,
PathState *state,
Ray *ray,
float3 *throughput,
ccl_addr_space Intersection *isect,
bool hit,
ShaderData *emission_sd,
PathRadiance *L)
{
#ifdef __VOLUME__
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
/* Work on a copy of the ray, limited to the distance of the surface hit,
 * or unbounded (FLT_MAX) when nothing was hit. */
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
/* Camera rays are flagged as direct for the decoupled decision. */
bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
shader_setup_from_volume(kg, sd, &volume_ray);
kernel_volume_decoupled_record(kg, state,
&volume_ray, sd, &volume_segment, heterogeneous);
volume_segment.sampling_method = sampling_method;
/* emission */
if(volume_segment.closure_flag & SD_EMISSION)
path_radiance_accum_emission(L, *throughput, volume_segment.accum_emission, state->bounce);
/* scattering */
/* Stays ATTENUATED unless the segment has a scatter closure and the
 * scatter call below reports otherwise. */
VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
if(volume_segment.closure_flag & SD_SCATTER) {
int all = kernel_data.integrator.sample_all_lights_indirect;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg, sd,
emission_sd, *throughput, state, L, all,
&volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
state, &volume_ray, sd, throughput,
rphase, rscatter, &volume_segment, NULL, true);
}
/* free cached steps */
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
/* Set up the bounce; a false return is reported as MISSED. */
if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
return VOLUME_PATH_SCATTERED;
else
return VOLUME_PATH_MISSED;
}
else {
/* No scatter event: attenuate by the transmittance accumulated
 * over the recorded segment. */
*throughput *= volume_segment.accum_transmittance;
}
}
else
# endif /* __VOLUME_DECOUPLED__ */
/* NOTE: this braced block is the `else` branch of `if(decoupled)` when
 * __VOLUME_DECOUPLED__ is defined, and a plain block otherwise. */
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, sd, throughput, state, L, ray))
return VOLUME_PATH_SCATTERED;
else
return VOLUME_PATH_MISSED;
}
# endif /* __VOLUME_SCATTER__ */
}
}
#endif /* __VOLUME__ */
/* No scatter event, or volumes are not compiled in. */
return VOLUME_PATH_ATTENUATED;
}
#endif /* __SPLIT_KERNEL__ */
/* Steps applied after the surface shader has been evaluated: shadow catcher
 * bookkeeping, holdout handling, data pass writing, BSDF blurring and
 * surface emission.
 *
 * kg:          kernel globals.
 * sd:          shader data of the surface hit.
 * state:       path state; its flag may get shadow catcher bits added.
 * ray:         current ray, used to evaluate the background behind a
 *              shadow catcher.
 * throughput:  current path throughput.
 * emission_sd: shader data handed to indirect_background().
 * L:           path radiance that results are accumulated into.
 * buffer:      render buffer handed to kernel_write_data_passes().
 *
 * Returns false when, with __HOLDOUT__ compiled in, a camera ray hit an
 * object flagged as holdout mask; returns true in every other case. The
 * callers visible in this diff end the path on false. */
ccl_device_forceinline bool kernel_path_shader_apply(
	KernelGlobals *kg,
	ShaderData *sd,
	ccl_addr_space PathState *state,
	ccl_addr_space Ray *ray,
	float3 throughput,
	ShaderData *emission_sd,
	PathRadiance *L,
	ccl_global float *buffer)
{
#ifdef __SHADOW_TRICKS__
	if(sd->object_flag & SD_OBJECT_SHADOW_CATCHER) {
		/* A camera ray reached a shadow catcher: flag the path and store
		 * what is needed to resolve the catcher later. */
		if(state->flag & PATH_RAY_CAMERA) {
			state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_STORE_SHADOW_INFO);

			if(!kernel_data.background.transparent) {
				L->shadow_background_color = indirect_background(kg, emission_sd, state, ray);
			}

			L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
			L->shadow_throughput = average(throughput);
		}
	}
	else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
		/* Transparency is only updated on bounces after the shadow catcher. */
		L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd));
	}
#endif  /* __SHADOW_TRICKS__ */

#ifdef __HOLDOUT__
	/* Holdout shaders and holdout mask objects, seen by camera rays only. */
	const bool is_holdout_mask = (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) != 0;
	const bool is_holdout = (sd->flag & SD_HOLDOUT) || is_holdout_mask;

	if(is_holdout && (state->flag & PATH_RAY_CAMERA)) {
		if(kernel_data.background.transparent) {
			float3 holdout_weight;

			if(is_holdout_mask) {
				holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
			}
			else {
				holdout_weight = shader_holdout_eval(kg, sd);
			}

			/* Any throughput is fine here, they should all be identical. */
			L->transparent += average(holdout_weight*throughput);
		}

		/* Holdout mask objects end here, before data passes are written. */
		if(is_holdout_mask) {
			return false;
		}
	}
#endif  /* __HOLDOUT__ */

	/* Data passes; not reached for holdout mask objects hit by camera rays. */
	kernel_write_data_passes(kg, buffer, L, sd, state, throughput);

	/* Blur the BSDF after bounces for rays that are unlikely to follow this
	 * particular path (diffuse, rough glossy). */
	if(kernel_data.integrator.filter_glossy != FLT_MAX) {
		const float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;

		if(blur_pdf < 1.0f) {
			const float roughness = sqrtf(1.0f - blur_pdf)*0.5f;
			shader_bsdf_blur(kg, sd, roughness);
		}
	}

#ifdef __EMISSION__
	/* Emission of the surface itself. */
	if(sd->flag & SD_EMISSION) {
		float3 surface_emission = indirect_primitive_emission(
		        kg, sd, sd->ray_length, state->flag, state->ray_pdf);
		path_radiance_accum_emission(L, throughput, surface_emission, state->bounce);
	}
#endif  /* __EMISSION__ */

	return true;
}
ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@ -78,9 +366,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
light_ray.P = ray_offset(sd->P, sd->Ng);
light_ray.D = ao_D;
light_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
#endif /* __OBJECT_MOTION__ */
light_ray.dP = sd->dP;
light_ray.dD = differential3_zero();
@ -108,197 +394,41 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
{
/* path iteration */
for(;;) {
/* intersect scene */
/* Find intersection with objects in scene. */
Intersection isect;
uint visibility = path_state_ray_visibility(kg, state);
if(path_state_ao_bounce(kg, state)) {
visibility = PATH_RAY_SHADOW;
ray->t = kernel_data.background.ao_distance;
bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
/* Find intersection with lamps and compute emission for MIS. */
kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L);
/* Volume integration. */
VolumeIntegrateResult result = kernel_path_volume(kg,
sd,
state,
ray,
&throughput,
&isect,
hit,
emission_sd,
L);
if(result == VOLUME_PATH_SCATTERED) {
continue;
}
bool hit = scene_intersect(kg,
*ray,
visibility,
&isect,
NULL,
0.0f, 0.0f);
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
light_ray.P = ray->P - state->ray_t*ray->D;
state->ray_t += isect.t;
light_ray.D = ray->D;
light_ray.t = state->ray_t;
light_ray.time = ray->time;
light_ray.dD = ray->dD;
light_ray.dP = ray->dP;
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
path_radiance_accum_emission(L,
throughput,
emission,
state->bounce);
}
}
#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
bool heterogeneous =
volume_stack_is_heterogeneous(kg,
state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
int sampling_method =
volume_stack_sampling_method(kg,
state->volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
shader_setup_from_volume(kg,
sd,
&volume_ray);
kernel_volume_decoupled_record(kg,
state,
&volume_ray,
sd,
&volume_segment,
heterogeneous);
volume_segment.sampling_method = sampling_method;
/* emission */
if(volume_segment.closure_flag & SD_EMISSION) {
path_radiance_accum_emission(L,
throughput,
volume_segment.accum_emission,
state->bounce);
}
/* scattering */
VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
if(volume_segment.closure_flag & SD_SCATTER) {
int all = kernel_data.integrator.sample_all_lights_indirect;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg,
sd,
emission_sd,
throughput,
state,
L,
all,
&volume_ray,
&volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
state,
&volume_ray,
sd,
&throughput,
rphase,
rscatter,
&volume_segment,
NULL,
true);
}
/* free cached steps */
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg,
sd,
&throughput,
state,
L,
ray))
{
continue;
}
else {
break;
}
}
else {
throughput *= volume_segment.accum_transmittance;
}
}
else
# endif /* __VOLUME_DECOUPLED__ */
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, sd, &volume_ray, L, &throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg,
sd,
emission_sd,
throughput,
state,
L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg,
sd,
&throughput,
state,
L,
ray))
{
continue;
}
else {
break;
}
}
# endif /* __VOLUME_SCATTER__ */
}
}
#endif /* __VOLUME__ */
if(!hit) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, emission_sd, state, ray);
path_radiance_accum_background(L,
state,
throughput,
L_background);
#endif /* __BACKGROUND__ */
break;
}
else if(path_state_ao_bounce(kg, state)) {
else if(result == VOLUME_PATH_MISSED) {
break;
}
/* setup shading */
/* Shade background. */
if(!hit) {
kernel_path_background(kg, state, ray, throughput, emission_sd, L);
break;
}
else if(path_state_ao_bounce(kg, state)) {
break;
}
/* Setup and evaluate shader. */
shader_setup_from_ray(kg,
sd,
&isect,
@ -309,38 +439,18 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
shader_merge_closures(sd);
#endif /* __BRANCHED_PATH__ */
#ifdef __SHADOW_TRICKS__
if(!(sd->object_flag & SD_OBJECT_SHADOW_CATCHER) &&
(state->flag & PATH_RAY_SHADOW_CATCHER))
/* Apply shadow catcher, holdout, emission. */
if(!kernel_path_shader_apply(kg,
sd,
state,
ray,
throughput,
emission_sd,
L,
NULL))
{
/* Only update transparency after shadow catcher bounce. */
L->shadow_transparency *=
average(shader_bsdf_transparency(kg, sd));
break;
}
#endif /* __SHADOW_TRICKS__ */
/* blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
shader_bsdf_blur(kg, sd, blur_roughness);
}
}
#ifdef __EMISSION__
/* emission */
if(sd->flag & SD_EMISSION) {
float3 emission = indirect_primitive_emission(kg,
sd,
isect.t,
state->flag,
state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif /* __EMISSION__ */
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
@ -430,7 +540,7 @@ ccl_device_forceinline void kernel_path_integrate(
float3 throughput,
Ray *ray,
PathRadiance *L,
float *buffer,
ccl_global float *buffer,
ShaderData *emission_sd,
bool *is_shadow_catcher)
{
@ -446,252 +556,57 @@ ccl_device_forceinline void kernel_path_integrate(
/* path iteration */
for(;;) {
/* intersect scene */
/* Find intersection with objects in scene. */
Intersection isect;
uint visibility = path_state_ray_visibility(kg, state);
bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L);
#ifdef __HAIR__
float difl = 0.0f, extmax = 0.0f;
uint lcg_state = 0;
/* Find intersection with lamps and compute emission for MIS. */
kernel_path_lamp_emission(kg, state, ray, throughput, &isect, emission_sd, L);
if(kernel_data.bvh.have_curves) {
if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
float3 pixdiff = ray->dD.dx + ray->dD.dy;
/*pixdiff = pixdiff - dot(pixdiff, ray->D)*ray->D;*/
difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
}
/* Volume integration. */
VolumeIntegrateResult result = kernel_path_volume(kg,
&sd,
state,
ray,
&throughput,
&isect,
hit,
emission_sd,
L);
extmax = kernel_data.curve.maximum_width;
lcg_state = lcg_state_init(state, 0x51633e2d);
if(result == VOLUME_PATH_SCATTERED) {
continue;
}
else if(result == VOLUME_PATH_MISSED) {
break;
}
if(path_state_ao_bounce(kg, state)) {
visibility = PATH_RAY_SHADOW;
ray->t = kernel_data.background.ao_distance;
}
bool hit = scene_intersect(kg, *ray, visibility, &isect, &lcg_state, difl, extmax);
#else
if(path_state_ao_bounce(kg, state)) {
visibility = PATH_RAY_SHADOW;
ray->t = kernel_data.background.ao_distance;
}
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
#endif /* __HAIR__ */
#ifdef __KERNEL_DEBUG__
if(state->flag & PATH_RAY_CAMERA) {
L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
L->debug_data.num_bvh_intersections += isect.num_intersections;
}
L->debug_data.num_ray_bounces++;
#endif /* __KERNEL_DEBUG__ */
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
light_ray.P = ray->P - state->ray_t*ray->D;
state->ray_t += isect.t;
light_ray.D = ray->D;
light_ray.t = state->ray_t;
light_ray.time = ray->time;
light_ray.dD = ray->dD;
light_ray.dP = ray->dP;
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
/* Sanitize volume stack. */
/* Shade background. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
if(decoupled) {
/* cache steps along volume for repeated sampling */
VolumeSegment volume_segment;
shader_setup_from_volume(kg, &sd, &volume_ray);
kernel_volume_decoupled_record(kg, state,
&volume_ray, &sd, &volume_segment, heterogeneous);
volume_segment.sampling_method = sampling_method;
/* emission */
if(volume_segment.closure_flag & SD_EMISSION)
path_radiance_accum_emission(L, throughput, volume_segment.accum_emission, state->bounce);
/* scattering */
VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
if(volume_segment.closure_flag & SD_SCATTER) {
int all = false;
/* direct light sampling */
kernel_branched_path_volume_connect_light(kg, &sd,
emission_sd, throughput, state, L, all,
&volume_ray, &volume_segment);
/* indirect sample. if we use distance sampling and take just
* one sample for direct and indirect light, we could share
* this computation, but makes code a bit complex */
float rphase = path_state_rng_1D_for_decision(kg, state, PRNG_PHASE);
float rscatter = path_state_rng_1D_for_decision(kg, state, PRNG_SCATTER_DISTANCE);
result = kernel_volume_decoupled_scatter(kg,
state, &volume_ray, &sd, &throughput,
rphase, rscatter, &volume_segment, NULL, true);
}
/* free cached steps */
kernel_volume_decoupled_free(kg, &volume_segment);
if(result == VOLUME_PATH_SCATTERED) {
if(kernel_path_volume_bounce(kg, &sd, &throughput, state, L, ray))
continue;
else
break;
}
else {
throughput *= volume_segment.accum_transmittance;
}
}
else
# endif /* __VOLUME_DECOUPLED__ */
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, &sd, &volume_ray, L, &throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, &sd, emission_sd, throughput, state, L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, &sd, &throughput, state, L, ray))
continue;
else
break;
}
# endif /* __VOLUME_SCATTER__ */
}
}
#endif /* __VOLUME__ */
if(!hit) {
/* eval background shader if nothing hit */
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
L->transparent += average(throughput);
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif /* __PASSES__ */
break;
}
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, emission_sd, state, ray);
path_radiance_accum_background(L, state, throughput, L_background);
#endif /* __BACKGROUND__ */
kernel_path_background(kg, state, ray, throughput, emission_sd, L);
break;
}
else if(path_state_ao_bounce(kg, state)) {
break;
}
/* setup shading */
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, ray);
float rbsdf = path_state_rng_1D_for_decision(kg, state, PRNG_BSDF);
shader_eval_surface(kg, &sd, state, rbsdf, state->flag);
#ifdef __SHADOW_TRICKS__
if((sd.object_flag & SD_OBJECT_SHADOW_CATCHER)) {
if(state->flag & PATH_RAY_CAMERA) {
state->flag |= (PATH_RAY_SHADOW_CATCHER |
PATH_RAY_STORE_SHADOW_INFO);
if(!kernel_data.background.transparent) {
L->shadow_background_color =
indirect_background(kg, emission_sd, state, ray);
}
L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
L->shadow_throughput = average(throughput);
}
}
else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
/* Only update transparency after shadow catcher bounce. */
L->shadow_transparency *=
average(shader_bsdf_transparency(kg, &sd));
}
#endif /* __SHADOW_TRICKS__ */
/* holdout */
#ifdef __HOLDOUT__
if(((sd.flag & SD_HOLDOUT) ||
(sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(state->flag & PATH_RAY_CAMERA))
/* Apply shadow catcher, holdout, emission. */
if(!kernel_path_shader_apply(kg,
&sd,
state,
ray,
throughput,
emission_sd,
L,
buffer))
{
if(kernel_data.background.transparent) {
float3 holdout_weight;
if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
}
else {
holdout_weight = shader_holdout_eval(kg, &sd);
}
/* any throughput is ok, should all be identical here */
L->transparent += average(holdout_weight*throughput);
}
if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
break;
}
break;
}
#endif /* __HOLDOUT__ */
/* holdout mask objects do not write data passes */
kernel_write_data_passes(kg, buffer, L, &sd, state->sample, state, throughput);
/* blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
if(kernel_data.integrator.filter_glossy != FLT_MAX) {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
shader_bsdf_blur(kg, &sd, blur_roughness);
}
}
#ifdef __EMISSION__
/* emission */
if(sd.flag & SD_EMISSION) {
/* todo: is isect.t wrong here for transparent surfaces? */
float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state->flag, state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif /* __EMISSION__ */
/* path termination. this is a strange place to put the termination, it's
* mainly due to the mixed in MIS that we use. gives too many unneeded
@ -798,7 +713,6 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
path_radiance_init(&L, kernel_data.film.use_light_pass);
ShaderData emission_sd;
PathState state;
path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);

View File

@ -48,9 +48,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
light_ray.P = ray_offset(sd->P, sd->Ng);
light_ray.D = ao_D;
light_ray.t = kernel_data.background.ao_distance;
#ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
#endif /* __OBJECT_MOTION__ */
light_ray.dP = sd->dP;
light_ray.dD = differential3_zero();
@ -292,36 +290,9 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
* Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
*/
for(;;) {
/* intersect scene */
/* Find intersection with objects in scene. */
Intersection isect;
uint visibility = path_state_ray_visibility(kg, &state);
#ifdef __HAIR__
float difl = 0.0f, extmax = 0.0f;
uint lcg_state = 0;
if(kernel_data.bvh.have_curves) {
if(kernel_data.cam.resolution == 1) {
float3 pixdiff = ray.dD.dx + ray.dD.dy;
/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
}
extmax = kernel_data.curve.maximum_width;
lcg_state = lcg_state_init(&state, 0x51633e2d);
}
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
#endif /* __HAIR__ */
#ifdef __KERNEL_DEBUG__
L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
L->debug_data.num_bvh_intersections += isect.num_intersections;
L->debug_data.num_ray_bounces++;
#endif /* __KERNEL_DEBUG__ */
bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
#ifdef __VOLUME__
/* Sanitize volume stack. */
@ -464,79 +435,29 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
}
#endif /* __VOLUME__ */
/* Shade background. */
if(!hit) {
/* eval background shader if nothing hit */
if(kernel_data.background.transparent) {
L->transparent += average(throughput);
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif /* __PASSES__ */
break;
}
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
path_radiance_accum_background(L, &state, throughput, L_background);
#endif /* __BACKGROUND__ */
kernel_path_background(kg, &state, &ray, throughput, &emission_sd, L);
break;
}
/* setup shading */
/* Setup and evaluate shader. */
shader_setup_from_ray(kg, &sd, &isect, &ray);
shader_eval_surface(kg, &sd, &state, 0.0f, state.flag);
shader_merge_closures(&sd);
#ifdef __SHADOW_TRICKS__
if((sd.object_flag & SD_OBJECT_SHADOW_CATCHER)) {
state.flag |= (PATH_RAY_SHADOW_CATCHER |
PATH_RAY_STORE_SHADOW_INFO);
if(!kernel_data.background.transparent) {
L->shadow_background_color =
indirect_background(kg, &emission_sd, &state, &ray);
}
L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
L->shadow_throughput = average(throughput);
/* Apply shadow catcher, holdout, emission. */
if(!kernel_path_shader_apply(kg,
&sd,
&state,
&ray,
throughput,
&emission_sd,
L,
buffer))
{
break;
}
else if(state.flag & PATH_RAY_SHADOW_CATCHER) {
/* Only update transparency after shadow catcher bounce. */
L->shadow_transparency *=
average(shader_bsdf_transparency(kg, &sd));
}
#endif /* __SHADOW_TRICKS__ */
/* holdout */
#ifdef __HOLDOUT__
if((sd.flag & SD_HOLDOUT) || (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) {
if(kernel_data.background.transparent) {
float3 holdout_weight;
if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
}
else {
holdout_weight = shader_holdout_eval(kg, &sd);
}
/* any throughput is ok, should all be identical here */
L->transparent += average(holdout_weight*throughput);
}
if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
break;
}
}
#endif /* __HOLDOUT__ */
/* holdout mask objects do not write data passes */
kernel_write_data_passes(kg, buffer, L, &sd, sample, &state, throughput);
#ifdef __EMISSION__
/* emission */
if(sd.flag & SD_EMISSION) {
float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state.bounce);
}
#endif /* __EMISSION__ */
/* transparency termination */
if(state.flag & PATH_RAY_TRANSPARENT) {

View File

@ -40,9 +40,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
bool is_lamp;
/* connect to light from given point where shader has been evaluated */
# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
# endif
if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls))
{
@ -134,9 +132,7 @@ ccl_device void kernel_branched_path_volume_connect_light(
BsdfEval L_light;
bool is_lamp;
# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
# endif
if(sample_all_lights) {
/* lamp sampling */

View File

@ -66,8 +66,8 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* matrices and time */
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, ray->time);
sd->time = ray->time;
#endif
sd->time = ray->time;
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->ray_length = isect->t;
@ -271,17 +271,17 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
sd->u = u;
sd->v = v;
#endif
sd->time = time;
sd->ray_length = t;
sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
sd->object_flag = 0;
if(sd->object != OBJECT_NONE) {
sd->object_flag |= kernel_tex_fetch(__object_flag,
sd->object);
sd->object);
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, time);
sd->time = time;
}
else if(lamp != LAMP_NONE) {
sd->ob_tfm = lamp_fetch_transform(kg, lamp, false);
@ -385,9 +385,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
sd->shader = kernel_data.background.surface_shader;
sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
sd->object_flag = 0;
#ifdef __OBJECT_MOTION__
sd->time = ray->time;
#endif
sd->ray_length = 0.0f;
#ifdef __INSTANCING__
@ -427,9 +425,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
sd->shader = SHADER_NONE;
sd->flag = 0;
sd->object_flag = 0;
#ifdef __OBJECT_MOTION__
sd->time = ray->time;
#endif
sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
#ifdef __INSTANCING__

View File

@ -94,9 +94,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
&ls)) {
Ray light_ray;
# ifdef __OBJECT_MOTION__
light_ray.time = sd->time;
# endif
BsdfEval L_light;
bool is_lamp;

View File

@ -94,125 +94,42 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
ccl_global PathState *state = 0x0;
float3 throughput;
uint sample;
ccl_global char *ray_state = kernel_split_state.ray_state;
ShaderData *sd = &kernel_split_state.sd[ray_index];
ccl_global float *buffer = kernel_split_params.buffer;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
uint work_index = kernel_split_state.work_array[ray_index];
sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
uint pixel_x, pixel_y, tile_x, tile_y;
get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
&tile_x, &tile_y,
work_index,
ray_index);
ccl_global float *buffer = kernel_split_params.buffer;
buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride;
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
throughput = kernel_split_state.throughput[ray_index];
state = &kernel_split_state.path_state[ray_index];
#ifdef __SHADOW_TRICKS__
if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
if(state->flag & PATH_RAY_CAMERA) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
state->flag |= (PATH_RAY_SHADOW_CATCHER |
PATH_RAY_STORE_SHADOW_INFO);
if(!kernel_data.background.transparent) {
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
L->shadow_background_color = indirect_background(
kg,
&kernel_split_state.sd_DL_shadow[ray_index],
state,
ray);
}
L->shadow_radiance_sum = path_radiance_clamp_and_sum(kg, L);
L->shadow_throughput = average(throughput);
}
}
else if(state->flag & PATH_RAY_SHADOW_CATCHER) {
/* Only update transparency after shadow catcher bounce. */
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd));
}
#endif /* __SHADOW_TRICKS__ */
/* holdout */
#ifdef __HOLDOUT__
if(((sd->flag & SD_HOLDOUT) ||
(sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(state->flag & PATH_RAY_CAMERA))
if(!kernel_path_shader_apply(kg,
sd,
state,
ray,
throughput,
emission_sd,
L,
buffer))
{
if(kernel_data.background.transparent) {
float3 holdout_weight;
if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
}
else {
holdout_weight = shader_holdout_eval(kg, sd);
}
/* any throughput is ok, should all be identical here */
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
L->transparent += average(holdout_weight*throughput);
}
if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
kernel_split_path_end(kg, ray_index);
}
kernel_split_path_end(kg, ray_index);
}
#endif /* __HOLDOUT__ */
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
#ifdef __BRANCHED_PATH__
if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
#endif /* __BRANCHED_PATH__ */
{
/* Holdout mask objects do not write data passes. */
kernel_write_data_passes(kg,
buffer,
L,
sd,
sample,
state,
throughput);
}
/* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy).
*/
#ifndef __BRANCHED_PATH__
if(kernel_data.integrator.filter_glossy != FLT_MAX)
#else
if(kernel_data.integrator.filter_glossy != FLT_MAX &&
(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)))
#endif /* __BRANCHED_PATH__ */
{
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
shader_bsdf_blur(kg, sd, blur_roughness);
}
}
#ifdef __EMISSION__
/* emission */
if(sd->flag & SD_EMISSION) {
/* TODO(sergey): is isect.t wrong here for transparent surfaces? */
float3 emission = indirect_primitive_emission(
kg,
sd,
kernel_split_state.isect[ray_index].t,
state->flag,
state->ray_pdf);
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
#endif /* __EMISSION__ */
/* Path termination. This is a strange place to put the termination; it's
* mainly due to the mixed-in MIS that we use. It gives too many unneeded
* shader evaluations, as we only need emission if we are going to terminate.
@ -249,6 +166,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
}
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
kernel_update_denoising_features(kg, sd, state, L);
}
}

View File

@ -50,32 +50,16 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
return;
}
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
/* eval background shader if nothing hit */
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
L->transparent += average((*throughput));
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
kernel_split_path_end(kg, ray_index);
}
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
float3 throughput = kernel_split_state.throughput[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, state, (*throughput), L_background);
#endif
kernel_split_path_end(kg, ray_index);
}
kernel_path_background(kg, state, ray, throughput, emission_sd, L);
kernel_split_path_end(kg, ray_index);
}
}
CCL_NAMESPACE_END

View File

@ -57,27 +57,10 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
float3 throughput = kernel_split_state.throughput[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
light_ray.P = ray.P - state->ray_t*ray.D;
state->ray_t += kernel_split_state.isect[ray_index].t;
light_ray.D = ray.D;
light_ray.t = state->ray_t;
light_ray.time = ray.time;
light_ray.dD = ray.dD;
light_ray.dP = ray.dP;
/* intersect with lamp */
float3 emission;
if(indirect_lamp_emission(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, &light_ray, &emission)) {
path_radiance_accum_emission(L, throughput, emission, state->bounce);
}
}
#endif /* __LAMP_MIS__ */
kernel_path_lamp_emission(kg, state, &ray, throughput, isect, emission_sd, L);
}
}

View File

@ -59,53 +59,13 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
return;
}
Intersection isect;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
/* intersect scene */
uint visibility = path_state_ray_visibility(kg, state);
if(path_state_ao_bounce(kg, state)) {
visibility = PATH_RAY_SHADOW;
ray.t = kernel_data.background.ao_distance;
}
#ifdef __HAIR__
float difl = 0.0f, extmax = 0.0f;
uint lcg_state = 0;
if(kernel_data.bvh.have_curves) {
if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) {
float3 pixdiff = ray.dD.dx + ray.dD.dy;
/*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
}
extmax = kernel_data.curve.maximum_width;
lcg_state = lcg_state_init_addrspace(state, 0x51633e2d);
}
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
if(path_state_ao_bounce(kg, state)) {
visibility = PATH_RAY_SHADOW;
ray.t = kernel_data.background.ao_distance;
}
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
#endif
kernel_split_state.isect[ray_index] = isect;
#ifdef __KERNEL_DEBUG__
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
if(state->flag & PATH_RAY_CAMERA) {
L->debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
L->debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
L->debug_data.num_bvh_intersections += isect.num_intersections;
}
L->debug_data.num_ray_bounces++;
#endif
Intersection isect;
bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L);
kernel_split_state.isect[ray_index] = isect;
if(!hit) {
/* Change the state of rays that hit the background;