Fix T54105: random walk SSS missing in branched indirect paths.

Unify the path and branched path indirect SSS code. No performance impact
was found on CUDA; for the AMD split kernel the extra code was already there.
This commit is contained in:
Brecht Van Lommel 2018-02-20 00:15:14 +01:00
parent 5cc1d5fe17
commit 606bc5f301
Notes: blender-bot 2023-02-14 11:42:40 +01:00
Referenced by issue #54105, Wrong shading with "Random Walk" in combination with branched path tracing
5 changed files with 83 additions and 185 deletions

View File

@ -400,6 +400,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
PathState *state,
PathRadiance *L)
{
#ifdef __SUBSURFACE__
SubsurfaceIndirectRays ss_indirect;
kernel_path_subsurface_init_indirect(&ss_indirect);
for(;;) {
#endif /* __SUBSURFACE__ */
/* path iteration */
for(;;) {
/* Find intersection with objects in scene. */
@ -485,29 +492,21 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
#endif /* __AO__ */
#ifdef __SUBSURFACE__
/* bssrdf scatter to a different location on the same object, replacing
* the closures with a diffuse BSDF */
if(sd->flag & SD_BSSRDF) {
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg,
state,
PRNG_BSDF_U,
&bssrdf_u, &bssrdf_v);
const ShaderClosure *sc = shader_bssrdf_pick(sd, &throughput, &bssrdf_u);
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init(state, 0x68bc21eb);
subsurface_scatter_step(kg,
sd,
state,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
if(kernel_path_subsurface_scatter(kg,
sd,
emission_sd,
L,
state,
ray,
&throughput,
&ss_indirect))
{
break;
}
}
#endif /* __SUBSURFACE__ */
@ -530,6 +529,24 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray))
break;
}
#ifdef __SUBSURFACE__
/* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect.
*/
if(ss_indirect.num_rays) {
kernel_path_subsurface_setup_indirect(kg,
&ss_indirect,
state,
ray,
L,
&throughput);
}
else {
break;
}
}
#endif /* __SUBSURFACE__ */
}
#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */

View File

@ -69,44 +69,42 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
}
/* replace closures with a single diffuse bsdf closure after scatter step */
ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, float3 N)
{
sd->flag &= ~SD_CLOSURE_FLAGS;
sd->num_closure = 0;
sd->num_closure_left = kernel_data.integrator.max_closures;
if(hit) {
Bssrdf *bssrdf = (Bssrdf *)sc;
Bssrdf *bssrdf = (Bssrdf *)sc;
#ifdef __PRINCIPLED__
if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
{
PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
{
PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
if(bsdf) {
bsdf->N = N;
bsdf->roughness = bssrdf->roughness;
sd->flag |= bsdf_principled_diffuse_setup(bsdf);
if(bsdf) {
bsdf->N = N;
bsdf->roughness = bssrdf->roughness;
sd->flag |= bsdf_principled_diffuse_setup(bsdf);
/* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
* can recognize it as not being a regular Disney principled diffuse closure */
bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
}
/* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
* can recognize it as not being a regular Disney principled diffuse closure */
bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
}
else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
CLOSURE_IS_BSSRDF(bssrdf->type))
}
else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
CLOSURE_IS_BSSRDF(bssrdf->type))
#endif /* __PRINCIPLED__ */
{
DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
{
DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
if(bsdf) {
bsdf->N = N;
sd->flag |= bsdf_diffuse_setup(bsdf);
if(bsdf) {
bsdf->N = N;
sd->flag |= bsdf_diffuse_setup(bsdf);
/* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
* can recognize it as not being a regular diffuse closure */
bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
}
/* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
* can recognize it as not being a regular diffuse closure */
bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
}
}
}
@ -334,104 +332,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_color_bump_blur(kg, sd, state, &weight, &N);
/* Setup diffuse BSDF. */
subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
if(disk_v < 0.5f) {
pick_pdf_N = 0.5f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.25f;
disk_v *= 2.0f;
}
else if(disk_v < 0.75f) {
float3 tmp = disk_N;
disk_N = disk_T;
disk_T = tmp;
pick_pdf_N = 0.25f;
pick_pdf_T = 0.5f;
pick_pdf_B = 0.25f;
disk_v = (disk_v - 0.5f)*4.0f;
}
else {
float3 tmp = disk_N;
disk_N = disk_B;
disk_B = tmp;
pick_pdf_N = 0.25f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.5f;
disk_v = (disk_v - 0.75f)*4.0f;
}
/* sample point on disk */
float phi = M_2PI_F * disk_v;
float disk_height, disk_r;
bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
/* create ray */
Ray ray;
ray.P = sd->P + disk_N*disk_height + disk_P;
ray.D = -disk_N;
ray.t = 2.0f*disk_height;
ray.dP = sd->dP;
ray.dD = differential3_zero();
ray.time = sd->time;
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
LocalIntersection ss_isect;
scene_intersect_local(kg, ray, &ss_isect, sd->object, lcg_state, 1);
/* evaluate bssrdf */
if(ss_isect.num_hits > 0) {
float3 origP = sd->P;
/* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
#if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
kernel_split_params.dummy_sd_flag = sd->flag;
#endif
/* setup new shading point */
shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
/* Probability densities for local frame axes. */
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
/* Multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic. pdf_N
* in the MIS weight and denominator cancelled out. */
float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
w *= ss_isect.num_hits;
/* Real distance to sampled point. */
float r = len(sd->P - origP);
/* Evaluate profiles. */
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
}
/* optionally blur colors and bump mapping */
float3 N = sd->N;
subsurface_color_bump_blur(kg, sd, state, &eval, &N);
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, N);
}
/* Random walk subsurface scattering.

View File

@ -49,28 +49,22 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
#ifdef __BRANCHED_PATH__
if(!kernel_data.integrator.branched) {
#endif
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
/* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect.
*/
if(ss_indirect->num_rays) {
kernel_path_subsurface_setup_indirect(kg,
ss_indirect,
state,
ray,
L,
throughput);
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
/* Trace indirect subsurface rays by restarting the loop. this uses less
* stack memory than invoking kernel_path_indirect.
*/
if(ss_indirect->num_rays) {
kernel_path_subsurface_setup_indirect(kg,
ss_indirect,
state,
ray,
L,
throughput);
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
#ifdef __BRANCHED_PATH__
}
#endif
#endif /* __SUBSURFACE__ */

View File

@ -59,7 +59,12 @@ ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
ccl_global char *ray_state = kernel_split_state.ray_state;
#ifdef __BRANCHED_PATH__
if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
if(ss_indirect->num_rays) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) {
int orig_ray = kernel_split_state.branched_state[ray_index].original_ray;
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];

View File

@ -228,7 +228,9 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
if(sd->flag & SD_BSSRDF) {
#ifdef __BRANCHED_PATH__
if(!kernel_data.integrator.branched) {
if(!kernel_data.integrator.branched ||
IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
{
#endif
if(kernel_path_subsurface_scatter(kg,
sd,
@ -243,27 +245,6 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
}
#ifdef __BRANCHED_PATH__
}
else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg,
state,
PRNG_BSDF_U,
&bssrdf_u, &bssrdf_v);
const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u);
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb);
subsurface_scatter_step(kg,
sd,
state,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
}
}
else {
kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);