Cycles: Avoid having ShaderData on the stack

This commit introduces an SSS-oriented intersection structure which replaces
the old logic of having separate arrays for intersections and shader data, and
encapsulates all the data needed for SSS evaluation.

This gives a huge stack memory saving on GPU. In my own experiments it gave a
25% memory usage reduction on a GTX560Ti (722MB vs. 946MB).

Unfortunately, this caused a performance loss of 20%, which only happens on GPU.
This is perhaps due to a different memory access pattern. Hopefully it will be
solved in the future.

Famous saying: won in memory - lost in time (which is also valid the other way
around).
This commit is contained in:
Sergey Sharybin 2015-11-22 15:00:29 +05:00
parent e6fff424db
commit 8bca34fe32
9 changed files with 290 additions and 109 deletions

View File

@ -255,38 +255,81 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, con
}
#ifdef __SUBSURFACE__
ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
const Ray *ray,
SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
if(kernel_data.bvh.have_curves) {
return bvh_intersect_subsurface_hair_motion(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __HAIR__ */
return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
return bvh_intersect_subsurface_motion(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
return bvh_intersect_subsurface_hair(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
if(kernel_data.bvh.have_instancing) {
return bvh_intersect_subsurface_instancing(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
}
#endif /* __INSTANCING__ */
return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
return bvh_intersect_subsurface(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
return bvh_intersect_subsurface_instancing(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#else
return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
return bvh_intersect_subsurface(kg,
ray,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */

View File

@ -30,9 +30,9 @@
*
*/
ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@ -60,7 +60,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
uint num_hits = 0;
ss_isect->num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@ -210,7 +210,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
@ -223,7 +231,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
@ -301,13 +318,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
return num_hits;
}
ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@ -316,7 +331,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
if(kernel_data.bvh.use_qbvh) {
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect_array,
ss_isect,
subsurface_object,
lcg_state,
max_hits);
@ -327,7 +342,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
kernel_assert(kernel_data.bvh.use_qbvh == false);
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect_array,
ss_isect,
subsurface_object,
lcg_state,
max_hits);

View File

@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
* multiple hits we pick a single random primitive as the intersection point. */
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
SubsurfaceIntersection *ss_isect,
float3 P,
float3 dir,
float time,
int object,
int triAddr,
float tmax,
uint *lcg_state,
int max_hits)
{
/* primitive index for vertex location lookup */
int prim = kernel_tex_fetch(__prim_index, triAddr);
@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) {
(*num_hits)++;
ss_isect->num_hits++;
int hit;
if(*num_hits <= max_hits) {
hit = *num_hits - 1;
if(ss_isect->num_hits <= max_hits) {
hit = ss_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
hit = lcg_step_uint(lcg_state) % *num_hits;
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* record intersection */
Intersection *isect = &isect_array[hit];
Intersection *isect = &ss_isect->hits[hit];
isect->t = t;
isect->u = u;
isect->v = v;
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
/* Record geometric normal. */
ss_isect->Ng[hit] = normalize(cross(verts[1] - verts[0],
verts[2] - verts[0]));
}
}
#endif

View File

@ -26,9 +26,9 @@
*
*/
ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@ -55,7 +55,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float3 idir = bvh_inverse_direction(dir);
int object = OBJECT_NONE;
float isect_t = ray->t;
uint num_hits = 0;
ss_isect->num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@ -63,7 +64,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#ifndef __KERNEL_SSE41__
if(!isfinite(P.x)) {
return 0;
return;
}
#endif
@ -226,7 +227,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
triangle_intersect_subsurface(kg,
&isect_precalc,
ss_isect,
P,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
@ -240,7 +249,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(tri_object != subsurface_object) {
continue;
}
motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
motion_triangle_intersect_subsurface(kg,
ss_isect,
P,
dir,
ray->time,
object,
primAddr,
isect_t,
lcg_state,
max_hits);
}
break;
}
@ -321,6 +339,4 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
return num_hits;
}

View File

@ -204,12 +204,11 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
ccl_device_inline void triangle_intersect_subsurface(
KernelGlobals *kg,
const IsectPrecalc *isect_precalc,
Intersection *isect_array,
SubsurfaceIntersection *ss_isect,
float3 P,
int object,
int triAddr,
float tmax,
uint *num_hits,
uint *lcg_state,
int max_hits)
{
@ -272,29 +271,36 @@ ccl_device_inline void triangle_intersect_subsurface(
/* Normalize U, V, W, and T. */
const float inv_det = 1.0f / det;
(*num_hits)++;
ss_isect->num_hits++;
int hit;
if(*num_hits <= max_hits) {
hit = *num_hits - 1;
if(ss_isect->num_hits <= max_hits) {
hit = ss_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
hit = lcg_step_uint(lcg_state) % *num_hits;
hit = lcg_step_uint(lcg_state) % ss_isect->num_hits;
if(hit >= max_hits)
return;
}
/* record intersection */
Intersection *isect = &isect_array[hit];
Intersection *isect = &ss_isect->hits[hit];
isect->prim = triAddr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
isect->v = V * inv_det;
isect->t = T * inv_det;
/* Record geometric normal. */
/* TODO(sergey): Use float4_to_float3() on just an edges. */
const float3 v0 = float4_to_float3(tri_a);
const float3 v1 = float4_to_float3(tri_b);
const float3 v2 = float4_to_float3(tri_c);
ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0));
}
#endif

View File

@ -338,10 +338,16 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
if(sc) {
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
int num_hits = subsurface_scatter_multi_intersect(kg,
&ss_isect,
sd,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
@ -350,15 +356,26 @@ ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
/* NOTE: We reuse the existing ShaderData, we assume the path
* integration loop stops when this function returns true.
*/
subsurface_scatter_multi_setup(kg,
&ss_isect,
hit,
sd,
state->flag,
sc,
false);
float3 tp = *throughput;
PathState hit_state = *state;
Ray hit_ray = *ray;
hit_state.rng_offset += PRNG_BOUNCE_NUM;
kernel_path_surface_connect_light(kg, rng, &bssrdf_sd[hit], tp, state, L);
if(kernel_path_surface_bounce(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) {
kernel_path_surface_connect_light(kg, rng, sd, tp, state, L);
if(kernel_path_surface_bounce(kg, rng, sd, &tp, &hit_state, L, &hit_ray)) {
#ifdef __LAMP_MIS__
hit_state.ray_t = 0.0f;
#endif

View File

@ -128,10 +128,16 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
int num_hits = subsurface_scatter_multi_intersect(kg,
&ss_isect,
sd,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
true);
#ifdef __VOLUME__
Ray volume_ray = *ray;
bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
@ -140,6 +146,15 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
ShaderData bssrdf_sd = *sd;
subsurface_scatter_multi_setup(kg,
&ss_isect,
hit,
&bssrdf_sd,
state->flag,
sc,
true);
PathState hit_state = *state;
path_state_branch(&hit_state, j, num_samples);
@ -147,7 +162,7 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
#ifdef __VOLUME__
if(need_update_volume_stack) {
/* Setup ray from previous surface point to the new one. */
float3 P = ray_offset(bssrdf_sd[hit].P, -bssrdf_sd[hit].Ng);
float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
volume_ray.D = normalize_len(P - volume_ray.P,
&volume_ray.t);
@ -165,15 +180,27 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
/* direct light */
if(kernel_data.integrator.use_direct_light) {
bool all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(kg, rng,
&bssrdf_sd[hit], &hit_state, throughput, num_samples_inv, L, all);
kernel_branched_path_surface_connect_light(
kg,
rng,
&bssrdf_sd,
&hit_state,
throughput,
num_samples_inv,
L,
all);
}
#endif
/* indirect light */
kernel_branched_path_surface_indirect_light(kg, rng,
&bssrdf_sd[hit], throughput, num_samples_inv,
&hit_state, L);
kernel_branched_path_surface_indirect_light(
kg,
rng,
&bssrdf_sd,
throughput,
num_samples_inv,
&hit_state,
L);
}
}
}

View File

@ -179,19 +179,23 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
return color;
}
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
ShaderData *sd,
int state_flag,
float3 *eval,
float3 *N)
{
/* average color and texture blur at outgoing point */
float texture_blur;
float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
/* do we have bump mapping? */
bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
if(bump || texture_blur > 0.0f) {
/* average color and normal at incoming point */
shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL);
shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
/* we simply divide out the average color and multiply with the average
* of the other one. we could try to do this per closure but it's quite
@ -206,14 +210,23 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd
}
}
/* subsurface scattering step, from a point on the surface to other nearby points on the same object */
ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
/* Subsurface scattering step, from a point on the surface to other
* nearby points on the same object.
*/
ccl_device int subsurface_scatter_multi_intersect(
KernelGlobals *kg,
SubsurfaceIntersection* ss_isect,
ShaderData *sd,
ShaderClosure *sc,
uint *lcg_state,
float disk_u,
float disk_v,
bool all)
{
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@ -259,70 +272,89 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
/* create ray */
Ray ray;
ray.P = sd->P + disk_N*disk_height + disk_P;
ray.D = -disk_N;
ray.t = 2.0f*disk_height;
ray.dP = sd->dP;
ray.dD = differential3_zero();
ray.time = sd->time;
Ray *ray = &ss_isect->ray;
ray->P = sd->P + disk_N*disk_height + disk_P;
ray->D = -disk_N;
ray->t = 2.0f*disk_height;
ray->dP = sd->dP;
ray->dD = differential3_zero();
ray->time = sd->time;
/* intersect with the same object. if multiple intersections are found it
* will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
Intersection isect[BSSRDF_MAX_HITS];
uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
/* evaluate bssrdf */
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
scene_intersect_subsurface(kg,
ray,
ss_isect,
sd->object,
lcg_state,
BSSRDF_MAX_HITS);
/* TODO(sergey): Investigate whether scene_intersect_subsurface() could
* indeed return more than BSSRDF_MAX_HITS hits.
*/
int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
for(int hit = 0; hit < num_eval_hits; hit++) {
ShaderData *bsd = &bssrdf_sd[hit];
/* setup new shading point */
*bsd = *sd;
shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
/* Quickly retrieve P and Ng without setting up ShaderData. */
float3 hit_P = ray->P + ss_isect->hits[hit].t * ray->D;
float3 hit_Ng = ss_isect->Ng[hit];
if(ss_isect->hits[hit].object != OBJECT_NONE) {
object_normal_transform(kg, sd, &hit_Ng);
}
/* probability densities for local frame axes */
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
/* real distance to sampled point */
float r = len(bsd->P - sd->P);
float r = len(hit_P - sd->P);
/* evaluate */
float w = mis_weight / pdf_N;
if(num_hits > BSSRDF_MAX_HITS)
w *= num_hits/(float)BSSRDF_MAX_HITS;
eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
if(ss_isect->num_hits > BSSRDF_MAX_HITS)
w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
/* optionally blur colors and bump mapping */
float3 N = bsd->N;
subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
ss_isect->weight[hit] = eval;
}
return num_eval_hits;
}
/* Turn one recorded subsurface hit into a shading point stored in `sd`.
 *
 * kg          Kernel globals, forwarded to the helpers called below.
 * ss_isect    Intersection record to read from; this function reads
 *             hits[hit], ray and weight[hit].
 * hit         Index of the hit to set up.
 * sd          Shader data which is overwritten with the new shading point
 *             and receives the diffuse BSDF.
 * state_flag  Forwarded to subsurface_color_bump_blur().
 * sc, all     Not referenced in this body.
 */
ccl_device void subsurface_scatter_multi_setup(KernelGlobals *kg,
SubsurfaceIntersection* ss_isect,
int hit,
ShaderData *sd,
int state_flag,
ShaderClosure *sc,
bool all)
{
/* Setup new shading point. */
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
/* Optionally blur colors and bump mapping. */
/* Work on local copies: the weight stored for this hit and the normal of the
 * freshly set up shading point are both passed by pointer, so the helper is
 * able to adjust them. */
float3 weight = ss_isect->weight[hit];
float3 N = sd->N;
subsurface_color_bump_blur(kg, sd, state_flag, &weight, &N);
/* Setup diffuse BSDF. */
subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
uint num_hits = 0;
/* pick random axis in local frame and point on disk */
float3 disk_N, disk_T, disk_B;
float pick_pdf_N, pick_pdf_T, pick_pdf_B;
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
@ -368,21 +400,21 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
Intersection isect;
num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
SubsurfaceIntersection ss_isect;
scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
/* evaluate bssrdf */
if(num_hits > 0) {
if(ss_isect.num_hits > 0) {
float3 origP = sd->P;
/* setup new shading point */
shader_setup_from_subsurface(kg, sd, &isect, &ray);
shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
/* probability densities for local frame axes */
float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
/* multiple importance sample between 3 axes, power heuristic
* found to be slightly better than balance heuristic */
float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
@ -391,16 +423,16 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
float r = len(sd->P - origP);
/* evaluate */
float w = (mis_weight * num_hits) / pdf_N;
float w = (mis_weight * ss_isect.num_hits) / pdf_N;
eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
}
/* optionally blur colors and bump mapping */
float3 N = sd->N;
subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
subsurface_color_bump_blur(kg, sd, state_flag, &eval, &N);
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
}
CCL_NAMESPACE_END

View File

@ -520,6 +520,18 @@ typedef ccl_addr_space struct Intersection {
#endif
} Intersection;
/* Subsurface intersection result.
 *
 * Bundles everything the SSS code keeps between intersecting the scattering
 * ray and setting up a shading point for each hit: the ray itself, the
 * recorded hits and the per-hit geometric normal and weight.
 */
struct SubsurfaceIntersection
{
/* Ray which was used for the subsurface intersection query. */
Ray ray;
/* Per-hit BSSRDF evaluation result, stored after intersection. */
float3 weight[BSSRDF_MAX_HITS];
/* Number of hits counted during traversal. The counter is incremented for
 * every hit, so it can be bigger than the number of entries recorded in
 * hits[]. */
int num_hits;
/* Recorded intersections. */
struct Intersection hits[BSSRDF_MAX_HITS];
/* Geometric normal recorded for the corresponding entry of hits[]. */
float3 Ng[BSSRDF_MAX_HITS];
};
/* Primitives */
typedef enum PrimitiveType {