Cycles: bake transparent shadows for hair

These transparent shadows can be expansive to evaluate. Especially on the
GPU they can lead to poor occupancy when only some pixels require many kernel
launches to trace and evaluate many layers of transparency.

Baked transparency allows tracing a single ray in many cases by accumulating
the throughput directly in the intersection program without recording hits
or evaluating shaders. Transparency is baked at curve vertices and
interpolated, for most shaders this will look practically the same as actual
shader evaluation.

Fixes T91428, performance regression with spring demo file due to transparent
hair, and makes it render significantly faster than Blender 2.93.

Differential Revision: https://developer.blender.org/D12880
This commit is contained in:
Brecht Van Lommel 2021-09-20 16:16:11 +02:00
parent d06828f0b8
commit fd77a28031
Notes: blender-bot 2023-02-14 10:37:49 +01:00
Referenced by issue #94555, cycles stalls with hair particles in viewport
Referenced by issue #94136, Cycles: No Hair Shadows with Transparent BSDF
Referenced by issue #94135, Cycles Hair Shadow Repetition with Transparent BSDF
Referenced by issue #92363, OptiX kernels fail to load when Ambient Occlusion node is used
Referenced by issue #91428, Cycles-X: Spring demo scene shows large performance regression
24 changed files with 500 additions and 110 deletions

View File

@ -80,31 +80,49 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
/* If no transparent shadows, all light is blocked. */
/* If no transparent shadows or max number of hits exceeded, all light is blocked. */
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
ctx->opaque_hit = true;
return;
}
++ctx->num_hits;
/* Always use baked shadow transparency for curves. */
if (current_isect.type & PRIMITIVE_ALL_CURVE) {
ctx->throughput *= intersection_curve_shadow_transparency(
kg, current_isect.object, current_isect.prim, current_isect.u);
if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
ctx->opaque_hit = true;
return;
}
else {
*args->valid = 0;
return;
}
}
/* Test if we need to record this transparent intersection. */
if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
const int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
int isect_index = num_recorded_hits;
if (num_recorded_hits + 1 >= ctx->max_hits) {
const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
uint isect_index = num_recorded_hits;
if (num_recorded_hits + 1 >= max_record_hits) {
float max_t = ctx->isect_s[0].t;
int max_recorded_hit = 0;
uint max_recorded_hit = 0;
for (int i = 1; i < num_recorded_hits; ++i) {
for (uint i = 1; i < num_recorded_hits; ++i) {
if (ctx->isect_s[i].t > max_t) {
max_recorded_hit = i;
max_t = ctx->isect_s[i].t;
}
}
if (num_recorded_hits >= ctx->max_hits) {
if (num_recorded_hits >= max_record_hits) {
isect_index = max_recorded_hit;
}
@ -118,10 +136,9 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
ctx->isect_s[isect_index] = current_isect;
}
/* Always increase the number of hits, even beyond ray.max_hits so that
* the caller can detect this as and consider it opaque, or trace another
* ray. */
++ctx->num_hits;
/* Always increase the number of recorded hits, even beyond the maximum,
* so that we can detect this and trace another ray if needed. */
++ctx->num_recorded_hits;
/* This tells Embree to continue tracing. */
*args->valid = 0;
@ -160,7 +177,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
if (ctx->lcg_state) {
/* See triangle_intersect_subsurface() for the native equivalent. */
for (int i = min(ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
if (local_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;

View File

@ -44,6 +44,7 @@ CPUKernels::CPUKernels()
/* Shader evaluation. */
REGISTER_KERNEL(shader_eval_displace),
REGISTER_KERNEL(shader_eval_background),
REGISTER_KERNEL(shader_eval_curve_shadow_transparency),
/* Adaptive sampling. */
REGISTER_KERNEL(adaptive_sampling_convergence_check),
REGISTER_KERNEL(adaptive_sampling_filter_x),

View File

@ -58,6 +58,7 @@ class CPUKernels {
ShaderEvalFunction shader_eval_displace;
ShaderEvalFunction shader_eval_background;
ShaderEvalFunction shader_eval_curve_shadow_transparency;
/* Adaptive stopping. */

View File

@ -74,6 +74,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
return "shader_eval_displace";
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
return "shader_eval_background";
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
return "shader_eval_curve_shadow_transparency";
/* Film. */

View File

@ -122,6 +122,9 @@ bool ShaderEval::eval_cpu(Device *device,
case SHADER_EVAL_BACKGROUND:
kernels.shader_eval_background(kg, input_data, output_data, work_index);
break;
case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index);
break;
}
});
});
@ -144,6 +147,9 @@ bool ShaderEval::eval_gpu(Device *device,
case SHADER_EVAL_BACKGROUND:
kernel = DEVICE_KERNEL_SHADER_EVAL_BACKGROUND;
break;
case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
kernel = DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY;
break;
};
/* Create device queue. */

View File

@ -30,6 +30,7 @@ class Progress;
enum ShaderEvalType {
SHADER_EVAL_DISPLACE,
SHADER_EVAL_BACKGROUND,
SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
};
/* ShaderEval class performs shader evaluation for background light and displacement. */

View File

@ -367,12 +367,13 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
ccl_private uint *num_hits)
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
# ifdef __KERNEL_OPTIX__
uint p0 = state;
uint p1 = 0; /* Unused */
uint p2 = 0; /* Number of hits. */
uint p1 = __float_as_uint(1.0f); /* Throughput. */
uint p2 = 0; /* Number of hits. */
uint p3 = max_hits;
uint p4 = visibility;
uint p5 = false;
@ -382,7 +383,6 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ray_mask = 0xFF;
}
*num_hits = 0; /* Initialize hit count to zero. */
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
@ -402,12 +402,14 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
p4,
p5);
*num_hits = p2;
*num_recorded_hits = uint16_unpack_from_uint_0(p2);
*throughput = __uint_as_float(p1);
return p5;
# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
*num_hits = 0;
*num_recorded_hits = 0;
*throughput = 1.0f;
return false;
}
@ -422,7 +424,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
*num_hits = ctx.num_hits;
*num_recorded_hits = ctx.num_recorded_hits;
*throughput = ctx.throughput;
return ctx.opaque_hit;
}
# endif /* __EMBREE__ */
@ -431,21 +434,25 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg, ray, state, visibility, max_hits, num_hits);
return bvh_intersect_shadow_all_hair_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg, ray, state, visibility, max_hits, num_hits);
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg, ray, state, visibility, max_hits, num_hits);
return bvh_intersect_shadow_all_hair(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all(kg, ray, state, visibility, max_hits, num_hits);
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */

View File

@ -40,8 +40,10 @@ struct CCLIntersectContext {
/* for shadow rays */
Intersection *isect_s;
int max_hits;
int num_hits;
uint max_hits;
uint num_hits;
uint num_recorded_hits;
float throughput;
float max_t;
bool opaque_hit;
@ -56,6 +58,8 @@ struct CCLIntersectContext {
type = type_;
max_hits = 1;
num_hits = 0;
num_recorded_hits = 0;
throughput = 1.0f;
max_t = FLT_MAX;
opaque_hit = false;
isect_s = NULL;

View File

@ -41,7 +41,8 @@ ccl_device_inline
IntegratorShadowState state,
const uint visibility,
const uint max_hits,
ccl_private uint *num_hits)
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
/* todo:
* - likely and unlikely for if() statements
@ -61,6 +62,7 @@ ccl_device_inline
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
int object = OBJECT_NONE;
uint num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@ -77,7 +79,8 @@ ccl_device_inline
* otherwise. */
float t_world_to_instance = 1.0f;
*num_hits = 0;
*num_recorded_hits = 0;
*throughput = 1.0f;
/* traversal loop */
do {
@ -212,42 +215,62 @@ ccl_device_inline
* the primitive has a transparent shadow shader? */
const int flags = intersection_get_shader_flags(kg, isect.prim, isect.type);
if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) {
if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || num_hits >= max_hits) {
/* If no transparent shadows, all light is blocked and we can
* stop immediately. */
return true;
}
/* Increase the number of hits, possibly beyond max_hits, we will
* simply not record those and only keep the max_hits closest. */
uint record_index = (*num_hits)++;
num_hits++;
if (record_index >= max_hits - 1) {
/* If maximum number of hits reached, find the intersection with
* the largest distance to potentially replace when another hit
* is found. */
const int num_recorded_hits = min(max_hits, record_index);
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
int max_recorded_hit = 0;
bool record_intersection = true;
for (int i = 1; i < num_recorded_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
max_recorded_hit = i;
}
/* Always use baked shadow transparency for curves. */
if (isect.type & PRIMITIVE_ALL_CURVE) {
*throughput *= intersection_curve_shadow_transparency(
kg, isect.object, isect.prim, isect.u);
if (*throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
return true;
}
if (record_index >= max_hits) {
record_index = max_recorded_hit;
else {
record_intersection = false;
}
/* Limit the ray distance and stop counting hits beyond this. */
t_max_world = max(max_recorded_t, isect.t);
t_max_current = t_max_world * t_world_to_instance;
}
integrator_state_write_shadow_isect(state, &isect, record_index);
if (record_intersection) {
/* Increase the number of hits, possibly beyond max_hits, we will
* simply not record those and only keep the max_hits closest. */
uint record_index = (*num_recorded_hits)++;
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
if (record_index >= max_record_hits - 1) {
/* If maximum number of hits reached, find the intersection with
* the largest distance to potentially replace when another hit
* is found. */
const int num_recorded_hits = min(max_record_hits, record_index);
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
int max_recorded_hit = 0;
for (int i = 1; i < num_recorded_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
max_recorded_hit = i;
}
}
if (record_index >= max_record_hits) {
record_index = max_recorded_hit;
}
/* Limit the ray distance and stop counting hits beyond this. */
t_max_world = max(max_recorded_t, isect.t);
t_max_current = t_max_world * t_world_to_instance;
}
integrator_state_write_shadow_isect(state, &isect, record_index);
}
}
prim_addr++;
@ -304,9 +327,11 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
IntegratorShadowState state,
const uint visibility,
const uint max_hits,
ccl_private uint *num_hits)
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, state, visibility, max_hits, num_hits);
return BVH_FUNCTION_FULL_NAME(BVH)(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
#undef BVH_FUNCTION_NAME

View File

@ -195,4 +195,32 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
/* Transparent Shadows */
/* Cut-off value to stop transparent shadow tracing when practically opaque. */
#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f
ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
const int object,
const int prim,
const float u)
{
/* Find attribute. */
const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY);
if (offset == ATTR_STD_NOT_FOUND) {
/* If no shadow transparency attribute, assume opaque. */
return 0.0f;
}
/* Interpolate transparency between curve keys. */
const KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type);
const int k1 = k0 + 1;
const float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
const float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
return (1.0f - u) * f0 + u * f1;
}
CCL_NAMESPACE_END

View File

@ -64,6 +64,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
const KernelShaderEvalInput *input,
float *output,
const int offset);
void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
const KernelGlobalsCPU *kg,
const KernelShaderEvalInput *input,
float *output,
const int offset);
/* --------------------------------------------------------------------
* Adaptive sampling.

View File

@ -150,6 +150,19 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *k
#endif
}
void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
const KernelGlobalsCPU *kg,
const KernelShaderEvalInput *input,
float *output,
const int offset)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, shader_eval_curve_shadow_transparency);
#else
kernel_curve_shadow_transparency_evaluate(kg, input, output, offset);
#endif
}
/* --------------------------------------------------------------------
* Adaptive sampling.
*/

View File

@ -621,7 +621,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
/* Background Shader Evaluation */
/* Background */
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_gpu_shader_eval_background(KernelShaderEvalInput *input,
@ -635,6 +635,20 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
/* Curve Shadow Transparency */
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_gpu_shader_eval_curve_shadow_transparency(KernelShaderEvalInput *input,
float *output,
const int offset,
const int work_size)
{
int i = ccl_gpu_global_id_x();
if (i < work_size) {
kernel_curve_shadow_transparency_evaluate(NULL, input, output, offset + i);
}
}
/* --------------------------------------------------------------------
* Denoising.
*/

View File

@ -210,29 +210,50 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
optixSetPayload_5(true);
return optixTerminateRay();
# else
const int max_hits = optixGetPayload_3();
const uint max_hits = optixGetPayload_3();
const uint num_hits_packed = optixGetPayload_2();
const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
/* If no transparent shadows, all light is blocked and we can stop immediately. */
if (max_hits == 0 ||
if (num_hits >= max_hits ||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
optixSetPayload_5(true);
return optixTerminateRay();
}
/* Record transparent intersection. */
const int num_hits = optixGetPayload_2();
int record_index = num_hits;
/* Always use baked shadow transparency for curves. */
if (type & PRIMITIVE_ALL_CURVE) {
float throughput = __uint_as_float(optixGetPayload_1());
throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
optixSetPayload_1(__float_as_uint(throughput));
optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
optixSetPayload_2(num_hits + 1);
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
optixSetPayload_4(true);
return optixTerminateRay();
}
else {
/* Continue tracing. */
optixIgnoreIntersection();
return;
}
}
/* Record transparent intersection. */
optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
uint record_index = num_recorded_hits;
const IntegratorShadowState state = optixGetPayload_0();
if (record_index >= max_hits) {
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
if (record_index >= max_record_hits) {
/* If maximum number of hits reached, find a hit to replace. */
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
int max_recorded_hit = 0;
uint max_recorded_hit = 0;
for (int i = 1; i < max_hits; i++) {
for (int i = 1; i < max_record_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
@ -256,6 +277,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
/* Continue tracing. */
optixIgnoreIntersection();
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */

View File

@ -279,6 +279,81 @@ ccl_device void shader_setup_from_displace(KernelGlobals kg,
LAMP_NONE);
}
/* ShaderData setup for point on curve. */
ccl_device void shader_setup_from_curve(KernelGlobals kg,
ccl_private ShaderData *ccl_restrict sd,
int object,
int prim,
int segment,
float u)
{
/* Primitive */
sd->type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE_THICK, segment);
sd->lamp = LAMP_NONE;
sd->prim = prim;
sd->u = u;
sd->v = 0.0f;
sd->time = 0.5f;
sd->ray_length = 0.0f;
/* Shader */
sd->shader = kernel_tex_fetch(__curves, prim).shader_id;
sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
/* Object */
sd->object = object;
sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
#ifdef __OBJECT_MOTION__
shader_setup_object_transforms(kg, sd, sd->time);
#endif
/* Get control points. */
KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k1 = k0 + 1;
int ka = max(k0 - 1, kcurve.first_key);
int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1);
float4 P_curve[4];
P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
/* Interpolate position and tangent. */
sd->P = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u));
#ifdef __DPDU__
sd->dPdu = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u));
#endif
/* Transform into world space */
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
object_position_transform_auto(kg, sd, &sd->P);
#ifdef __DPDU__
object_dir_transform_auto(kg, sd, &sd->dPdu);
#endif
}
/* No view direction, normals or bitangent. */
sd->I = zero_float3();
sd->N = zero_float3();
sd->Ng = zero_float3();
#ifdef __DPDU__
sd->dPdv = zero_float3();
#endif
/* No ray differentials currently. */
#ifdef __RAY_DIFFERENTIALS__
sd->dP = differential3_zero();
sd->dI = differential3_zero();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
}
/* ShaderData setup from ray into background */
ccl_device_inline void shader_setup_from_background(KernelGlobals kg,

View File

@ -115,18 +115,25 @@ ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg,
{
/* Limit the number hits to the max transparent bounces allowed and the size that we
* have available in the integrator state. */
const uint max_transparent_hits = integrate_shadow_max_transparent_hits(kg, state);
const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE);
const uint max_hits = integrate_shadow_max_transparent_hits(kg, state);
uint num_hits = 0;
bool opaque_hit = scene_intersect_shadow_all(kg, state, ray, visibility, max_hits, &num_hits);
float throughput = 1.0f;
bool opaque_hit = scene_intersect_shadow_all(
kg, state, ray, visibility, max_hits, &num_hits, &throughput);
/* Computed throughput from baked shadow transparency, where we can bypass recording
* intersections and shader evaluation. */
if (throughput != 1.0f) {
INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) *= throughput;
}
/* If number of hits exceed the transparent bounces limit, make opaque. */
if (num_hits > max_transparent_hits) {
if (num_hits > max_hits) {
opaque_hit = true;
}
if (!opaque_hit) {
uint num_recorded_hits = min(num_hits, max_hits);
const uint num_recorded_hits = min(num_hits, min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE));
if (num_recorded_hits > 0) {
sort_shadow_intersections(state, num_recorded_hits);

View File

@ -23,7 +23,7 @@
CCL_NAMESPACE_BEGIN
ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits)
{
return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
}
@ -105,12 +105,12 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
IntegratorShadowState state,
const int num_hits)
const uint num_hits)
{
/* Accumulate shadow for transparent surfaces. */
const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
const uint num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
for (int hit = 0; hit < num_recorded_hits + 1; hit++) {
for (uint hit = 0; hit < num_recorded_hits + 1; hit++) {
/* Volume shaders. */
if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
# ifdef __VOLUME__
@ -162,7 +162,7 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP);
const int num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
const uint num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
#ifdef __TRANSPARENT_SHADOWS__
/* Evaluate transparent shadows. */

View File

@ -48,19 +48,6 @@
CCL_NAMESPACE_BEGIN
/* Constants
*
* TODO: these could be made dynamic depending on the features used in the scene. */
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4
#ifdef __KERNEL_CPU__
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
#else
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
#endif
/* Data structures */
/* Integrator State

View File

@ -96,4 +96,25 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
output[offset * 3 + 2] += color.z;
}
ccl_device void kernel_curve_shadow_transparency_evaluate(
KernelGlobals kg,
ccl_global const KernelShaderEvalInput *input,
ccl_global float *output,
const int offset)
{
/* Setup shader data. */
const KernelShaderEvalInput in = input[offset];
ShaderData sd;
shader_setup_from_curve(kg, &sd, in.object, in.prim, __float_as_int(in.v), in.u);
/* Evaluate transparency. */
shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW &
~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>(
kg, INTEGRATOR_STATE_NULL, &sd, NULL, PATH_RAY_SHADOW);
/* Write output. */
output[offset] = clamp(average(shader_bsdf_transparency(kg, &sd)), 0.0f, 1.0f);
}
CCL_NAMESPACE_END

View File

@ -61,6 +61,15 @@ CCL_NAMESPACE_BEGIN
#define ID_NONE (0.0f)
#define PASS_UNUSED (~0)
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
#ifdef __KERNEL_CPU__
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
#else
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
#endif
/* Kernel features */
#define __SOBOL__
#define __DPDU__
@ -582,6 +591,7 @@ typedef enum AttributeStandard {
ATTR_STD_VOLUME_VELOCITY,
ATTR_STD_POINTINESS,
ATTR_STD_RANDOM_PER_ISLAND,
ATTR_STD_SHADOW_TRANSPARENCY,
ATTR_STD_NUM,
ATTR_STD_NOT_FOUND = ~0
@ -1452,6 +1462,7 @@ typedef enum DeviceKernel {
DEVICE_KERNEL_SHADER_EVAL_DISPLACE,
DEVICE_KERNEL_SHADER_EVAL_BACKGROUND,
DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
#define DECLARE_FILM_CONVERT_KERNEL(variant) \
DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA

View File

@ -366,6 +366,8 @@ const char *Attribute::standard_name(AttributeStandard std)
return "pointiness";
case ATTR_STD_RANDOM_PER_ISLAND:
return "random_per_island";
case ATTR_STD_SHADOW_TRANSPARENCY:
return "shadow_transparency";
case ATTR_STD_NOT_FOUND:
case ATTR_STD_NONE:
case ATTR_STD_NUM:
@ -603,6 +605,9 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_RANDOM_PER_ISLAND:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_FACE);
break;
case ATTR_STD_SHADOW_TRANSPARENCY:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY);
break;
default:
assert(0);
break;

View File

@ -734,6 +734,10 @@ void GeometryManager::device_update_attributes(Device *device,
Shader *shader = static_cast<Shader *>(node);
geom_attributes[i].add(shader->attributes);
}
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
}
}
/* convert object attributes to use the same data structures as geometry ones */
@ -1659,6 +1663,7 @@ void GeometryManager::device_update(Device *device,
VLOG(1) << "Total " << scene->geometry.size() << " meshes.";
bool true_displacement_used = false;
bool curve_shadow_transparency_used = false;
size_t total_tess_needed = 0;
{
@ -1669,26 +1674,33 @@ void GeometryManager::device_update(Device *device,
});
foreach (Geometry *geom, scene->geometry) {
if (geom->is_modified() &&
(geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME)) {
Mesh *mesh = static_cast<Mesh *>(geom);
if (geom->is_modified()) {
if ((geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME)) {
Mesh *mesh = static_cast<Mesh *>(geom);
/* Update normals. */
mesh->add_face_normals();
mesh->add_vertex_normals();
/* Update normals. */
mesh->add_face_normals();
mesh->add_vertex_normals();
if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
mesh->add_undisplaced();
if (mesh->need_attribute(scene, ATTR_STD_POSITION_UNDISPLACED)) {
mesh->add_undisplaced();
}
/* Test if we need tessellation. */
if (mesh->need_tesselation()) {
total_tess_needed++;
}
/* Test if we need displacement. */
if (mesh->has_true_displacement()) {
true_displacement_used = true;
}
}
/* Test if we need tessellation. */
if (mesh->need_tesselation()) {
total_tess_needed++;
}
/* Test if we need displacement. */
if (mesh->has_true_displacement()) {
true_displacement_used = true;
else if (geom->geometry_type == Geometry::HAIR) {
Hair *hair = static_cast<Hair *>(geom);
if (hair->need_shadow_transparency()) {
curve_shadow_transparency_used = true;
}
}
if (progress.get_cancel()) {
@ -1752,7 +1764,7 @@ void GeometryManager::device_update(Device *device,
/* Update images needed for true displacement. */
bool old_need_object_flags_update = false;
if (true_displacement_used) {
if (true_displacement_used || curve_shadow_transparency_used) {
scoped_callback_timer timer([scene](double time) {
if (scene->update_stats) {
scene->update_stats->geometry.times.add_entry(
@ -1770,7 +1782,7 @@ void GeometryManager::device_update(Device *device,
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
device->get_bvh_layout_mask());
mesh_calc_offset(scene, bvh_layout);
if (true_displacement_used) {
if (true_displacement_used || curve_shadow_transparency_used) {
scoped_callback_timer timer([scene](double time) {
if (scene->update_stats) {
scene->update_stats->geometry.times.add_entry(
@ -1795,8 +1807,9 @@ void GeometryManager::device_update(Device *device,
}
}
/* Update displacement. */
/* Update displacement and hair shadow transparency. */
bool displacement_done = false;
bool curve_shadow_transparency_done = false;
size_t num_bvh = 0;
{
@ -1817,6 +1830,12 @@ void GeometryManager::device_update(Device *device,
displacement_done = true;
}
}
else if (geom->geometry_type == Geometry::HAIR) {
Hair *hair = static_cast<Hair *>(geom);
if (hair->update_shadow_transparency(device, scene, progress)) {
curve_shadow_transparency_done = true;
}
}
}
if (geom->is_modified() || geom->need_update_bvh_for_offset) {
@ -1836,7 +1855,7 @@ void GeometryManager::device_update(Device *device,
}
/* Device re-update after displacement. */
if (displacement_done) {
if (displacement_done || curve_shadow_transparency_done) {
scoped_callback_timer timer([scene](double time) {
if (scene->update_stats) {
scene->update_stats->geometry.times.add_entry(

View File

@ -18,8 +18,13 @@
#include "render/curves.h"
#include "render/hair.h"
#include "render/object.h"
#include "render/scene.h"
#include "integrator/shader_eval.h"
#include "util/util_progress.h"
CCL_NAMESPACE_BEGIN
/* Hair Curve */
@ -514,4 +519,114 @@ PrimitiveType Hair::primitive_type() const
((curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
}
/* Fill in coordinates for curve transparency shader evaluation on device. */
static int fill_shader_input(const Hair *hair,
const int object_index,
device_vector<KernelShaderEvalInput> &d_input)
{
int d_input_size = 0;
KernelShaderEvalInput *d_input_data = d_input.data();
const int num_curves = hair->num_curves();
for (int i = 0; i < num_curves; i++) {
const Hair::Curve curve = hair->get_curve(i);
const int num_segments = curve.num_segments();
for (int j = 0; j < num_segments + 1; j++) {
KernelShaderEvalInput in;
in.object = object_index;
in.prim = hair->prim_offset + i;
in.u = (j < num_segments) ? 0.0f : 1.0f;
in.v = (j < num_segments) ? __int_as_float(j) : __int_as_float(j - 1);
d_input_data[d_input_size++] = in;
}
}
return d_input_size;
}
/* Read back curve transparency shader output. */
static void read_shader_output(float *shadow_transparency,
bool &is_fully_opaque,
const device_vector<float> &d_output)
{
const int num_keys = d_output.size();
const float *output_data = d_output.data();
bool is_opaque = true;
for (int i = 0; i < num_keys; i++) {
shadow_transparency[i] = output_data[i];
if (shadow_transparency[i] > 0.0f) {
is_opaque = false;
}
}
is_fully_opaque = is_opaque;
}
bool Hair::need_shadow_transparency()
{
for (const Node *node : used_shaders) {
const Shader *shader = static_cast<const Shader *>(node);
if (shader->has_surface_transparent && shader->get_use_transparent_shadow()) {
return true;
}
}
return false;
}
bool Hair::update_shadow_transparency(Device *device, Scene *scene, Progress &progress)
{
if (!need_shadow_transparency()) {
/* If no shaders with shadow transparency, remove attribute. */
Attribute *attr = attributes.find(ATTR_STD_SHADOW_TRANSPARENCY);
if (attr) {
attributes.remove(attr);
return true;
}
else {
return false;
}
}
string msg = string_printf("Computing Shadow Transparency %s", name.c_str());
progress.set_status("Updating Hair", msg);
/* Create shadow transparency attribute. */
Attribute *attr = attributes.find(ATTR_STD_SHADOW_TRANSPARENCY);
const bool attribute_exists = (attr != nullptr);
if (!attribute_exists) {
attr = attributes.add(ATTR_STD_SHADOW_TRANSPARENCY);
}
float *attr_data = attr->data_float();
/* Find object index. */
size_t object_index = OBJECT_NONE;
for (size_t i = 0; i < scene->objects.size(); i++) {
if (scene->objects[i]->get_geometry() == this) {
object_index = i;
break;
}
}
/* Evaluate shader on device. */
ShaderEval shader_eval(device, progress);
bool is_fully_opaque = false;
shader_eval.eval(SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
num_keys(),
1,
function_bind(&fill_shader_input, this, object_index, _1),
function_bind(&read_shader_output, attr_data, is_fully_opaque, _1));
if (is_fully_opaque) {
attributes.remove(attr);
return attribute_exists;
}
return true;
}
CCL_NAMESPACE_END

View File

@ -153,6 +153,10 @@ class Hair : public Geometry {
KernelCurveSegment *curve_segments);
PrimitiveType primitive_type() const override;
/* Attributes */
bool need_shadow_transparency();
bool update_shadow_transparency(Device *device, Scene *scene, Progress &progress);
};
CCL_NAMESPACE_END