Cycles: SSS and Volume rendering in split kernel

Decoupled ray marching is not supported yet.

Transparent shadows are always enabled for volume rendering.

Changes in kernel/bvh and kernel/geom are from Sergey.
This simplifies the code significantly and prepares it for the
record-all transparent shadow function in the split kernel.
Hristo Gueorguiev 2017-03-08 15:42:26 +01:00 committed by Sergey Sharybin
parent 6c942db30d
commit 57e26627c4
46 changed files with 1021 additions and 370 deletions
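
A minimal sketch of the kernel/bvh signature change referred to above (plain C, simplified; instance_push_old() and instance_push_new() are illustrative stand-ins, not the real kernel functions): bvh_instance_push() and its pop/motion variants now take the clipped ray distance t by value and return the updated value instead of writing through a ccl_addr_space qualified pointer, so call sites stay uniform whether the distance lives in private or global memory in the split kernel.

/* Illustrative only: mirrors the pointer-to-value-return change from this commit. */
#include <float.h>
#include <stdio.h>

/* Old style: caller passes &t, which in the split kernel required an
 * address-space qualifier (ccl_addr_space) on the parameter. */
static void instance_push_old(float *t, float len)
{
    if(*t != FLT_MAX)
        *t *= len;
}

/* New style: t travels by value and comes back as the return value, so no
 * address-space qualifier is needed and the helper works for any caller. */
static float instance_push_new(float t, float len)
{
    if(t != FLT_MAX) {
        t *= len;
    }
    return t;
}

int main(void)
{
    float isect_t = 2.0f;
    const float len = 0.5f;

    instance_push_old(&isect_t, len);          /* old call site */
    isect_t = instance_push_new(isect_t, len); /* new call site */

    printf("isect_t = %f\n", isect_t);
    return 0;
}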

View File

@ -194,7 +194,7 @@ public:
if(!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
if(!use_transparent) {
if(!use_transparent && !use_volume) {
build_options += " -D__NO_TRANSPARENT__";
}
return build_options;

View File

@ -35,13 +35,17 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
kernel_path_init = NULL;
kernel_scene_intersect = NULL;
kernel_lamp_emission = NULL;
kernel_do_volume = NULL;
kernel_queue_enqueue = NULL;
kernel_background_buffer_update = NULL;
kernel_indirect_background = NULL;
kernel_shader_eval = NULL;
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
kernel_subsurface_scatter = NULL;
kernel_direct_lighting = NULL;
kernel_shadow_blocked = NULL;
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
@ -55,13 +59,17 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_path_init;
delete kernel_scene_intersect;
delete kernel_lamp_emission;
delete kernel_do_volume;
delete kernel_queue_enqueue;
delete kernel_background_buffer_update;
delete kernel_indirect_background;
delete kernel_shader_eval;
delete kernel_holdout_emission_blurring_pathtermination_ao;
delete kernel_subsurface_scatter;
delete kernel_direct_lighting;
delete kernel_shadow_blocked;
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
@ -75,13 +83,17 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
LOAD_KERNEL(do_volume);
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(background_buffer_update);
LOAD_KERNEL(indirect_background);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
LOAD_KERNEL(direct_lighting);
LOAD_KERNEL(shadow_blocked);
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
#undef LOAD_KERNEL
@ -220,13 +232,18 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
for(int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
if(task->get_cancel()) {
return true;

View File

@ -58,13 +58,17 @@ private:
SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_background_buffer_update;
SplitKernelFunction *kernel_indirect_background;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
SplitKernelFunction *kernel_direct_lighting;
SplitKernelFunction *kernel_shadow_blocked;
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
/* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one

View File

@ -21,12 +21,16 @@ set(SRC
kernels/opencl/kernel_queue_enqueue.cl
kernels/opencl/kernel_scene_intersect.cl
kernels/opencl/kernel_lamp_emission.cl
kernels/opencl/kernel_background_buffer_update.cl
kernels/opencl/kernel_do_volume.cl
kernels/opencl/kernel_indirect_background.cl
kernels/opencl/kernel_shader_eval.cl
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
kernels/opencl/kernel_subsurface_scatter.cl
kernels/opencl/kernel_direct_lighting.cl
kernels/opencl/kernel_shadow_blocked.cl
kernels/opencl/kernel_next_iteration_setup.cl
kernels/opencl/kernel_indirect_subsurface.cl
kernels/opencl/kernel_buffer_update.cl
kernels/cuda/kernel.cu
kernels/cuda/kernel_split.cu
)
@ -71,6 +75,7 @@ set(SRC_HEADERS
kernel_path_common.h
kernel_path_state.h
kernel_path_surface.h
kernel_path_subsurface.h
kernel_path_volume.h
kernel_projection.h
kernel_queues.h
@ -196,10 +201,13 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
split/kernel_background_buffer_update.h
split/kernel_buffer_update.h
split/kernel_data_init.h
split/kernel_direct_lighting.h
split/kernel_do_volume.h
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_indirect_background.h
split/kernel_indirect_subsurface.h
split/kernel_lamp_emission.h
split/kernel_next_iteration_setup.h
split/kernel_path_init.h
@ -210,6 +218,7 @@ set(SRC_SPLIT_HEADERS
split/kernel_split_common.h
split/kernel_split_data.h
split/kernel_split_data_types.h
split/kernel_subsurface_scatter.h
)
# CUDA module
@ -407,12 +416,16 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl"
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)

View File

@ -309,9 +309,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@ -362,12 +362,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}

View File

@ -75,16 +75,16 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
#else
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
#endif
object = subsurface_object;
}

View File

@ -354,9 +354,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@ -391,9 +391,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance pop */
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);

View File

@ -238,9 +238,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@ -281,9 +281,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
/* instance pop */
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);

View File

@ -288,11 +288,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
@ -348,11 +347,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
triangle_intersect_precalc(dir, &isect_precalc);
}

View File

@ -390,9 +390,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
num_hits_in_instance = 0;
@ -445,11 +445,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
}

View File

@ -64,16 +64,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
&isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg,
subsurface_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
#else
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
#endif
object = subsurface_object;
}

View File

@ -468,9 +468,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,

View File

@ -295,9 +295,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,
@ -341,9 +341,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Instance pop. */
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,

View File

@ -346,9 +346,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
qbvh_near_far_idx_calc(idir,
@ -406,11 +406,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
}
else {
float ignore_t = FLT_MAX;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
}

View File

@ -425,7 +425,13 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
/* Transform ray into object space to enter static object in BVH */
ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
ccl_device_inline float bvh_instance_push(KernelGlobals *kg,
int object,
const Ray *ray,
float3 *P,
float3 *dir,
float3 *idir,
float t)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
@ -435,8 +441,11 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra
*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
*idir = bvh_inverse_direction(*dir);
if(*t != FLT_MAX)
*t *= len;
if(t != FLT_MAX) {
t *= len;
}
return t;
}
#ifdef __QBVH__
@ -473,16 +482,24 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
/* Transorm ray to exit static object in BVH */
ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
ccl_device_inline float bvh_instance_pop(KernelGlobals *kg,
int object,
const Ray *ray,
float3 *P,
float3 *dir,
float3 *idir,
float t)
{
if(*t != FLT_MAX) {
if(t != FLT_MAX) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
*t /= len(transform_direction(&tfm, ray->D));
t /= len(transform_direction(&tfm, ray->D));
}
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
return t;
}
/* Same as above, but returns scale factor to apply to multiple intersection distances */
@ -501,13 +518,13 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
int object,
const Ray *ray,
float3 *P,
float3 *dir,
float3 *idir,
ccl_addr_space float *t,
float t,
Transform *itfm)
{
object_fetch_transform_motion_test(kg, object, ray->time, itfm);
@ -518,8 +535,11 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
*idir = bvh_inverse_direction(*dir);
if(*t != FLT_MAX)
*t *= len;
if(t != FLT_MAX) {
t *= len;
}
return t;
}
#ifdef __QBVH__
@ -557,22 +577,24 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
/* Transorm ray to exit motion blurred object in BVH */
ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg,
int object,
const Ray *ray,
float3 *P,
float3 *dir,
float3 *idir,
ccl_addr_space float *t,
Transform *itfm)
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
int object,
const Ray *ray,
float3 *P,
float3 *dir,
float3 *idir,
float t,
Transform *itfm)
{
if(*t != FLT_MAX) {
*t /= len(transform_direction(itfm, ray->D));
if(t != FLT_MAX) {
t /= len(transform_direction(itfm, ray->D));
}
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
return t;
}
/* Same as above, but returns scale factor to apply to multiple intersection distances */

View File

@ -46,6 +46,7 @@
#include "kernel_path_common.h"
#include "kernel_path_surface.h"
#include "kernel_path_volume.h"
#include "kernel_path_subsurface.h"
#ifdef __KERNEL_DEBUG__
# include "kernel_debug.h"
@ -413,172 +414,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
}
#ifdef __SUBSURFACE__
# ifndef __KERNEL_CUDA__
ccl_device
# else
ccl_device_inline
# endif
bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
PathState *state,
RNG *rng,
Ray *ray,
float3 *throughput,
SubsurfaceIndirectRays *ss_indirect)
{
float bssrdf_probability;
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
/* modify throughput for picking bssrdf or bsdf */
*throughput *= bssrdf_probability;
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
/* We should never have two consecutive BSSRDF bounces,
* the second one should be converted to a diffuse BSDF to
* avoid this.
*/
kernel_assert(!ss_indirect->tracing);
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_intersect(kg,
&ss_isect,
sd,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
# ifdef __VOLUME__
ss_indirect->need_update_volume_stack =
kernel_data.integrator.use_volumes &&
sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
# endif /* __VOLUME__ */
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
/* NOTE: We reuse the existing ShaderData, we assume the path
* integration loop stops when this function returns true.
*/
subsurface_scatter_multi_setup(kg,
&ss_isect,
hit,
sd,
state,
state->flag,
sc,
false);
PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
*hit_state = *state;
*hit_ray = *ray;
*hit_tp = *throughput;
hit_state->rng_offset += PRNG_BOUNCE_NUM;
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
hit_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(hit_L, L);
kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg,
rng,
sd,
hit_tp,
hit_state,
hit_L,
hit_ray))
{
# ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f;
# endif /* __LAMP_MIS__ */
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(
kg,
emission_sd,
&volume_ray,
hit_state->volume_stack);
}
# endif /* __VOLUME__ */
path_radiance_reset_indirect(L);
ss_indirect->num_rays++;
}
else {
path_radiance_accum_sample(L, hit_L, 1);
}
}
return true;
}
return false;
}
ccl_device_inline void kernel_path_subsurface_init_indirect(
SubsurfaceIndirectRays *ss_indirect)
{
ss_indirect->tracing = false;
ss_indirect->num_rays = 0;
}
ccl_device void kernel_path_subsurface_accum_indirect(
SubsurfaceIndirectRays *ss_indirect,
PathRadiance *L)
{
if(ss_indirect->tracing) {
path_radiance_sum_indirect(L);
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
if(ss_indirect->num_rays == 0) {
*L = ss_indirect->direct_L;
}
}
}
ccl_device void kernel_path_subsurface_setup_indirect(
KernelGlobals *kg,
SubsurfaceIndirectRays *ss_indirect,
PathState *state,
Ray *ray,
PathRadiance *L,
float3 *throughput)
{
if(!ss_indirect->tracing) {
ss_indirect->direct_L = *L;
}
ss_indirect->tracing = true;
/* Setup state, ray and throughput for indirect SSS rays. */
ss_indirect->num_rays--;
Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
*state = ss_indirect->state[ss_indirect->num_rays];
*ray = *indirect_ray;
*L = *indirect_L;
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
}
#endif /* __SUBSURFACE__ */
ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
RNG *rng,

View File

@ -0,0 +1,187 @@
/*
* Copyright 2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
#ifdef __SUBSURFACE__
# ifndef __KERNEL_CUDA__
ccl_device
# else
ccl_device_inline
# endif
bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
ccl_addr_space PathState *state,
ccl_addr_space RNG *rng,
ccl_addr_space Ray *ray,
ccl_addr_space float3 *throughput,
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
float bssrdf_probability;
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
/* modify throughput for picking bssrdf or bsdf */
*throughput *= bssrdf_probability;
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
/* We should never have two consecutive BSSRDF bounces,
* the second one should be converted to a diffuse BSDF to
* avoid this.
*/
kernel_assert(!ss_indirect->tracing);
uint lcg_state = lcg_state_init_addrspace(rng, state, 0x68bc21eb);
SubsurfaceIntersection ss_isect;
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
int num_hits = subsurface_scatter_multi_intersect(kg,
&ss_isect,
sd,
sc,
&lcg_state,
bssrdf_u, bssrdf_v,
false);
# ifdef __VOLUME__
ss_indirect->need_update_volume_stack =
kernel_data.integrator.use_volumes &&
sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
# endif /* __VOLUME__ */
/* compute lighting with the BSDF closure */
for(int hit = 0; hit < num_hits; hit++) {
/* NOTE: We reuse the existing ShaderData, we assume the path
* integration loop stops when this function returns true.
*/
subsurface_scatter_multi_setup(kg,
&ss_isect,
hit,
sd,
state,
state->flag,
sc,
false);
ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
*hit_state = *state;
*hit_ray = *ray;
*hit_tp = *throughput;
hit_state->rng_offset += PRNG_BOUNCE_NUM;
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
hit_L->direct_throughput = L->direct_throughput;
path_radiance_copy_indirect(hit_L, L);
kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
if(kernel_path_surface_bounce(kg,
rng,
sd,
hit_tp,
hit_state,
hit_L,
hit_ray))
{
# ifdef __LAMP_MIS__
hit_state->ray_t = 0.0f;
# endif /* __LAMP_MIS__ */
# ifdef __VOLUME__
if(ss_indirect->need_update_volume_stack) {
Ray volume_ray = *ray;
/* Setup ray from previous surface point to the new one. */
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
&volume_ray.t);
kernel_volume_stack_update_for_subsurface(
kg,
emission_sd,
&volume_ray,
hit_state->volume_stack);
}
# endif /* __VOLUME__ */
path_radiance_reset_indirect(L);
ss_indirect->num_rays++;
}
else {
path_radiance_accum_sample(L, hit_L, 1);
}
}
return true;
}
return false;
}
ccl_device_inline void kernel_path_subsurface_init_indirect(
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
ss_indirect->tracing = false;
ss_indirect->num_rays = 0;
}
ccl_device void kernel_path_subsurface_accum_indirect(
ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
PathRadiance *L)
{
if(ss_indirect->tracing) {
path_radiance_sum_indirect(L);
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
if(ss_indirect->num_rays == 0) {
*L = ss_indirect->direct_L;
}
}
}
ccl_device void kernel_path_subsurface_setup_indirect(
KernelGlobals *kg,
ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray,
PathRadiance *L,
ccl_addr_space float3 *throughput)
{
if(!ss_indirect->tracing) {
ss_indirect->direct_L = *L;
}
ss_indirect->tracing = true;
/* Setup state, ray and throughput for indirect SSS rays. */
ss_indirect->num_rays--;
ccl_addr_space Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
*state = ss_indirect->state[ss_indirect->num_rays];
*ray = *indirect_ray;
*L = *indirect_L;
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
}
#endif /* __SUBSURFACE__ */
CCL_NAMESPACE_END

View File

@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
#if (defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)) && !defined(__SPLIT_KERNEL__)
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
@ -188,7 +188,6 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
#endif
#ifndef __SPLIT_KERNEL__
/* path tracing: connect path directly to position on a light and add it to L */
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
@ -226,7 +225,6 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
}
#endif
}
#endif
/* path tracing: bounce off or through surface to with new direction stored in ray */
ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,

View File

@ -20,11 +20,11 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_path_volume_connect_light(
KernelGlobals *kg,
RNG *rng,
ccl_addr_space RNG *rng,
ShaderData *sd,
ShaderData *emission_sd,
float3 throughput,
PathState *state,
ccl_addr_space PathState *state,
PathRadiance *L)
{
#ifdef __EMISSION__
@ -59,7 +59,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
}
}
}
#endif
#endif /* __EMISSION__ */
}
#ifdef __KERNEL_GPU__
@ -67,8 +67,14 @@ ccl_device_noinline
#else
ccl_device
#endif
bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
bool kernel_path_volume_bounce(
KernelGlobals *kg,
ccl_addr_space RNG *rng,
ShaderData *sd,
ccl_addr_space float3 *throughput,
ccl_addr_space PathState *state,
PathRadiance *L,
ccl_addr_space Ray *ray)
{
/* sample phase function */
float phase_pdf;
@ -111,6 +117,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
return true;
}
#ifdef __BRANCHED_PATH__
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
@ -261,10 +268,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
}
}
}
#endif
#endif /* __EMISSION__ */
}
#endif /* __BRANCHED_PATH__ */
#endif
#endif /* __VOLUME_SCATTER__ */
CCL_NAMESPACE_END

View File

@ -203,11 +203,11 @@ void shader_setup_from_subsurface(
# ifdef __INSTANCING__
if(isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform(kg, sd, &sd->N);
object_normal_transform(kg, sd, &sd->Ng);
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);
# ifdef __DPDU__
object_dir_transform(kg, sd, &sd->dPdu);
object_dir_transform(kg, sd, &sd->dPdv);
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
# endif
}
# endif
@ -816,7 +816,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
*N_ = (is_zero(N))? sd->N: normalize(N);
if(texture_blur_)
*texture_blur_ = texture_blur/weight_sum;
*texture_blur_ = safe_divide(texture_blur, weight_sum);
return eval;
}
@ -1036,8 +1036,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
ShaderData *sd,
PathState *state,
VolumeStack *stack,
ccl_addr_space PathState *state,
ccl_addr_space VolumeStack *stack,
int path_flag,
ShaderContext ctx)
{

View File

@ -24,7 +24,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect(
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
# ifdef __VOLUME__
struct PathState *volume_state,
ccl_addr_space struct PathState *volume_state,
# endif
Intersection *isect,
Ray *ray,
@ -276,7 +276,13 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
float3 Pend = ray->P + ray->D*ray->t;
int bounce = state->transparent_bounce;
# ifdef __VOLUME__
PathState ps = *state;
# ifdef __SPLIT_KERNEL__
ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
# else
PathState ps_object;
PathState *ps = &ps_object;
# endif
*ps = *state;
# endif
for(;;) {
if(bounce >= kernel_data.integrator.transparent_max_bounce) {
@ -299,7 +305,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
shadow_sd,
state,
#ifdef __VOLUME__
&ps,
ps,
#endif
isect,
ray,
@ -316,8 +322,8 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
}
# ifdef __VOLUME__
/* Attenuation for last line segment towards light. */
if(ps.volume_stack[0].shader != SHADER_NONE) {
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
if(ps->volume_stack[0].shader != SHADER_NONE) {
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
}
# endif
*shadow *= throughput;
@ -365,21 +371,11 @@ ccl_device bool shadow_blocked_transparent_stepped(
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
ccl_addr_space Ray *ray_input,
Ray *ray_input,
float3 *shadow)
{
/* Special trickery for split kernel: some data is coming from the
* global memory.
*/
#ifdef __SPLIT_KERNEL__
Ray private_ray = *ray_input;
Ray *ray = &private_ray;
Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#else /* __SPLIT_KERNEL__ */
Ray *ray = ray_input;
Intersection isect_object;
Intersection *isect = &isect_object;
#endif /* __SPLIT_KERNEL__ */
Intersection isect;
/* Some common early checks. */
*shadow = make_float3(1.0f, 1.0f, 1.0f);
if(ray->t == 0.0f) {
@ -397,7 +393,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
shadow_sd,
state,
ray,
isect,
&isect,
shadow);
}
#ifdef __TRANSPARENT_SHADOWS__
@ -423,11 +419,11 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
const bool blocked = scene_intersect(kg,
*ray,
PATH_RAY_SHADOW_OPAQUE,
isect,
&isect,
NULL,
0.0f, 0.0f);
const bool is_transparent_isect = blocked
? shader_transparent_shadow(kg, isect)
? shader_transparent_shadow(kg, &isect)
: false;
if(!blocked || !is_transparent_isect ||
max_hits + 1 >= SHADOW_STACK_MAX_HITS)
@ -436,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
shadow_sd,
state,
ray,
isect,
&isect,
blocked,
is_transparent_isect,
shadow);
@ -454,7 +450,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
shadow_sd,
state,
ray,
isect,
&isect,
shadow);
# endif /* __SHADOW_RECORD_ALL__ */
#endif /* __TRANSPARENT_SHADOWS__ */

View File

@ -185,7 +185,7 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
ShaderData *sd,
PathState *state,
ccl_addr_space PathState *state,
int state_flag,
float3 *eval,
float3 *N)
@ -277,7 +277,12 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
/* create ray */
#ifdef __SPLIT_KERNEL__
Ray ray_object = ss_isect->ray;
Ray *ray = &ray_object;
#else
Ray *ray = &ss_isect->ray;
#endif
ray->P = sd->P + disk_N*disk_height + disk_P;
ray->D = -disk_N;
ray->t = 2.0f*disk_height;
@ -351,6 +356,10 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
ss_isect->weight[hit] = eval;
}
#ifdef __SPLIT_KERNEL__
ss_isect->ray = *ray;
#endif
return num_eval_hits;
}
@ -359,13 +368,19 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
SubsurfaceIntersection* ss_isect,
int hit,
ShaderData *sd,
PathState *state,
ccl_addr_space PathState *state,
int state_flag,
ShaderClosure *sc,
bool all)
{
#ifdef __SPLIT_KERNEL__
Ray ray_object = ss_isect->ray;
Ray *ray = &ray_object;
#else
Ray *ray = &ss_isect->ray;
#endif
/* Setup new shading point. */
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
/* Optionally blur colors and bump mapping. */
float3 weight = ss_isect->weight[hit];
@ -376,6 +391,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
}
#ifndef __SPLIT_KERNEL__
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
@ -465,6 +481,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
}
#endif /* ! __SPLIT_KERNEL__ */
CCL_NAMESPACE_END

View File

@ -76,14 +76,12 @@ CCL_NAMESPACE_BEGIN
# ifdef WITH_OSL
# define __OSL__
# endif
# ifndef __SPLIT_KERNEL__
# define __SUBSURFACE__
# endif
# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
# define __VOLUME_SCATTER__
# ifndef __SPLIT_KERNEL__
# define __VOLUME__
# define __VOLUME_DECOUPLED__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# define __VOLUME_RECORD_ALL__
# endif
@ -130,6 +128,9 @@ CCL_NAMESPACE_BEGIN
# define __CL_USE_NATIVE__
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
# define __SUBSURFACE__
# define __VOLUME__
# define __VOLUME_SCATTER__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__
@ -552,7 +553,7 @@ typedef struct Ray {
/* Intersection */
typedef ccl_addr_space struct Intersection {
typedef struct Intersection {
float t, u, v;
int prim;
int object;
@ -934,7 +935,7 @@ typedef struct PathState {
/* Subsurface */
/* Struct to gather multiple SSS hits. */
struct SubsurfaceIntersection
typedef struct SubsurfaceIntersection
{
Ray ray;
float3 weight[BSSRDF_MAX_HITS];
@ -942,10 +943,10 @@ struct SubsurfaceIntersection
int num_hits;
struct Intersection hits[BSSRDF_MAX_HITS];
float3 Ng[BSSRDF_MAX_HITS];
};
} SubsurfaceIntersection;
/* Struct to gather SSS indirect rays and delay tracing them. */
struct SubsurfaceIndirectRays
typedef struct SubsurfaceIndirectRays
{
bool need_update_volume_stack;
bool tracing;
@ -956,7 +957,7 @@ struct SubsurfaceIndirectRays
struct Ray rays[BSSRDF_MAX_HITS];
float3 throughputs[BSSRDF_MAX_HITS];
struct PathRadiance L[BSSRDF_MAX_HITS];
};
} SubsurfaceIndirectRays;
/* Constant Kernel Data
*

View File

@ -38,7 +38,7 @@ typedef struct VolumeShaderCoefficients {
/* evaluate shader to get extinction coefficient at P */
ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
ShaderData *sd,
PathState *state,
ccl_addr_space PathState *state,
float3 P,
float3 *extinction)
{
@ -64,7 +64,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
/* evaluate shader to get absorption, scattering and emission at P */
ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
ShaderData *sd,
PathState *state,
ccl_addr_space PathState *state,
float3 P,
VolumeShaderCoefficients *coeff)
{
@ -112,7 +112,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
}
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
{
for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
@ -161,7 +161,11 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
/* homogeneous volume: assume shader evaluation at the starts gives
* the extinction coefficient for the entire line segment */
ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
float3 *throughput)
{
float3 sigma_t;
@ -171,7 +175,11 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s
/* heterogeneous volume: integrate stepping through the volume until we
* reach the end, get absorbed entirely, or run out of iterations */
ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
float3 *throughput)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@ -179,7 +187,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step = kernel_data.integrator.volume_step_size;
float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
/* compute extinction at the start */
float t = 0.0f;
@ -193,7 +201,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* use random position inside this segment to sample shader */
if(new_t == ray->t)
random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
float3 sigma_t;
@ -227,7 +235,11 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* get the volume attenuation over line segment defined by ray, with the
* assumption that there are no surfaces blocking light between the endpoints */
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
ShaderData *shadow_sd,
ccl_addr_space PathState *state,
Ray *ray,
float3 *throughput)
{
shader_setup_from_volume(kg, shadow_sd, ray);
@ -341,9 +353,15 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
/* homogeneous volume: assume shader evaluation at the start gives
* the volume shading coefficient for the entire line segment */
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
RNG *rng, bool probalistic_scatter)
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
PathRadiance *L,
ccl_addr_space float3 *throughput,
ccl_addr_space RNG *rng,
bool probalistic_scatter)
{
VolumeShaderCoefficients coeff;
@ -444,8 +462,14 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
* volume until we reach the end, get absorbed entirely, or run out of
* iterations. this does probabilistically scatter or get transmitted through
* for path tracing where we don't want to branch. */
ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
KernelGlobals *kg,
ccl_addr_space PathState *state,
Ray *ray,
ShaderData *sd,
PathRadiance *L,
ccl_addr_space float3 *throughput,
ccl_addr_space RNG *rng)
{
float3 tp = *throughput;
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@ -453,7 +477,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
float step_size = kernel_data.integrator.volume_step_size;
float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
/* compute coefficients at the start */
float t = 0.0f;
@ -474,7 +498,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
/* use random position inside this segment to sample shader */
if(new_t == ray->t)
random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
VolumeShaderCoefficients coeff;
@ -579,8 +603,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
* ray, with the assumption that there are no surfaces blocking light
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
KernelGlobals *kg,
ccl_addr_space PathState *state,
ShaderData *sd,
Ray *ray,
PathRadiance *L,
ccl_addr_space float3 *throughput,
ccl_addr_space RNG *rng,
bool heterogeneous)
{
shader_setup_from_volume(kg, sd, ray);
@ -590,6 +621,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
}
#ifndef __SPLIT_KERNEL__
/* Decoupled Volume Sampling
*
* VolumeSegment is list of coefficients and transmittance stored at all steps
@ -990,6 +1022,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
return VOLUME_PATH_SCATTERED;
}
#endif /* __SPLIT_KERNEL */
/* decide if we need to use decoupled or not */
ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
@ -1021,9 +1054,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
ShaderData *stack_sd,
const PathState *state,
const Ray *ray,
VolumeStack *stack)
ccl_addr_space const PathState *state,
ccl_addr_space const Ray *ray,
ccl_addr_space VolumeStack *stack)
{
/* NULL ray happens in the baker, does it need proper initialization of
* camera in volume?
@ -1166,7 +1199,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
}
}
ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
{
/* todo: we should have some way for objects to indicate if they want the
* world shader to work inside them. excluding it by default is problematic
@ -1215,7 +1248,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
ShaderData *stack_sd,
Ray *ray,
VolumeStack *stack)
ccl_addr_space VolumeStack *stack)
{
kernel_assert(kernel_data.integrator.use_volumes);
@ -1277,7 +1310,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
* the world's one after the last bounce to avoid render artifacts.
*/
ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
VolumeStack *volume_stack)
ccl_addr_space VolumeStack *volume_stack)
{
if(kernel_data.background.volume_shader != SHADER_NONE) {
/* Keep the world's volume in stack. */

View File

@ -74,13 +74,17 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));

View File

@ -41,13 +41,17 @@
# include "split/kernel_path_init.h"
# include "split/kernel_scene_intersect.h"
# include "split/kernel_lamp_emission.h"
# include "split/kernel_do_volume.h"
# include "split/kernel_queue_enqueue.h"
# include "split/kernel_background_buffer_update.h"
# include "split/kernel_indirect_background.h"
# include "split/kernel_shader_eval.h"
# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "split/kernel_subsurface_scatter.h"
# include "split/kernel_direct_lighting.h"
# include "split/kernel_shadow_blocked.h"
# include "split/kernel_next_iteration_setup.h"
# include "split/kernel_indirect_subsurface.h"
# include "split/kernel_buffer_update.h"
#endif
CCL_NAMESPACE_BEGIN
@ -166,13 +170,17 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
{
@ -189,13 +197,17 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
REGISTER(path_init);
REGISTER(scene_intersect);
REGISTER(lamp_emission);
REGISTER(do_volume);
REGISTER(queue_enqueue);
REGISTER(background_buffer_update);
REGISTER(indirect_background);
REGISTER(shader_eval);
REGISTER(holdout_emission_blurring_pathtermination_ao);
REGISTER(subsurface_scatter);
REGISTER(direct_lighting);
REGISTER(shadow_blocked);
REGISTER(next_iteration_setup);
REGISTER(indirect_subsurface);
REGISTER(buffer_update);
#undef REGISTER
#undef REGISTER_EVAL_NAME

View File

@ -16,11 +16,11 @@
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
#include "split/kernel_background_buffer_update.h"
#include "split/kernel_buffer_update.h"
__kernel void kernel_ocl_path_trace_background_buffer_update(
__kernel void kernel_ocl_path_trace_buffer_update(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
kernel_background_buffer_update(kg);
kernel_buffer_update(kg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
#include "split/kernel_do_volume.h"
__kernel void kernel_ocl_path_trace_do_volume(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
kernel_do_volume(kg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
#include "split/kernel_indirect_background.h"
__kernel void kernel_ocl_path_trace_indirect_background(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
kernel_indirect_background(kg);
}

View File

@ -0,0 +1,26 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
#include "split/kernel_indirect_subsurface.h"
__kernel void kernel_ocl_path_trace_indirect_subsurface(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
kernel_indirect_subsurface(kg);
}

View File

@ -0,0 +1,34 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_state_buffer_size.cl"
#include "kernel_data_init.cl"
#include "kernel_path_init.cl"
#include "kernel_scene_intersect.cl"
#include "kernel_lamp_emission.cl"
#include "kernel_do_volume.cl"
#include "kernel_indirect_background.cl"
#include "kernel_queue_enqueue.cl"
#include "kernel_shader_eval.cl"
#include "kernel_holdout_emission_blurring_pathtermination_ao.cl"
#include "kernel_subsurface_scatter.cl"
#include "kernel_direct_lighting.cl"
#include "kernel_shadow_blocked.cl"
#include "kernel_next_iteration_setup.cl"
#include "kernel_indirect_subsurface.cl"
#include "kernel_buffer_update.cl"

View File

@ -0,0 +1,26 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel_compat_opencl.h"
#include "split/kernel_split_common.h"
#include "split/kernel_subsurface_scatter.h"
__kernel void kernel_ocl_path_trace_subsurface_scatter(
KernelGlobals *kg,
ccl_constant KernelData *data)
{
kernel_subsurface_scatter(kg);
}

View File

@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN
* QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
*/
ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
ccl_device void kernel_buffer_update(KernelGlobals *kg)
{
ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
/* eval background shader if nothing hit */
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
*L_transparent = (*L_transparent) + average((*throughput));
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
}
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
float3 L_sum = path_radiance_clamp_and_sum(kg, L);
kernel_write_light_passes(kg, buffer, L, sample);
@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
*L_transparent = 0.0f;
path_radiance_init(L, kernel_data.film.use_light_pass);
path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray);
#ifdef __SUBSURFACE__
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
#ifdef __KERNEL_DEBUG__
debug_data_init(debug_data);
#endif

View File

@ -0,0 +1,97 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
#ifdef __VOLUME__
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
}
/* Fetch use_queues_flag. */
ccl_local char local_use_queues_flag;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_use_queues_flag = *kernel_split_params.use_queues_flag;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
if(local_use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}
}
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
{
/* integrate along volume segment with distance sampling */
VolumeIntegrateResult result = kernel_volume_integrate(
kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
if(result == VOLUME_PATH_SCATTERED) {
/* direct lighting */
kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L);
/* indirect light bounce */
if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray))
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
else
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
}
# endif
}
}
}
#endif
}
CCL_NAMESPACE_END
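
The queue handling at the top of kernel_do_volume is the pattern every split kernel in this commit follows: map the thread id to a ray slot through get_ray_index() and stop when the queue returns QUEUE_EMPTY_SLOT (the kernel that drains a queue also resets its counter from thread 0). Below is a toy, single-threaded C model of that fetch; the flat one-array-per-queue layout and every name other than QUEUE_EMPTY_SLOT are assumptions made for illustration, not the actual Cycles layout.

#include <stdio.h>

#define QUEUE_EMPTY_SLOT (-1)
#define QUEUE_SIZE 8

/* One flat array per queue; valid ray indices first, empty slots after. */
static int queue_data[QUEUE_SIZE] = {
	5, 2, 7,
	QUEUE_EMPTY_SLOT, QUEUE_EMPTY_SLOT, QUEUE_EMPTY_SLOT,
	QUEUE_EMPTY_SLOT, QUEUE_EMPTY_SLOT
};

static int get_ray_index(int thread_index)
{
	if(thread_index >= QUEUE_SIZE)
		return QUEUE_EMPTY_SLOT;
	return queue_data[thread_index];
}

int main(void)
{
	for(int tid = 0; tid < 5; tid++) {
		int ray_index = get_ray_index(tid);
		if(ray_index == QUEUE_EMPTY_SLOT)
			printf("thread %d: no ray queued\n", tid);
		else
			printf("thread %d: works on ray %d\n", tid, ray_index);
	}
	return 0;
}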

View File

@ -0,0 +1,87 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_background(KernelGlobals *kg)
{
/*
ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
// */
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
0);
#ifdef __COMPUTE_DEVICE_GPU__
/* If we are executing on a GPU device, we exit all threads that are not
* required.
*
* If we are executing on a CPU device, then we need to keep all threads
* active since we have barrier() calls later in the kernel. CPU devices
* expect all threads to execute the barrier statement.
*/
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}
#endif
#ifndef __COMPUTE_DEVICE_GPU__
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
/* eval background shader if nothing hit */
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
*L_transparent = (*L_transparent) + average((*throughput));
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
#ifdef __BACKGROUND__
/* sample background shader */
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
#endif
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
}
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
}
CCL_NAMESPACE_END
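
The __COMPUTE_DEVICE_GPU__ guard above encodes one rule: a thread may return early only when no barrier follows; on CPU OpenCL devices every work item must reach each barrier, so the body is skipped instead. A self-contained C sketch of that shape, using a stand-in COMPUTE_DEVICE_GPU macro and a dummy kernel body (both assumptions for illustration):

#include <stdio.h>

#define QUEUE_EMPTY_SLOT (-1)
/* #define COMPUTE_DEVICE_GPU */  /* stand-in for __COMPUTE_DEVICE_GPU__ */

static void kernel_body(int ray_index)
{
	printf("shading ray %d\n", ray_index);
}

static void guarded_kernel(int ray_index)
{
#ifdef COMPUTE_DEVICE_GPU
	/* GPU: surplus threads can simply exit, nothing below needs them. */
	if(ray_index == QUEUE_EMPTY_SLOT)
		return;
	kernel_body(ray_index);
#else
	/* CPU: keep the work item alive and skip the body instead, so every
	 * thread would still reach a barrier placed after this block. */
	if(ray_index != QUEUE_EMPTY_SLOT) {
		kernel_body(ray_index);
	}
#endif
}

int main(void)
{
	guarded_kernel(3);
	guarded_kernel(QUEUE_EMPTY_SLOT);
	return 0;
}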

View File

@ -0,0 +1,77 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
CCL_NAMESPACE_BEGIN
ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
{
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
if(thread_index == 0) {
/* We will empty both queues in this kernel. */
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
int ray_index;
get_ray_index(kg, thread_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
ray_index = get_ray_index(kg, thread_index,
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
#ifdef __SUBSURFACE__
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
kernel_path_subsurface_accum_indirect(ss_indirect, L);
/* Trace indirect subsurface rays by restarting the loop. This uses less
* stack memory than invoking kernel_path_indirect.
*/
if(ss_indirect->num_rays) {
kernel_path_subsurface_setup_indirect(kg,
ss_indirect,
state,
ray,
L,
throughput);
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
}
}
#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END
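
The comment about restarting the loop instead of invoking kernel_path_indirect is the usual recursion-to-iteration trade: pending indirect rays are stored per path and the outer loop is re-entered, so stack depth stays constant. A toy C illustration with hypothetical names (PendingRays, trace_one_ray):

#include <stdio.h>

#define MAX_PENDING 4

/* Hypothetical per-path store of indirect rays still to be traced. */
typedef struct PendingRays {
	int num_rays;
	int rays[MAX_PENDING];
} PendingRays;

static void trace_one_ray(int ray, PendingRays *pending)
{
	printf("tracing ray %d\n", ray);
	/* A subsurface hit may queue follow-up indirect rays instead of recursing. */
	if(ray == 0 && pending->num_rays < MAX_PENDING)
		pending->rays[pending->num_rays++] = 100 + pending->num_rays;
}

int main(void)
{
	PendingRays pending = {0, {0}};
	trace_one_ray(0, &pending);        /* camera ray */
	while(pending.num_rays) {          /* restart the loop, no recursion */
		int ray = pending.rays[--pending.num_rays];
		trace_one_ray(ray, &pending);
	}
	return 0;
}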

View File

@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN
*/
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
{
#ifndef __VOLUME__
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
}
#endif
/* Fetch use_queues_flag. */
ccl_local char local_use_queues_flag;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
#ifndef __VOLUME__
1
#else
0
#endif
);
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}

View File

@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
&kernel_split_state.rng[ray_index],
my_sample,
&kernel_split_state.ray[ray_index]);
#ifdef __SUBSURFACE__
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
#endif
#ifdef __KERNEL_DEBUG__
debug_data_init(&kernel_split_state.debug_data[ray_index]);
#endif

View File

@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
int queue_number = -1;
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
}
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
}
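
After this change the enqueue step routes two ray states into each queue instead of one. A minimal C restatement of that routing; the enum values are stand-ins, only the grouping mirrors the code above:

#include <stdio.h>

enum {
	RAY_ACTIVE, RAY_REGENERATED, RAY_HIT_BACKGROUND, RAY_UPDATE_BUFFER
};
enum {
	QUEUE_ACTIVE_AND_REGENERATED_RAYS, QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS
};

static int queue_for_state(int state)
{
	if(state == RAY_HIT_BACKGROUND || state == RAY_UPDATE_BUFFER)
		return QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
	if(state == RAY_ACTIVE || state == RAY_REGENERATED)
		return QUEUE_ACTIVE_AND_REGENERATED_RAYS;
	return -1;  /* other states are not enqueued */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       queue_for_state(RAY_ACTIVE),
	       queue_for_state(RAY_REGENERATED),
	       queue_for_state(RAY_HIT_BACKGROUND),
	       queue_for_state(RAY_UPDATE_BUFFER));
	return 0;
}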

View File

@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
#endif
Intersection *isect = &kernel_split_state.isect[ray_index];
Intersection isect;
PathState state = kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
}
bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax);
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
#else
bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f);
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
#endif
kernel_split_state.isect[ray_index] = isect;
#ifdef __KERNEL_DEBUG__
if(state.flag & PATH_RAY_CAMERA) {
debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
debug_data->num_bvh_intersections += isect->num_intersections;
debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
debug_data->num_bvh_intersections += isect.num_intersections;
}
debug_data->num_ray_bounces++;
#endif
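
The change above replaces a pointer into the global split-state array with a private Intersection that traversal fills locally and that is written back with a single store. A small C sketch of that copy-in/copy-out shape (the struct fields are placeholders):

#include <stdio.h>

typedef struct Intersection {
	float t;
	int prim;
} Intersection;

/* Stand-in for the per-ray split-state array in global memory. */
static Intersection global_isect[2];

static int scene_intersect_stub(Intersection *isect)
{
	isect->t = 1.5f;     /* pretend the ray hit something */
	isect->prim = 42;
	return 1;
}

int main(void)
{
	int ray_index = 0;
	Intersection isect;                      /* private/register copy */
	int hit = scene_intersect_stub(&isect);  /* traversal writes locally */
	global_isect[ray_index] = isect;         /* single store back to global */
	printf("hit=%d t=%f prim=%d\n", hit,
	       global_isect[ray_index].t, global_isect[ray_index].prim);
	return 0;
}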

View File

@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
/* Continue on with shader evaluation. */
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
Intersection *isect = &kernel_split_state.isect[ray_index];
Intersection isect = kernel_split_state.isect[ray_index];
ccl_global uint *rng = &kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
Ray ray = kernel_split_state.ray[ray_index];
shader_setup_from_ray(kg,
&kernel_split_state.sd[ray_index],
isect,
&isect,
&ray);
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);

View File

@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
: light_ray_dl_global;
float3 shadow;
Ray ray = *light_ray_global;
update_path_radiance = !(shadow_blocked(kg,
&kernel_split_state.sd_DL_shadow[thread_index],
state,
light_ray_global,
&ray,
&shadow));
*light_ray_global = ray;
/* We use light_ray_global's P and t to store shadow and
* update_path_radiance.
*/
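
The comment describes a packing trick: instead of adding new split-state arrays, the shadow result and the update flag are stashed in fields of the light ray that is already stored per thread. A hedged C sketch of the idea; the Ray fields used and the encoding are assumptions for illustration, not the literal kernel code:

#include <stdio.h>

typedef struct float3 { float x, y, z; } float3;
typedef struct Ray { float3 P; float t; } Ray;   /* simplified stand-in */

static void pack_shadow(Ray *light_ray, float3 shadow, int update_path_radiance)
{
	if(update_path_radiance)
		light_ray->P = shadow;                  /* reuse P for the attenuation */
	light_ray->t = (float)update_path_radiance; /* reuse t for the flag */
}

static int unpack_update_flag(const Ray *light_ray)
{
	return light_ray->t != 0.0f;
}

int main(void)
{
	Ray light_ray = {{0.0f, 0.0f, 0.0f}, 0.0f};
	float3 shadow = {0.25f, 0.25f, 0.25f};
	pack_shadow(&light_ray, shadow, 1);
	printf("update=%d shadow.x=%f\n",
	       unpack_update_flag(&light_ray), light_ray.P.x);
	return 0;
}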

View File

@ -52,11 +52,11 @@
#include "kernel_passes.h"
#ifdef __SUBSURFACE__
#include "kernel_subsurface.h"
# include "kernel_subsurface.h"
#endif
#ifdef __VOLUME__
#include "kernel_volume.h"
# include "kernel_volume.h"
#endif
#include "kernel_path_state.h"
@ -65,9 +65,10 @@
#include "kernel_path_common.h"
#include "kernel_path_surface.h"
#include "kernel_path_volume.h"
#include "kernel_path_subsurface.h"
#ifdef __KERNEL_DEBUG__
#include "kernel_debug.h"
# include "kernel_debug.h"
#endif
#include "kernel_queues.h"

View File

@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
#ifdef __SUBSURFACE__
size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */
#endif
#ifdef __VOLUME__
size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */
#endif
return size;
}
@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
#define SPLIT_DATA_ENTRY(type, name, num) \
split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
SPLIT_DATA_ENTRIES
SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
#ifdef __SUBSURFACE__
split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p;
p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16);
#endif
#ifdef __VOLUME__
split_data->state_shadow = (ccl_global PathState*)p;
p += align_up(2 * num_elements * sizeof(PathState), 16);
#endif
split_data->ray_state = ray_state;
}
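
The new #ifdef blocks must stay in sync between split_data_buffer_size() and split_data_init(): every align_up'ed chunk added to the size has to be carved out of the buffer in the same order and with the same rounding. A self-contained C check of that invariant, with made-up element types standing in for SubsurfaceIndirectRays and PathState:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static size_t align_up(size_t n, size_t a) { return (n + a - 1) & ~(a - 1); }

typedef struct SSRay { float data[8]; } SSRay;     /* stand-in element type */
typedef struct PState { float data[16]; } PState;  /* stand-in element type */

int main(void)
{
	size_t num_elements = 100;

	/* Mirror of the *_buffer_size() side. */
	size_t size = align_up(num_elements * sizeof(SSRay), 16)       /* ss_rays */
	            + align_up(2 * num_elements * sizeof(PState), 16); /* state_shadow */

	/* Mirror of the *_init() side: carve pointers in the same order. */
	uint8_t *buffer = malloc(size);
	if(!buffer)
		return 1;
	uint8_t *p = buffer;
	SSRay *ss_rays = (SSRay*)p;
	p += align_up(num_elements * sizeof(SSRay), 16);
	PState *state_shadow = (PState*)p;
	p += align_up(2 * num_elements * sizeof(PState), 16);

	assert((size_t)(p - buffer) == size);  /* carving matches the size calculation */
	(void)ss_rays;
	(void)state_shadow;
	printf("buffer bytes: %zu\n", size);
	free(buffer);
	return 0;
}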

View File

@ -68,14 +68,13 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
SPLIT_DATA_ENTRY(Intersection, isect, 1) \
SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
@ -88,6 +87,14 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
#ifdef __SUBSURFACE__
ccl_global SubsurfaceIndirectRays *ss_rays;
#endif
#ifdef __VOLUME__
ccl_global PathState *state_shadow;
#endif
/* This is kept in a separate buffer from the rest of the split state data (so it can be read back
* by the host easily), but it is used the same way as the other entries, so it lives in this struct as well.
*/
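
SPLIT_DATA_ENTRIES is an X-macro: one entry list expands both into the struct members above and into the size and pointer-carving code in kernel_split_data.h. A minimal, self-contained C demo of the pattern with hypothetical entries (the align_up rounding of the real code is omitted for brevity):

#include <stddef.h>
#include <stdio.h>

#define DEMO_ENTRIES \
	DEMO_ENTRY(float, throughput, 1) \
	DEMO_ENTRY(int,   is_lamp,    1) \
	DEMO_ENTRY(float, isect,      2)

typedef struct DemoSplitData {
#define DEMO_ENTRY(type, name, num) type *name;
	DEMO_ENTRIES
#undef DEMO_ENTRY
} DemoSplitData;

static size_t demo_buffer_size(size_t num_elements)
{
	size_t size = 0;
#define DEMO_ENTRY(type, name, num) size += num_elements * (num) * sizeof(type);
	DEMO_ENTRIES
#undef DEMO_ENTRY
	return size;
}

int main(void)
{
	printf("bytes for 4 rays: %zu\n", demo_buffer_size(4));
	return 0;
}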

View File

@ -0,0 +1,86 @@
CCL_NAMESPACE_BEGIN
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
{
#ifdef __SUBSURFACE__
ccl_local unsigned int local_queue_atomics;
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
0);
#ifdef __COMPUTE_DEVICE_GPU__
/* If we are executing on a GPU device, we exit all threads that are not
* required.
*
* If we are executing on a CPU device, then we need to keep all threads
* active since we have barrier() calls later in the kernel. CPU devices
* expect all threads to execute the barrier statement.
*/
if(ray_index == QUEUE_EMPTY_SLOT) {
return;
}
#endif
#ifndef __COMPUTE_DEVICE_GPU__
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
char enqueue_flag = 0;
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(sd->flag & SD_BSSRDF) {
if(kernel_path_subsurface_scatter(kg,
sd,
emission_sd,
L,
state,
rng,
ray,
throughput,
ss_indirect)) {
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
enqueue_flag = 1;
}
}
}
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
/* Enqueue RAY_UPDATE_BUFFER rays. */
enqueue_ray_index_local(ray_index,
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
enqueue_flag,
kernel_split_params.queue_size,
&local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif /* __SUBSURFACE__ */
}
CCL_NAMESPACE_END