Cycles: SSS and Volume rendering in split kernel
Decoupled ray marching is not supported yet. Transparent shadows are always enabled for volume rendering. Changes in kernel/bvh and kernel/geom are from Sergey. This simiplifies code significantly, and prepares it for record-all transparent shadow function in split kernel.
This commit is contained in:
parent
6c942db30d
commit
57e26627c4
@ -194,7 +194,7 @@ public:
|
||||
if(!use_patch_evaluation) {
|
||||
build_options += " -D__NO_PATCH_EVAL__";
|
||||
}
|
||||
if(!use_transparent) {
|
||||
if(!use_transparent && !use_volume) {
|
||||
build_options += " -D__NO_TRANSPARENT__";
|
||||
}
|
||||
return build_options;
|
||||
|
@ -35,13 +35,17 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
|
||||
kernel_path_init = NULL;
|
||||
kernel_scene_intersect = NULL;
|
||||
kernel_lamp_emission = NULL;
|
||||
kernel_do_volume = NULL;
|
||||
kernel_queue_enqueue = NULL;
|
||||
kernel_background_buffer_update = NULL;
|
||||
kernel_indirect_background = NULL;
|
||||
kernel_shader_eval = NULL;
|
||||
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
|
||||
kernel_subsurface_scatter = NULL;
|
||||
kernel_direct_lighting = NULL;
|
||||
kernel_shadow_blocked = NULL;
|
||||
kernel_next_iteration_setup = NULL;
|
||||
kernel_indirect_subsurface = NULL;
|
||||
kernel_buffer_update = NULL;
|
||||
}
|
||||
|
||||
DeviceSplitKernel::~DeviceSplitKernel()
|
||||
@ -55,13 +59,17 @@ DeviceSplitKernel::~DeviceSplitKernel()
|
||||
delete kernel_path_init;
|
||||
delete kernel_scene_intersect;
|
||||
delete kernel_lamp_emission;
|
||||
delete kernel_do_volume;
|
||||
delete kernel_queue_enqueue;
|
||||
delete kernel_background_buffer_update;
|
||||
delete kernel_indirect_background;
|
||||
delete kernel_shader_eval;
|
||||
delete kernel_holdout_emission_blurring_pathtermination_ao;
|
||||
delete kernel_subsurface_scatter;
|
||||
delete kernel_direct_lighting;
|
||||
delete kernel_shadow_blocked;
|
||||
delete kernel_next_iteration_setup;
|
||||
delete kernel_indirect_subsurface;
|
||||
delete kernel_buffer_update;
|
||||
}
|
||||
|
||||
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
|
||||
@ -75,13 +83,17 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
|
||||
LOAD_KERNEL(path_init);
|
||||
LOAD_KERNEL(scene_intersect);
|
||||
LOAD_KERNEL(lamp_emission);
|
||||
LOAD_KERNEL(do_volume);
|
||||
LOAD_KERNEL(queue_enqueue);
|
||||
LOAD_KERNEL(background_buffer_update);
|
||||
LOAD_KERNEL(indirect_background);
|
||||
LOAD_KERNEL(shader_eval);
|
||||
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
|
||||
LOAD_KERNEL(subsurface_scatter);
|
||||
LOAD_KERNEL(direct_lighting);
|
||||
LOAD_KERNEL(shadow_blocked);
|
||||
LOAD_KERNEL(next_iteration_setup);
|
||||
LOAD_KERNEL(indirect_subsurface);
|
||||
LOAD_KERNEL(buffer_update);
|
||||
|
||||
#undef LOAD_KERNEL
|
||||
|
||||
@ -220,13 +232,18 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
||||
for(int PathIter = 0; PathIter < 16; PathIter++) {
|
||||
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
|
||||
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
|
||||
|
||||
if(task->get_cancel()) {
|
||||
return true;
|
||||
|
@ -58,13 +58,17 @@ private:
|
||||
SplitKernelFunction *kernel_path_init;
|
||||
SplitKernelFunction *kernel_scene_intersect;
|
||||
SplitKernelFunction *kernel_lamp_emission;
|
||||
SplitKernelFunction *kernel_do_volume;
|
||||
SplitKernelFunction *kernel_queue_enqueue;
|
||||
SplitKernelFunction *kernel_background_buffer_update;
|
||||
SplitKernelFunction *kernel_indirect_background;
|
||||
SplitKernelFunction *kernel_shader_eval;
|
||||
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
|
||||
SplitKernelFunction *kernel_subsurface_scatter;
|
||||
SplitKernelFunction *kernel_direct_lighting;
|
||||
SplitKernelFunction *kernel_shadow_blocked;
|
||||
SplitKernelFunction *kernel_next_iteration_setup;
|
||||
SplitKernelFunction *kernel_indirect_subsurface;
|
||||
SplitKernelFunction *kernel_buffer_update;
|
||||
|
||||
/* Global memory variables [porting]; These memory is used for
|
||||
* co-operation between different kernels; Data written by one
|
||||
|
@ -21,12 +21,16 @@ set(SRC
|
||||
kernels/opencl/kernel_queue_enqueue.cl
|
||||
kernels/opencl/kernel_scene_intersect.cl
|
||||
kernels/opencl/kernel_lamp_emission.cl
|
||||
kernels/opencl/kernel_background_buffer_update.cl
|
||||
kernels/opencl/kernel_do_volume.cl
|
||||
kernels/opencl/kernel_indirect_background.cl
|
||||
kernels/opencl/kernel_shader_eval.cl
|
||||
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
|
||||
kernels/opencl/kernel_subsurface_scatter.cl
|
||||
kernels/opencl/kernel_direct_lighting.cl
|
||||
kernels/opencl/kernel_shadow_blocked.cl
|
||||
kernels/opencl/kernel_next_iteration_setup.cl
|
||||
kernels/opencl/kernel_indirect_subsurface.cl
|
||||
kernels/opencl/kernel_buffer_update.cl
|
||||
kernels/cuda/kernel.cu
|
||||
kernels/cuda/kernel_split.cu
|
||||
)
|
||||
@ -71,6 +75,7 @@ set(SRC_HEADERS
|
||||
kernel_path_common.h
|
||||
kernel_path_state.h
|
||||
kernel_path_surface.h
|
||||
kernel_path_subsurface.h
|
||||
kernel_path_volume.h
|
||||
kernel_projection.h
|
||||
kernel_queues.h
|
||||
@ -196,10 +201,13 @@ set(SRC_UTIL_HEADERS
|
||||
)
|
||||
|
||||
set(SRC_SPLIT_HEADERS
|
||||
split/kernel_background_buffer_update.h
|
||||
split/kernel_buffer_update.h
|
||||
split/kernel_data_init.h
|
||||
split/kernel_direct_lighting.h
|
||||
split/kernel_do_volume.h
|
||||
split/kernel_holdout_emission_blurring_pathtermination_ao.h
|
||||
split/kernel_indirect_background.h
|
||||
split/kernel_indirect_subsurface.h
|
||||
split/kernel_lamp_emission.h
|
||||
split/kernel_next_iteration_setup.h
|
||||
split/kernel_path_init.h
|
||||
@ -210,6 +218,7 @@ set(SRC_SPLIT_HEADERS
|
||||
split/kernel_split_common.h
|
||||
split/kernel_split_data.h
|
||||
split/kernel_split_data_types.h
|
||||
split/kernel_subsurface_scatter.h
|
||||
)
|
||||
|
||||
# CUDA module
|
||||
@ -407,12 +416,16 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl"
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
|
||||
|
@ -309,9 +309,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@ -362,12 +362,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
@ -75,16 +75,16 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
&isect_t,
|
||||
&ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
isect_t,
|
||||
&ob_itfm);
|
||||
#else
|
||||
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
|
||||
#endif
|
||||
object = subsurface_object;
|
||||
}
|
||||
|
@ -354,9 +354,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
@ -391,9 +391,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
||||
|
@ -238,9 +238,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@ -281,9 +281,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
|
||||
/* instance pop */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
|
@ -288,11 +288,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
# endif
|
||||
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
@ -348,11 +347,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
# endif
|
||||
triangle_intersect_precalc(dir, &isect_precalc);
|
||||
}
|
||||
|
@ -390,9 +390,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
# endif
|
||||
|
||||
num_hits_in_instance = 0;
|
||||
@ -445,11 +445,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -64,16 +64,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
#if BVH_FEATURE(BVH_MOTION)
|
||||
Transform ob_itfm;
|
||||
bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
&isect_t,
|
||||
&ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg,
|
||||
subsurface_object,
|
||||
ray,
|
||||
&P,
|
||||
&dir,
|
||||
&idir,
|
||||
isect_t,
|
||||
&ob_itfm);
|
||||
#else
|
||||
bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
|
||||
#endif
|
||||
object = subsurface_object;
|
||||
}
|
||||
|
@ -468,9 +468,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
|
||||
/* Instance pop. */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
|
@ -295,9 +295,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
@ -341,9 +341,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
|
||||
/* Instance pop. */
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
|
||||
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
|
||||
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
|
@ -346,9 +346,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if(object_flag & SD_OBJECT_HAS_VOLUME) {
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
|
||||
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
|
||||
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
|
||||
# endif
|
||||
|
||||
qbvh_near_far_idx_calc(idir,
|
||||
@ -406,11 +406,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
else {
|
||||
float ignore_t = FLT_MAX;
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
|
||||
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
|
||||
# else
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
|
||||
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -425,7 +425,13 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
|
||||
|
||||
/* Transform ray into object space to enter static object in BVH */
|
||||
|
||||
ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
|
||||
ccl_device_inline float bvh_instance_push(KernelGlobals *kg,
|
||||
int object,
|
||||
const Ray *ray,
|
||||
float3 *P,
|
||||
float3 *dir,
|
||||
float3 *idir,
|
||||
float t)
|
||||
{
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
|
||||
@ -435,8 +441,11 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra
|
||||
*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
if(*t != FLT_MAX)
|
||||
*t *= len;
|
||||
if(t != FLT_MAX) {
|
||||
t *= len;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
#ifdef __QBVH__
|
||||
@ -473,16 +482,24 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
|
||||
|
||||
/* Transorm ray to exit static object in BVH */
|
||||
|
||||
ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
|
||||
ccl_device_inline float bvh_instance_pop(KernelGlobals *kg,
|
||||
int object,
|
||||
const Ray *ray,
|
||||
float3 *P,
|
||||
float3 *dir,
|
||||
float3 *idir,
|
||||
float t)
|
||||
{
|
||||
if(*t != FLT_MAX) {
|
||||
if(t != FLT_MAX) {
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
*t /= len(transform_direction(&tfm, ray->D));
|
||||
t /= len(transform_direction(&tfm, ray->D));
|
||||
}
|
||||
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
||||
@ -501,13 +518,13 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co
|
||||
#ifdef __OBJECT_MOTION__
|
||||
/* Transform ray into object space to enter motion blurred object in BVH */
|
||||
|
||||
ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
|
||||
ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
|
||||
int object,
|
||||
const Ray *ray,
|
||||
float3 *P,
|
||||
float3 *dir,
|
||||
float3 *idir,
|
||||
ccl_addr_space float *t,
|
||||
float t,
|
||||
Transform *itfm)
|
||||
{
|
||||
object_fetch_transform_motion_test(kg, object, ray->time, itfm);
|
||||
@ -518,8 +535,11 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
|
||||
*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
if(*t != FLT_MAX)
|
||||
*t *= len;
|
||||
if(t != FLT_MAX) {
|
||||
t *= len;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
#ifdef __QBVH__
|
||||
@ -557,22 +577,24 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
|
||||
|
||||
/* Transorm ray to exit motion blurred object in BVH */
|
||||
|
||||
ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg,
|
||||
int object,
|
||||
const Ray *ray,
|
||||
float3 *P,
|
||||
float3 *dir,
|
||||
float3 *idir,
|
||||
ccl_addr_space float *t,
|
||||
Transform *itfm)
|
||||
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
|
||||
int object,
|
||||
const Ray *ray,
|
||||
float3 *P,
|
||||
float3 *dir,
|
||||
float3 *idir,
|
||||
float t,
|
||||
Transform *itfm)
|
||||
{
|
||||
if(*t != FLT_MAX) {
|
||||
*t /= len(transform_direction(itfm, ray->D));
|
||||
if(t != FLT_MAX) {
|
||||
t /= len(transform_direction(itfm, ray->D));
|
||||
}
|
||||
|
||||
*P = ray->P;
|
||||
*dir = bvh_clamp_direction(ray->D);
|
||||
*idir = bvh_inverse_direction(*dir);
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Same as above, but returns scale factor to apply to multiple intersection distances */
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include "kernel_path_common.h"
|
||||
#include "kernel_path_surface.h"
|
||||
#include "kernel_path_volume.h"
|
||||
#include "kernel_path_subsurface.h"
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
# include "kernel_debug.h"
|
||||
@ -413,172 +414,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
# ifndef __KERNEL_CUDA__
|
||||
ccl_device
|
||||
# else
|
||||
ccl_device_inline
|
||||
# endif
|
||||
bool kernel_path_subsurface_scatter(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
PathState *state,
|
||||
RNG *rng,
|
||||
Ray *ray,
|
||||
float3 *throughput,
|
||||
SubsurfaceIndirectRays *ss_indirect)
|
||||
{
|
||||
float bssrdf_probability;
|
||||
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
|
||||
|
||||
/* modify throughput for picking bssrdf or bsdf */
|
||||
*throughput *= bssrdf_probability;
|
||||
|
||||
/* do bssrdf scatter step if we picked a bssrdf closure */
|
||||
if(sc) {
|
||||
/* We should never have two consecutive BSSRDF bounces,
|
||||
* the second one should be converted to a diffuse BSDF to
|
||||
* avoid this.
|
||||
*/
|
||||
kernel_assert(!ss_indirect->tracing);
|
||||
|
||||
uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
|
||||
|
||||
SubsurfaceIntersection ss_isect;
|
||||
float bssrdf_u, bssrdf_v;
|
||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||
int num_hits = subsurface_scatter_multi_intersect(kg,
|
||||
&ss_isect,
|
||||
sd,
|
||||
sc,
|
||||
&lcg_state,
|
||||
bssrdf_u, bssrdf_v,
|
||||
false);
|
||||
# ifdef __VOLUME__
|
||||
ss_indirect->need_update_volume_stack =
|
||||
kernel_data.integrator.use_volumes &&
|
||||
sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
|
||||
# endif /* __VOLUME__ */
|
||||
|
||||
/* compute lighting with the BSDF closure */
|
||||
for(int hit = 0; hit < num_hits; hit++) {
|
||||
/* NOTE: We reuse the existing ShaderData, we assume the path
|
||||
* integration loop stops when this function returns true.
|
||||
*/
|
||||
subsurface_scatter_multi_setup(kg,
|
||||
&ss_isect,
|
||||
hit,
|
||||
sd,
|
||||
state,
|
||||
state->flag,
|
||||
sc,
|
||||
false);
|
||||
|
||||
PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
|
||||
Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
*hit_state = *state;
|
||||
*hit_ray = *ray;
|
||||
*hit_tp = *throughput;
|
||||
|
||||
hit_state->rng_offset += PRNG_BOUNCE_NUM;
|
||||
|
||||
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
|
||||
hit_L->direct_throughput = L->direct_throughput;
|
||||
path_radiance_copy_indirect(hit_L, L);
|
||||
|
||||
kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
|
||||
|
||||
if(kernel_path_surface_bounce(kg,
|
||||
rng,
|
||||
sd,
|
||||
hit_tp,
|
||||
hit_state,
|
||||
hit_L,
|
||||
hit_ray))
|
||||
{
|
||||
# ifdef __LAMP_MIS__
|
||||
hit_state->ray_t = 0.0f;
|
||||
# endif /* __LAMP_MIS__ */
|
||||
|
||||
# ifdef __VOLUME__
|
||||
if(ss_indirect->need_update_volume_stack) {
|
||||
Ray volume_ray = *ray;
|
||||
/* Setup ray from previous surface point to the new one. */
|
||||
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
|
||||
&volume_ray.t);
|
||||
|
||||
kernel_volume_stack_update_for_subsurface(
|
||||
kg,
|
||||
emission_sd,
|
||||
&volume_ray,
|
||||
hit_state->volume_stack);
|
||||
}
|
||||
# endif /* __VOLUME__ */
|
||||
path_radiance_reset_indirect(L);
|
||||
ss_indirect->num_rays++;
|
||||
}
|
||||
else {
|
||||
path_radiance_accum_sample(L, hit_L, 1);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ccl_device_inline void kernel_path_subsurface_init_indirect(
|
||||
SubsurfaceIndirectRays *ss_indirect)
|
||||
{
|
||||
ss_indirect->tracing = false;
|
||||
ss_indirect->num_rays = 0;
|
||||
}
|
||||
|
||||
ccl_device void kernel_path_subsurface_accum_indirect(
|
||||
SubsurfaceIndirectRays *ss_indirect,
|
||||
PathRadiance *L)
|
||||
{
|
||||
if(ss_indirect->tracing) {
|
||||
path_radiance_sum_indirect(L);
|
||||
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
|
||||
if(ss_indirect->num_rays == 0) {
|
||||
*L = ss_indirect->direct_L;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void kernel_path_subsurface_setup_indirect(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIndirectRays *ss_indirect,
|
||||
PathState *state,
|
||||
Ray *ray,
|
||||
PathRadiance *L,
|
||||
float3 *throughput)
|
||||
{
|
||||
if(!ss_indirect->tracing) {
|
||||
ss_indirect->direct_L = *L;
|
||||
}
|
||||
ss_indirect->tracing = true;
|
||||
|
||||
/* Setup state, ray and throughput for indirect SSS rays. */
|
||||
ss_indirect->num_rays--;
|
||||
|
||||
Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
*state = ss_indirect->state[ss_indirect->num_rays];
|
||||
*ray = *indirect_ray;
|
||||
*L = *indirect_L;
|
||||
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
|
||||
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
|
||||
}
|
||||
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
|
||||
RNG *rng,
|
||||
|
187
intern/cycles/kernel/kernel_path_subsurface.h
Normal file
187
intern/cycles/kernel/kernel_path_subsurface.h
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright 2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
# ifndef __KERNEL_CUDA__
|
||||
ccl_device
|
||||
# else
|
||||
ccl_device_inline
|
||||
# endif
|
||||
bool kernel_path_subsurface_scatter(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space PathState *state,
|
||||
ccl_addr_space RNG *rng,
|
||||
ccl_addr_space Ray *ray,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
|
||||
{
|
||||
float bssrdf_probability;
|
||||
ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
|
||||
|
||||
/* modify throughput for picking bssrdf or bsdf */
|
||||
*throughput *= bssrdf_probability;
|
||||
|
||||
/* do bssrdf scatter step if we picked a bssrdf closure */
|
||||
if(sc) {
|
||||
/* We should never have two consecutive BSSRDF bounces,
|
||||
* the second one should be converted to a diffuse BSDF to
|
||||
* avoid this.
|
||||
*/
|
||||
kernel_assert(!ss_indirect->tracing);
|
||||
|
||||
uint lcg_state = lcg_state_init_addrspace(rng, state, 0x68bc21eb);
|
||||
|
||||
SubsurfaceIntersection ss_isect;
|
||||
float bssrdf_u, bssrdf_v;
|
||||
path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
|
||||
int num_hits = subsurface_scatter_multi_intersect(kg,
|
||||
&ss_isect,
|
||||
sd,
|
||||
sc,
|
||||
&lcg_state,
|
||||
bssrdf_u, bssrdf_v,
|
||||
false);
|
||||
# ifdef __VOLUME__
|
||||
ss_indirect->need_update_volume_stack =
|
||||
kernel_data.integrator.use_volumes &&
|
||||
sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
|
||||
# endif /* __VOLUME__ */
|
||||
|
||||
/* compute lighting with the BSDF closure */
|
||||
for(int hit = 0; hit < num_hits; hit++) {
|
||||
/* NOTE: We reuse the existing ShaderData, we assume the path
|
||||
* integration loop stops when this function returns true.
|
||||
*/
|
||||
subsurface_scatter_multi_setup(kg,
|
||||
&ss_isect,
|
||||
hit,
|
||||
sd,
|
||||
state,
|
||||
state->flag,
|
||||
sc,
|
||||
false);
|
||||
|
||||
ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
|
||||
ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
*hit_state = *state;
|
||||
*hit_ray = *ray;
|
||||
*hit_tp = *throughput;
|
||||
|
||||
hit_state->rng_offset += PRNG_BOUNCE_NUM;
|
||||
|
||||
path_radiance_init(hit_L, kernel_data.film.use_light_pass);
|
||||
hit_L->direct_throughput = L->direct_throughput;
|
||||
path_radiance_copy_indirect(hit_L, L);
|
||||
|
||||
kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
|
||||
|
||||
if(kernel_path_surface_bounce(kg,
|
||||
rng,
|
||||
sd,
|
||||
hit_tp,
|
||||
hit_state,
|
||||
hit_L,
|
||||
hit_ray))
|
||||
{
|
||||
# ifdef __LAMP_MIS__
|
||||
hit_state->ray_t = 0.0f;
|
||||
# endif /* __LAMP_MIS__ */
|
||||
|
||||
# ifdef __VOLUME__
|
||||
if(ss_indirect->need_update_volume_stack) {
|
||||
Ray volume_ray = *ray;
|
||||
/* Setup ray from previous surface point to the new one. */
|
||||
volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
|
||||
&volume_ray.t);
|
||||
|
||||
kernel_volume_stack_update_for_subsurface(
|
||||
kg,
|
||||
emission_sd,
|
||||
&volume_ray,
|
||||
hit_state->volume_stack);
|
||||
}
|
||||
# endif /* __VOLUME__ */
|
||||
path_radiance_reset_indirect(L);
|
||||
ss_indirect->num_rays++;
|
||||
}
|
||||
else {
|
||||
path_radiance_accum_sample(L, hit_L, 1);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ccl_device_inline void kernel_path_subsurface_init_indirect(
|
||||
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
|
||||
{
|
||||
ss_indirect->tracing = false;
|
||||
ss_indirect->num_rays = 0;
|
||||
}
|
||||
|
||||
ccl_device void kernel_path_subsurface_accum_indirect(
|
||||
ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
|
||||
PathRadiance *L)
|
||||
{
|
||||
if(ss_indirect->tracing) {
|
||||
path_radiance_sum_indirect(L);
|
||||
path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
|
||||
if(ss_indirect->num_rays == 0) {
|
||||
*L = ss_indirect->direct_L;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void kernel_path_subsurface_setup_indirect(
|
||||
KernelGlobals *kg,
|
||||
ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
|
||||
ccl_addr_space PathState *state,
|
||||
ccl_addr_space Ray *ray,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space float3 *throughput)
|
||||
{
|
||||
if(!ss_indirect->tracing) {
|
||||
ss_indirect->direct_L = *L;
|
||||
}
|
||||
ss_indirect->tracing = true;
|
||||
|
||||
/* Setup state, ray and throughput for indirect SSS rays. */
|
||||
ss_indirect->num_rays--;
|
||||
|
||||
ccl_addr_space Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
|
||||
PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
|
||||
|
||||
*state = ss_indirect->state[ss_indirect->num_rays];
|
||||
*ray = *indirect_ray;
|
||||
*L = *indirect_L;
|
||||
*throughput = ss_indirect->throughputs[ss_indirect->num_rays];
|
||||
|
||||
state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
|
||||
}
|
||||
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
|
||||
#if (defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)) && !defined(__SPLIT_KERNEL__)
|
||||
|
||||
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
|
||||
ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
|
||||
@ -188,7 +188,6 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef __SPLIT_KERNEL__
|
||||
/* path tracing: connect path directly to position on a light and add it to L */
|
||||
ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
|
||||
ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
|
||||
@ -226,7 +225,6 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* path tracing: bounce off or through surface to with new direction stored in ray */
|
||||
ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
|
||||
|
@ -20,11 +20,11 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device_inline void kernel_path_volume_connect_light(
|
||||
KernelGlobals *kg,
|
||||
RNG *rng,
|
||||
ccl_addr_space RNG *rng,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
float3 throughput,
|
||||
PathState *state,
|
||||
ccl_addr_space PathState *state,
|
||||
PathRadiance *L)
|
||||
{
|
||||
#ifdef __EMISSION__
|
||||
@ -59,7 +59,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif /* __EMISSION__ */
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_GPU__
|
||||
@ -67,8 +67,14 @@ ccl_device_noinline
|
||||
#else
|
||||
ccl_device
|
||||
#endif
|
||||
bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
|
||||
ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
|
||||
bool kernel_path_volume_bounce(
|
||||
KernelGlobals *kg,
|
||||
ccl_addr_space RNG *rng,
|
||||
ShaderData *sd,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space PathState *state,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space Ray *ray)
|
||||
{
|
||||
/* sample phase function */
|
||||
float phase_pdf;
|
||||
@ -111,6 +117,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef __BRANCHED_PATH__
|
||||
ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
|
||||
ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
|
||||
bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
|
||||
@ -261,10 +268,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif /* __EMISSION__ */
|
||||
}
|
||||
#endif /* __BRANCHED_PATH__ */
|
||||
|
||||
#endif
|
||||
#endif /* __VOLUME_SCATTER__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@ -203,11 +203,11 @@ void shader_setup_from_subsurface(
|
||||
# ifdef __INSTANCING__
|
||||
if(isect->object != OBJECT_NONE) {
|
||||
/* instance transform */
|
||||
object_normal_transform(kg, sd, &sd->N);
|
||||
object_normal_transform(kg, sd, &sd->Ng);
|
||||
object_normal_transform_auto(kg, sd, &sd->N);
|
||||
object_normal_transform_auto(kg, sd, &sd->Ng);
|
||||
# ifdef __DPDU__
|
||||
object_dir_transform(kg, sd, &sd->dPdu);
|
||||
object_dir_transform(kg, sd, &sd->dPdv);
|
||||
object_dir_transform_auto(kg, sd, &sd->dPdu);
|
||||
object_dir_transform_auto(kg, sd, &sd->dPdv);
|
||||
# endif
|
||||
}
|
||||
# endif
|
||||
@ -816,7 +816,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
|
||||
*N_ = (is_zero(N))? sd->N: normalize(N);
|
||||
|
||||
if(texture_blur_)
|
||||
*texture_blur_ = texture_blur/weight_sum;
|
||||
*texture_blur_ = safe_divide(texture_blur, weight_sum);
|
||||
|
||||
return eval;
|
||||
}
|
||||
@ -1036,8 +1036,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
|
||||
|
||||
ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
VolumeStack *stack,
|
||||
ccl_addr_space PathState *state,
|
||||
ccl_addr_space VolumeStack *stack,
|
||||
int path_flag,
|
||||
ShaderContext ctx)
|
||||
{
|
||||
|
@ -24,7 +24,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect(
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
# ifdef __VOLUME__
|
||||
struct PathState *volume_state,
|
||||
ccl_addr_space struct PathState *volume_state,
|
||||
# endif
|
||||
Intersection *isect,
|
||||
Ray *ray,
|
||||
@ -276,7 +276,13 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
|
||||
float3 Pend = ray->P + ray->D*ray->t;
|
||||
int bounce = state->transparent_bounce;
|
||||
# ifdef __VOLUME__
|
||||
PathState ps = *state;
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
|
||||
# else
|
||||
PathState ps_object;
|
||||
PathState *ps = &ps_object;
|
||||
# endif
|
||||
*ps = *state;
|
||||
# endif
|
||||
for(;;) {
|
||||
if(bounce >= kernel_data.integrator.transparent_max_bounce) {
|
||||
@ -299,7 +305,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
|
||||
shadow_sd,
|
||||
state,
|
||||
#ifdef __VOLUME__
|
||||
&ps,
|
||||
ps,
|
||||
#endif
|
||||
isect,
|
||||
ray,
|
||||
@ -316,8 +322,8 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
|
||||
}
|
||||
# ifdef __VOLUME__
|
||||
/* Attenuation for last line segment towards light. */
|
||||
if(ps.volume_stack[0].shader != SHADER_NONE) {
|
||||
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
|
||||
if(ps->volume_stack[0].shader != SHADER_NONE) {
|
||||
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
|
||||
}
|
||||
# endif
|
||||
*shadow *= throughput;
|
||||
@ -365,21 +371,11 @@ ccl_device bool shadow_blocked_transparent_stepped(
|
||||
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
ccl_addr_space Ray *ray_input,
|
||||
Ray *ray_input,
|
||||
float3 *shadow)
|
||||
{
|
||||
/* Special trickery for split kernel: some data is coming from the
|
||||
* global memory.
|
||||
*/
|
||||
#ifdef __SPLIT_KERNEL__
|
||||
Ray private_ray = *ray_input;
|
||||
Ray *ray = &private_ray;
|
||||
Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
|
||||
#else /* __SPLIT_KERNEL__ */
|
||||
Ray *ray = ray_input;
|
||||
Intersection isect_object;
|
||||
Intersection *isect = &isect_object;
|
||||
#endif /* __SPLIT_KERNEL__ */
|
||||
Intersection isect;
|
||||
/* Some common early checks. */
|
||||
*shadow = make_float3(1.0f, 1.0f, 1.0f);
|
||||
if(ray->t == 0.0f) {
|
||||
@ -397,7 +393,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
shadow_sd,
|
||||
state,
|
||||
ray,
|
||||
isect,
|
||||
&isect,
|
||||
shadow);
|
||||
}
|
||||
#ifdef __TRANSPARENT_SHADOWS__
|
||||
@ -423,11 +419,11 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
const bool blocked = scene_intersect(kg,
|
||||
*ray,
|
||||
PATH_RAY_SHADOW_OPAQUE,
|
||||
isect,
|
||||
&isect,
|
||||
NULL,
|
||||
0.0f, 0.0f);
|
||||
const bool is_transparent_isect = blocked
|
||||
? shader_transparent_shadow(kg, isect)
|
||||
? shader_transparent_shadow(kg, &isect)
|
||||
: false;
|
||||
if(!blocked || !is_transparent_isect ||
|
||||
max_hits + 1 >= SHADOW_STACK_MAX_HITS)
|
||||
@ -436,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
shadow_sd,
|
||||
state,
|
||||
ray,
|
||||
isect,
|
||||
&isect,
|
||||
blocked,
|
||||
is_transparent_isect,
|
||||
shadow);
|
||||
@ -454,7 +450,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
|
||||
shadow_sd,
|
||||
state,
|
||||
ray,
|
||||
isect,
|
||||
&isect,
|
||||
shadow);
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
#endif /* __TRANSPARENT_SHADOWS__ */
|
||||
|
@ -185,7 +185,7 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
|
||||
|
||||
ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
ccl_addr_space PathState *state,
|
||||
int state_flag,
|
||||
float3 *eval,
|
||||
float3 *N)
|
||||
@ -277,7 +277,12 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
|
||||
float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
|
||||
|
||||
/* create ray */
|
||||
#ifdef __SPLIT_KERNEL__
|
||||
Ray ray_object = ss_isect->ray;
|
||||
Ray *ray = &ray_object;
|
||||
#else
|
||||
Ray *ray = &ss_isect->ray;
|
||||
#endif
|
||||
ray->P = sd->P + disk_N*disk_height + disk_P;
|
||||
ray->D = -disk_N;
|
||||
ray->t = 2.0f*disk_height;
|
||||
@ -351,6 +356,10 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
|
||||
ss_isect->weight[hit] = eval;
|
||||
}
|
||||
|
||||
#ifdef __SPLIT_KERNEL__
|
||||
ss_isect->ray = *ray;
|
||||
#endif
|
||||
|
||||
return num_eval_hits;
|
||||
}
|
||||
|
||||
@ -359,13 +368,19 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
|
||||
SubsurfaceIntersection* ss_isect,
|
||||
int hit,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
ccl_addr_space PathState *state,
|
||||
int state_flag,
|
||||
ShaderClosure *sc,
|
||||
bool all)
|
||||
{
|
||||
#ifdef __SPLIT_KERNEL__
|
||||
Ray ray_object = ss_isect->ray;
|
||||
Ray *ray = &ray_object;
|
||||
#else
|
||||
Ray *ray = &ss_isect->ray;
|
||||
#endif
|
||||
/* Setup new shading point. */
|
||||
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
|
||||
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
|
||||
|
||||
/* Optionally blur colors and bump mapping. */
|
||||
float3 weight = ss_isect->weight[hit];
|
||||
@ -376,6 +391,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
|
||||
subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
|
||||
}
|
||||
|
||||
#ifndef __SPLIT_KERNEL__
|
||||
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
|
||||
ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
|
||||
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
|
||||
@ -465,6 +481,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
|
||||
/* setup diffuse bsdf */
|
||||
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
|
||||
}
|
||||
#endif /* ! __SPLIT_KERNEL__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
@ -76,14 +76,12 @@ CCL_NAMESPACE_BEGIN
|
||||
# ifdef WITH_OSL
|
||||
# define __OSL__
|
||||
# endif
|
||||
# ifndef __SPLIT_KERNEL__
|
||||
# define __SUBSURFACE__
|
||||
# endif
|
||||
# define __SUBSURFACE__
|
||||
# define __CMJ__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# ifndef __SPLIT_KERNEL__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_DECOUPLED__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# define __VOLUME_RECORD_ALL__
|
||||
# endif
|
||||
@ -130,6 +128,9 @@ CCL_NAMESPACE_BEGIN
|
||||
# define __CL_USE_NATIVE__
|
||||
# define __KERNEL_SHADING__
|
||||
# define __KERNEL_ADV_SHADING__
|
||||
# define __SUBSURFACE__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# endif /* __KERNEL_OPENCL_AMD__ */
|
||||
|
||||
# ifdef __KERNEL_OPENCL_INTEL_CPU__
|
||||
@ -552,7 +553,7 @@ typedef struct Ray {
|
||||
|
||||
/* Intersection */
|
||||
|
||||
typedef ccl_addr_space struct Intersection {
|
||||
typedef struct Intersection {
|
||||
float t, u, v;
|
||||
int prim;
|
||||
int object;
|
||||
@ -934,7 +935,7 @@ typedef struct PathState {
|
||||
/* Subsurface */
|
||||
|
||||
/* Struct to gather multiple SSS hits. */
|
||||
struct SubsurfaceIntersection
|
||||
typedef struct SubsurfaceIntersection
|
||||
{
|
||||
Ray ray;
|
||||
float3 weight[BSSRDF_MAX_HITS];
|
||||
@ -942,10 +943,10 @@ struct SubsurfaceIntersection
|
||||
int num_hits;
|
||||
struct Intersection hits[BSSRDF_MAX_HITS];
|
||||
float3 Ng[BSSRDF_MAX_HITS];
|
||||
};
|
||||
} SubsurfaceIntersection;
|
||||
|
||||
/* Struct to gather SSS indirect rays and delay tracing them. */
|
||||
struct SubsurfaceIndirectRays
|
||||
typedef struct SubsurfaceIndirectRays
|
||||
{
|
||||
bool need_update_volume_stack;
|
||||
bool tracing;
|
||||
@ -956,7 +957,7 @@ struct SubsurfaceIndirectRays
|
||||
struct Ray rays[BSSRDF_MAX_HITS];
|
||||
float3 throughputs[BSSRDF_MAX_HITS];
|
||||
struct PathRadiance L[BSSRDF_MAX_HITS];
|
||||
};
|
||||
} SubsurfaceIndirectRays;
|
||||
|
||||
/* Constant Kernel Data
|
||||
*
|
||||
|
@ -38,7 +38,7 @@ typedef struct VolumeShaderCoefficients {
|
||||
/* evaluate shader to get extinction coefficient at P */
|
||||
ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
ccl_addr_space PathState *state,
|
||||
float3 P,
|
||||
float3 *extinction)
|
||||
{
|
||||
@ -64,7 +64,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
|
||||
/* evaluate shader to get absorption, scattering and emission at P */
|
||||
ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
ccl_addr_space PathState *state,
|
||||
float3 P,
|
||||
VolumeShaderCoefficients *coeff)
|
||||
{
|
||||
@ -112,7 +112,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
|
||||
return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
|
||||
}
|
||||
|
||||
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
|
||||
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
|
||||
{
|
||||
for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
|
||||
int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
|
||||
@ -161,7 +161,11 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
|
||||
|
||||
/* homogeneous volume: assume shader evaluation at the starts gives
|
||||
* the extinction coefficient for the entire line segment */
|
||||
ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
|
||||
ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray,
|
||||
ShaderData *sd,
|
||||
float3 *throughput)
|
||||
{
|
||||
float3 sigma_t;
|
||||
|
||||
@ -171,7 +175,11 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s
|
||||
|
||||
/* heterogeneous volume: integrate stepping through the volume until we
|
||||
* reach the end, get absorbed entirely, or run out of iterations */
|
||||
ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
|
||||
ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray,
|
||||
ShaderData *sd,
|
||||
float3 *throughput)
|
||||
{
|
||||
float3 tp = *throughput;
|
||||
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
|
||||
@ -179,7 +187,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
|
||||
/* prepare for stepping */
|
||||
int max_steps = kernel_data.integrator.volume_max_steps;
|
||||
float step = kernel_data.integrator.volume_step_size;
|
||||
float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
|
||||
float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
|
||||
|
||||
/* compute extinction at the start */
|
||||
float t = 0.0f;
|
||||
@ -193,7 +201,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
|
||||
|
||||
/* use random position inside this segment to sample shader */
|
||||
if(new_t == ray->t)
|
||||
random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
|
||||
random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
|
||||
|
||||
float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
|
||||
float3 sigma_t;
|
||||
@ -227,7 +235,11 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
|
||||
|
||||
/* get the volume attenuation over line segment defined by ray, with the
|
||||
* assumption that there are no surfaces blocking light between the endpoints */
|
||||
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
|
||||
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
|
||||
ShaderData *shadow_sd,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray,
|
||||
float3 *throughput)
|
||||
{
|
||||
shader_setup_from_volume(kg, shadow_sd, ray);
|
||||
|
||||
@ -341,9 +353,15 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
|
||||
|
||||
/* homogeneous volume: assume shader evaluation at the start gives
|
||||
* the volume shading coefficient for the entire line segment */
|
||||
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
|
||||
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
|
||||
RNG *rng, bool probalistic_scatter)
|
||||
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
|
||||
KernelGlobals *kg,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray,
|
||||
ShaderData *sd,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space RNG *rng,
|
||||
bool probalistic_scatter)
|
||||
{
|
||||
VolumeShaderCoefficients coeff;
|
||||
|
||||
@ -444,8 +462,14 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
|
||||
* volume until we reach the end, get absorbed entirely, or run out of
|
||||
* iterations. this does probabilistically scatter or get transmitted through
|
||||
* for path tracing where we don't want to branch. */
|
||||
ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
|
||||
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
|
||||
ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
|
||||
KernelGlobals *kg,
|
||||
ccl_addr_space PathState *state,
|
||||
Ray *ray,
|
||||
ShaderData *sd,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space RNG *rng)
|
||||
{
|
||||
float3 tp = *throughput;
|
||||
const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
|
||||
@ -453,7 +477,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
|
||||
/* prepare for stepping */
|
||||
int max_steps = kernel_data.integrator.volume_max_steps;
|
||||
float step_size = kernel_data.integrator.volume_step_size;
|
||||
float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
|
||||
float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
|
||||
|
||||
/* compute coefficients at the start */
|
||||
float t = 0.0f;
|
||||
@ -474,7 +498,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
|
||||
|
||||
/* use random position inside this segment to sample shader */
|
||||
if(new_t == ray->t)
|
||||
random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
|
||||
random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
|
||||
|
||||
float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
|
||||
VolumeShaderCoefficients coeff;
|
||||
@ -579,8 +603,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
|
||||
* ray, with the assumption that there are no surfaces blocking light
|
||||
* between the endpoints. distance sampling is used to decide if we will
|
||||
* scatter or not. */
|
||||
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
|
||||
PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
|
||||
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
|
||||
KernelGlobals *kg,
|
||||
ccl_addr_space PathState *state,
|
||||
ShaderData *sd,
|
||||
Ray *ray,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space RNG *rng,
|
||||
bool heterogeneous)
|
||||
{
|
||||
shader_setup_from_volume(kg, sd, ray);
|
||||
|
||||
@ -590,6 +621,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
|
||||
return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
|
||||
}
|
||||
|
||||
#ifndef __SPLIT_KERNEL__
|
||||
/* Decoupled Volume Sampling
|
||||
*
|
||||
* VolumeSegment is list of coefficients and transmittance stored at all steps
|
||||
@ -990,6 +1022,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
|
||||
|
||||
return VOLUME_PATH_SCATTERED;
|
||||
}
|
||||
#endif /* __SPLIT_KERNEL */
|
||||
|
||||
/* decide if we need to use decoupled or not */
|
||||
ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
|
||||
@ -1021,9 +1054,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
|
||||
|
||||
ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
|
||||
ShaderData *stack_sd,
|
||||
const PathState *state,
|
||||
const Ray *ray,
|
||||
VolumeStack *stack)
|
||||
ccl_addr_space const PathState *state,
|
||||
ccl_addr_space const Ray *ray,
|
||||
ccl_addr_space VolumeStack *stack)
|
||||
{
|
||||
/* NULL ray happens in the baker, does it need proper initialization of
|
||||
* camera in volume?
|
||||
@ -1166,7 +1199,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
|
||||
ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
|
||||
{
|
||||
/* todo: we should have some way for objects to indicate if they want the
|
||||
* world shader to work inside them. excluding it by default is problematic
|
||||
@ -1215,7 +1248,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
|
||||
ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
|
||||
ShaderData *stack_sd,
|
||||
Ray *ray,
|
||||
VolumeStack *stack)
|
||||
ccl_addr_space VolumeStack *stack)
|
||||
{
|
||||
kernel_assert(kernel_data.integrator.use_volumes);
|
||||
|
||||
@ -1277,7 +1310,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
|
||||
* the world's one after the last bounce to avoid render artifacts.
|
||||
*/
|
||||
ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
|
||||
VolumeStack *volume_stack)
|
||||
ccl_addr_space VolumeStack *volume_stack)
|
||||
{
|
||||
if(kernel_data.background.volume_shader != SHADER_NONE) {
|
||||
/* Keep the world's volume in stack. */
|
||||
|
@ -74,13 +74,17 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));
|
||||
|
||||
|
@ -41,13 +41,17 @@
|
||||
# include "split/kernel_path_init.h"
|
||||
# include "split/kernel_scene_intersect.h"
|
||||
# include "split/kernel_lamp_emission.h"
|
||||
# include "split/kernel_do_volume.h"
|
||||
# include "split/kernel_queue_enqueue.h"
|
||||
# include "split/kernel_background_buffer_update.h"
|
||||
# include "split/kernel_indirect_background.h"
|
||||
# include "split/kernel_shader_eval.h"
|
||||
# include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
|
||||
# include "split/kernel_subsurface_scatter.h"
|
||||
# include "split/kernel_direct_lighting.h"
|
||||
# include "split/kernel_shadow_blocked.h"
|
||||
# include "split/kernel_next_iteration_setup.h"
|
||||
# include "split/kernel_indirect_subsurface.h"
|
||||
# include "split/kernel_buffer_update.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
@ -166,13 +170,17 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
|
||||
{
|
||||
@ -189,13 +197,17 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
|
||||
REGISTER(path_init);
|
||||
REGISTER(scene_intersect);
|
||||
REGISTER(lamp_emission);
|
||||
REGISTER(do_volume);
|
||||
REGISTER(queue_enqueue);
|
||||
REGISTER(background_buffer_update);
|
||||
REGISTER(indirect_background);
|
||||
REGISTER(shader_eval);
|
||||
REGISTER(holdout_emission_blurring_pathtermination_ao);
|
||||
REGISTER(subsurface_scatter);
|
||||
REGISTER(direct_lighting);
|
||||
REGISTER(shadow_blocked);
|
||||
REGISTER(next_iteration_setup);
|
||||
REGISTER(indirect_subsurface);
|
||||
REGISTER(buffer_update);
|
||||
|
||||
#undef REGISTER
|
||||
#undef REGISTER_EVAL_NAME
|
||||
|
@ -16,11 +16,11 @@
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_background_buffer_update.h"
|
||||
#include "split/kernel_buffer_update.h"
|
||||
|
||||
__kernel void kernel_ocl_path_trace_background_buffer_update(
|
||||
__kernel void kernel_ocl_path_trace_buffer_update(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_background_buffer_update(kg);
|
||||
kernel_buffer_update(kg);
|
||||
}
|
26
intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
Normal file
26
intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_do_volume.h"
|
||||
|
||||
__kernel void kernel_ocl_path_trace_do_volume(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_do_volume(kg);
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_indirect_background.h"
|
||||
|
||||
__kernel void kernel_ocl_path_trace_indirect_background(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_indirect_background(kg);
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_indirect_subsurface.h"
|
||||
|
||||
__kernel void kernel_ocl_path_trace_indirect_subsurface(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_indirect_subsurface(kg);
|
||||
}
|
34
intern/cycles/kernel/kernels/opencl/kernel_split.cl
Normal file
34
intern/cycles/kernel/kernels/opencl/kernel_split.cl
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_state_buffer_size.cl"
|
||||
#include "kernel_data_init.cl"
|
||||
#include "kernel_path_init.cl"
|
||||
|
||||
#include "kernel_scene_intersect.cl"
|
||||
#include "kernel_lamp_emission.cl"
|
||||
#include "kernel_do_volume.cl"
|
||||
#include "kernel_indirect_background.cl"
|
||||
#include "kernel_queue_enqueue.cl"
|
||||
#include "kernel_shader_eval.cl"
|
||||
#include "kernel_holdout_emission_blurring_pathtermination_ao.cl"
|
||||
#include "kernel_subsurface_scatter.cl"
|
||||
#include "kernel_direct_lighting.cl"
|
||||
#include "kernel_shadow_blocked.cl"
|
||||
#include "kernel_next_iteration_setup.cl"
|
||||
#include "kernel_indirect_subsurface.cl"
|
||||
#include "kernel_buffer_update.cl"
|
||||
|
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_subsurface_scatter.h"
|
||||
|
||||
__kernel void kernel_ocl_path_trace_subsurface_scatter(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_subsurface_scatter(kg);
|
||||
}
|
@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN
|
||||
* QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
|
||||
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
|
||||
*/
|
||||
ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
|
||||
ccl_device void kernel_buffer_update(KernelGlobals *kg)
|
||||
{
|
||||
ccl_local unsigned int local_queue_atomics;
|
||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
|
||||
rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
|
||||
buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
/* eval background shader if nothing hit */
|
||||
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
|
||||
*L_transparent = (*L_transparent) + average((*throughput));
|
||||
#ifdef __PASSES__
|
||||
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
|
||||
#endif
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
#ifdef __BACKGROUND__
|
||||
/* sample background shader */
|
||||
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
|
||||
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
|
||||
#endif
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
}
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
|
||||
float3 L_sum = path_radiance_clamp_and_sum(kg, L);
|
||||
kernel_write_light_passes(kg, buffer, L, sample);
|
||||
@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
|
||||
*L_transparent = 0.0f;
|
||||
path_radiance_init(L, kernel_data.film.use_light_pass);
|
||||
path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray);
|
||||
#ifdef __SUBSURFACE__
|
||||
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
|
||||
#endif
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
debug_data_init(debug_data);
|
||||
#endif
|
97
intern/cycles/kernel/split/kernel_do_volume.h
Normal file
97
intern/cycles/kernel/split/kernel_do_volume.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
ccl_device void kernel_do_volume(KernelGlobals *kg)
|
||||
{
|
||||
#ifdef __VOLUME__
|
||||
/* We will empty this queue in this kernel. */
|
||||
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
|
||||
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
|
||||
}
|
||||
/* Fetch use_queues_flag. */
|
||||
ccl_local char local_use_queues_flag;
|
||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
local_use_queues_flag = *kernel_split_params.use_queues_flag;
|
||||
}
|
||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||
|
||||
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||
if(local_use_queues_flag) {
|
||||
ray_index = get_ray_index(kg, ray_index,
|
||||
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
1);
|
||||
if(ray_index == QUEUE_EMPTY_SLOT) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
|
||||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
|
||||
bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
|
||||
|
||||
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
|
||||
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
|
||||
|
||||
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
|
||||
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
|
||||
ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
|
||||
ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
|
||||
ShaderData *sd = &kernel_split_state.sd[ray_index];
|
||||
ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
|
||||
|
||||
/* Sanitize volume stack. */
|
||||
if(!hit) {
|
||||
kernel_volume_clean_stack(kg, state->volume_stack);
|
||||
}
|
||||
/* volume attenuation, emission, scatter */
|
||||
if(state->volume_stack[0].shader != SHADER_NONE) {
|
||||
Ray volume_ray = *ray;
|
||||
volume_ray.t = (hit)? isect->t: FLT_MAX;
|
||||
|
||||
bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
|
||||
|
||||
{
|
||||
/* integrate along volume segment with distance sampling */
|
||||
VolumeIntegrateResult result = kernel_volume_integrate(
|
||||
kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous);
|
||||
|
||||
# ifdef __VOLUME_SCATTER__
|
||||
if(result == VOLUME_PATH_SCATTERED) {
|
||||
/* direct lighting */
|
||||
kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L);
|
||||
|
||||
/* indirect light bounce */
|
||||
if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray))
|
||||
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
|
||||
else
|
||||
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
CCL_NAMESPACE_END
|
87
intern/cycles/kernel/split/kernel_indirect_background.h
Normal file
87
intern/cycles/kernel/split/kernel_indirect_background.h
Normal file
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device void kernel_indirect_background(KernelGlobals *kg)
|
||||
{
|
||||
/*
|
||||
ccl_local unsigned int local_queue_atomics;
|
||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
local_queue_atomics = 0;
|
||||
}
|
||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||
// */
|
||||
|
||||
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||
ray_index = get_ray_index(kg, ray_index,
|
||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
0);
|
||||
|
||||
#ifdef __COMPUTE_DEVICE_GPU__
|
||||
/* If we are executing on a GPU device, we exit all threads that are not
|
||||
* required.
|
||||
*
|
||||
* If we are executing on a CPU device, then we need to keep all threads
|
||||
* active since we have barrier() calls later in the kernel. CPU devices,
|
||||
* expect all threads to execute barrier statement.
|
||||
*/
|
||||
if(ray_index == QUEUE_EMPTY_SLOT) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __COMPUTE_DEVICE_GPU__
|
||||
if(ray_index != QUEUE_EMPTY_SLOT) {
|
||||
#endif
|
||||
|
||||
|
||||
ccl_global char *ray_state = kernel_split_state.ray_state;
|
||||
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
|
||||
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
|
||||
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
|
||||
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
|
||||
ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
/* eval background shader if nothing hit */
|
||||
if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
|
||||
*L_transparent = (*L_transparent) + average((*throughput));
|
||||
#ifdef __PASSES__
|
||||
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
|
||||
#endif
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
#ifdef __BACKGROUND__
|
||||
/* sample background shader */
|
||||
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
|
||||
path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
|
||||
#endif
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __COMPUTE_DEVICE_GPU__
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
77
intern/cycles/kernel/split/kernel_indirect_subsurface.h
Normal file
77
intern/cycles/kernel/split/kernel_indirect_subsurface.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
|
||||
{
|
||||
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||
if(thread_index == 0) {
|
||||
/* We will empty both queues in this kernel. */
|
||||
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
|
||||
kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
|
||||
}
|
||||
|
||||
int ray_index;
|
||||
get_ray_index(kg, thread_index,
|
||||
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
1);
|
||||
ray_index = get_ray_index(kg, thread_index,
|
||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
1);
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
|
||||
if(ray_index == QUEUE_EMPTY_SLOT) {
|
||||
return;
|
||||
}
|
||||
|
||||
ccl_global char *ray_state = kernel_split_state.ray_state;
|
||||
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
|
||||
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
|
||||
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
|
||||
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
|
||||
ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
|
||||
kernel_path_subsurface_accum_indirect(ss_indirect, L);
|
||||
|
||||
/* Trace indirect subsurface rays by restarting the loop. this uses less
|
||||
* stack memory than invoking kernel_path_indirect.
|
||||
*/
|
||||
if(ss_indirect->num_rays) {
|
||||
kernel_path_subsurface_setup_indirect(kg,
|
||||
ss_indirect,
|
||||
state,
|
||||
ray,
|
||||
L,
|
||||
throughput);
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
|
||||
}
|
||||
else {
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN
|
||||
*/
|
||||
ccl_device void kernel_lamp_emission(KernelGlobals *kg)
|
||||
{
|
||||
#ifndef __VOLUME__
|
||||
/* We will empty this queue in this kernel. */
|
||||
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
|
||||
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
|
||||
}
|
||||
#endif
|
||||
/* Fetch use_queues_flag. */
|
||||
ccl_local char local_use_queues_flag;
|
||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
|
||||
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
1);
|
||||
#ifndef __VOLUME__
|
||||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
);
|
||||
if(ray_index == QUEUE_EMPTY_SLOT) {
|
||||
return;
|
||||
}
|
||||
|
@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
|
||||
&kernel_split_state.rng[ray_index],
|
||||
my_sample,
|
||||
&kernel_split_state.ray[ray_index]);
|
||||
#ifdef __SUBSURFACE__
|
||||
kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
debug_data_init(&kernel_split_state.debug_data[ray_index]);
|
||||
#endif
|
||||
|
@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
|
||||
|
||||
int queue_number = -1;
|
||||
|
||||
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
|
||||
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
|
||||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
|
||||
queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
|
||||
}
|
||||
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
|
||||
else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
|
||||
IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
|
||||
queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
|
||||
}
|
||||
|
||||
|
@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
|
||||
#endif
|
||||
Intersection *isect = &kernel_split_state.isect[ray_index];
|
||||
Intersection isect;
|
||||
PathState state = kernel_split_state.path_state[ray_index];
|
||||
Ray ray = kernel_split_state.ray[ray_index];
|
||||
|
||||
@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
|
||||
lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
|
||||
}
|
||||
|
||||
bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
|
||||
#else
|
||||
bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f);
|
||||
bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
|
||||
#endif
|
||||
kernel_split_state.isect[ray_index] = isect;
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
if(state.flag & PATH_RAY_CAMERA) {
|
||||
debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
|
||||
debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
|
||||
debug_data->num_bvh_intersections += isect->num_intersections;
|
||||
debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
|
||||
debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
|
||||
debug_data->num_bvh_intersections += isect.num_intersections;
|
||||
}
|
||||
debug_data->num_ray_bounces++;
|
||||
#endif
|
||||
|
@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
|
||||
|
||||
/* Continue on with shader evaluation. */
|
||||
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
|
||||
Intersection *isect = &kernel_split_state.isect[ray_index];
|
||||
Intersection isect = kernel_split_state.isect[ray_index];
|
||||
ccl_global uint *rng = &kernel_split_state.rng[ray_index];
|
||||
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
|
||||
Ray ray = kernel_split_state.ray[ray_index];
|
||||
|
||||
shader_setup_from_ray(kg,
|
||||
&kernel_split_state.sd[ray_index],
|
||||
isect,
|
||||
&isect,
|
||||
&ray);
|
||||
float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
|
||||
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
|
||||
|
@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
|
||||
: light_ray_dl_global;
|
||||
|
||||
float3 shadow;
|
||||
Ray ray = *light_ray_global;
|
||||
update_path_radiance = !(shadow_blocked(kg,
|
||||
&kernel_split_state.sd_DL_shadow[thread_index],
|
||||
state,
|
||||
light_ray_global,
|
||||
&ray,
|
||||
&shadow));
|
||||
|
||||
*light_ray_global = ray;
|
||||
/* We use light_ray_global's P and t to store shadow and
|
||||
* update_path_radiance.
|
||||
*/
|
||||
|
@ -52,11 +52,11 @@
|
||||
#include "kernel_passes.h"
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
#include "kernel_subsurface.h"
|
||||
# include "kernel_subsurface.h"
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
#include "kernel_volume.h"
|
||||
# include "kernel_volume.h"
|
||||
#endif
|
||||
|
||||
#include "kernel_path_state.h"
|
||||
@ -65,9 +65,10 @@
|
||||
#include "kernel_path_common.h"
|
||||
#include "kernel_path_surface.h"
|
||||
#include "kernel_path_volume.h"
|
||||
#include "kernel_path_subsurface.h"
|
||||
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
#include "kernel_debug.h"
|
||||
# include "kernel_debug.h"
|
||||
#endif
|
||||
|
||||
#include "kernel_queues.h"
|
||||
|
@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el
|
||||
size = size SPLIT_DATA_ENTRIES;
|
||||
#undef SPLIT_DATA_ENTRY
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */
|
||||
#endif
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
|
||||
|
||||
#define SPLIT_DATA_ENTRY(type, name, num) \
|
||||
split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
|
||||
SPLIT_DATA_ENTRIES
|
||||
SPLIT_DATA_ENTRIES;
|
||||
#undef SPLIT_DATA_ENTRY
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p;
|
||||
p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16);
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
split_data->state_shadow = (ccl_global PathState*)p;
|
||||
p += align_up(2 * num_elements * sizeof(PathState), 16);
|
||||
#endif
|
||||
|
||||
split_data->ray_state = ray_state;
|
||||
}
|
||||
|
||||
|
@ -68,14 +68,13 @@ typedef struct SplitParams {
|
||||
SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
|
||||
SPLIT_DATA_ENTRY(Intersection, isect, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
|
||||
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
|
||||
SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
|
||||
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
|
||||
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
|
||||
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
|
||||
@ -88,6 +87,14 @@ typedef struct SplitData {
|
||||
SPLIT_DATA_ENTRIES
|
||||
#undef SPLIT_DATA_ENTRY
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_global SubsurfaceIndirectRays *ss_rays;
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
ccl_global PathState *state_shadow;
|
||||
#endif
|
||||
|
||||
/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
|
||||
* the host easily) but is still used the same as the other data so we have it here in this struct as well
|
||||
*/
|
||||
|
86
intern/cycles/kernel/split/kernel_subsurface_scatter.h
Normal file
86
intern/cycles/kernel/split/kernel_subsurface_scatter.h
Normal file
@ -0,0 +1,86 @@
|
||||
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
|
||||
{
|
||||
#ifdef __SUBSURFACE__
|
||||
|
||||
ccl_local unsigned int local_queue_atomics;
|
||||
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
|
||||
local_queue_atomics = 0;
|
||||
}
|
||||
ccl_barrier(CCL_LOCAL_MEM_FENCE);
|
||||
|
||||
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
|
||||
ray_index = get_ray_index(kg, ray_index,
|
||||
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_size,
|
||||
0);
|
||||
|
||||
#ifdef __COMPUTE_DEVICE_GPU__
|
||||
/* If we are executing on a GPU device, we exit all threads that are not
|
||||
* required.
|
||||
*
|
||||
* If we are executing on a CPU device, then we need to keep all threads
|
||||
* active since we have barrier() calls later in the kernel. CPU devices,
|
||||
* expect all threads to execute barrier statement.
|
||||
*/
|
||||
if(ray_index == QUEUE_EMPTY_SLOT) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __COMPUTE_DEVICE_GPU__
|
||||
if(ray_index != QUEUE_EMPTY_SLOT) {
|
||||
#endif
|
||||
|
||||
|
||||
char enqueue_flag = 0;
|
||||
ccl_global char *ray_state = kernel_split_state.ray_state;
|
||||
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
|
||||
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
|
||||
ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
|
||||
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
|
||||
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
|
||||
ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
|
||||
ShaderData *sd = &kernel_split_state.sd[ray_index];
|
||||
ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
|
||||
|
||||
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
|
||||
if(sd->flag & SD_BSSRDF) {
|
||||
if(kernel_path_subsurface_scatter(kg,
|
||||
sd,
|
||||
emission_sd,
|
||||
L,
|
||||
state,
|
||||
rng,
|
||||
ray,
|
||||
throughput,
|
||||
ss_indirect)) {
|
||||
ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
|
||||
enqueue_flag = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __COMPUTE_DEVICE_GPU__
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Enqueue RAY_UPDATE_BUFFER rays. */
|
||||
enqueue_ray_index_local(ray_index,
|
||||
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
|
||||
enqueue_flag,
|
||||
kernel_split_params.queue_size,
|
||||
&local_queue_atomics,
|
||||
kernel_split_state.queue_data,
|
||||
kernel_split_params.queue_index);
|
||||
|
||||
#endif /* __SUBSURFACE__ */
|
||||
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
Loading…
x
Reference in New Issue
Block a user