Cleanup: replace state flow macros in the kernel with functions
Parent: 5539fb3121
Commit: 28c3739a9b
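Replace the INTEGRATOR_PATH_* and INTEGRATOR_SHADOW_PATH_* macro families with
ccl_device_forceinline functions (integrator_path_init/next/terminate, their
_sorted variants, the shadow path equivalents, and the *_is_terminated
queries), passing kg and state explicitly. The current_kernel template
parameter is typed as DeviceKernel instead of uint32_t/int. A typical
call-site change, taken from the diff below:

-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);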
@@ -181,7 +181,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
     integrator_state_write_ray(kg, state, &ray);
 
     /* Setup next kernel to execute. */
-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
   }
   else {
     /* Surface baking. */
@@ -247,13 +247,13 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
     const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
 
     if (use_caustics) {
-      INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
+      integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
     }
     else if (use_raytrace_kernel) {
-      INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
+      integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
     }
     else {
-      INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
+      integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
     }
   }
 
@@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
   /* Continue with intersect_closest kernel, optionally initializing volume
    * stack before that if the camera may be inside a volume. */
   if (kernel_data.cam.is_inside_volume) {
-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
   }
   else {
-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
   }
 
   return true;
@@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
     /* If using background pass, schedule background shading kernel so that we have a background
      * to alpha-over on. The background kernel will then continue the path afterwards. */
     INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
     return;
   }
 
   if (!integrator_state_volume_stack_is_empty(kg, state)) {
     /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
      * objects from it, and then continue shading volume and shadow catcher surface after. */
-    INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+    integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
     return;
   }
 
@@ -128,18 +128,19 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
   const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
 
   if (use_caustics) {
-    INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+    integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
   }
   else if (use_raytrace_kernel) {
-    INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+    integrator_path_init_sorted(
+        kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
   }
   else {
-    INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+    integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
   }
 }
 
 /* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
     KernelGlobals kg, IntegratorState state)
 {
@@ -156,20 +157,21 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
   const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
 
   if (use_caustics) {
-    INTEGRATOR_PATH_NEXT_SORTED(
-        current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+    integrator_path_next_sorted(
+        kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
   }
   else if (use_raytrace_kernel) {
-    INTEGRATOR_PATH_NEXT_SORTED(
-        current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+    integrator_path_next_sorted(
+        kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
   }
   else {
-    INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+    integrator_path_next_sorted(
+        kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
   }
 }
 
 /* Schedule next kernel to be executed after executing background shader for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
     KernelGlobals kg, IntegratorState state)
 {
@@ -177,7 +179,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
   if (!integrator_state_volume_stack_is_empty(kg, state)) {
     /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
      * objects from it, and then continue shading volume and shadow catcher surface after. */
-    INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+    integrator_path_next(
+        kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
     return;
   }
 
@@ -190,7 +193,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
  *
  * Note that current_kernel is a template value since making this a variable
  * leads to poor performance with CUDA atomics. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel(
     KernelGlobals kg,
     IntegratorState state,
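The comment above is why current_kernel stays a compile-time template
argument rather than a runtime value: a variable kernel index performs poorly
with CUDA atomics. A minimal illustrative sketch of the resulting
instantiation form (the argument list is elided and this exact line is an
assumption, not a line from this commit):

  integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(kg, state, ...);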
@@ -206,10 +209,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
     const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0;
 
     if (!integrator_intersect_terminate(kg, state, flags)) {
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+      integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
     }
     else {
-      INTEGRATOR_PATH_TERMINATE(current_kernel);
+      integrator_path_terminate(kg, state, current_kernel);
     }
     return;
   }
@@ -218,7 +221,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
   if (hit) {
     /* Hit a surface, continue with light or surface kernel. */
     if (isect->type & PRIMITIVE_LAMP) {
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+      integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
     }
     else {
       /* Hit a surface, continue with surface kernel unless terminated. */
@@ -231,16 +234,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
                                  (object_flags & SD_OBJECT_CAUSTICS);
       const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
       if (use_caustics) {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
       }
       else if (use_raytrace_kernel) {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
       }
       else {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
       }
 
 #ifdef __SHADOW_CATCHER__
@@ -249,13 +252,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
 #endif
       }
       else {
-        INTEGRATOR_PATH_TERMINATE(current_kernel);
+        integrator_path_terminate(kg, state, current_kernel);
       }
     }
   }
   else {
     /* Nothing hit, continue with background kernel. */
-    INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
   }
 }
 
@@ -263,7 +266,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
  *
  * The logic here matches integrator_intersect_next_kernel, except that
  * volume shading and termination testing have already been done. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
     KernelGlobals kg,
     IntegratorState state,
@@ -273,7 +276,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
   if (isect->prim != PRIM_NONE) {
     /* Hit a surface, continue with light or surface kernel. */
     if (isect->type & PRIMITIVE_LAMP) {
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+      integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
       return;
     }
     else {
@@ -286,16 +289,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
       const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
 
       if (use_caustics) {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
       }
       else if (use_raytrace_kernel) {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
       }
       else {
-        INTEGRATOR_PATH_NEXT_SORTED(
-            current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+        integrator_path_next_sorted(
+            kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
       }
 
 #ifdef __SHADOW_CATCHER__
@@ -307,7 +310,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
   }
   else {
     /* Nothing hit, continue with background kernel. */
-    INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
     return;
   }
 }
 
@@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
 
   if (opaque_hit) {
     /* Hit an opaque surface, shadow path ends here. */
-    INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+    integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
     return;
   }
   else {
@@ -171,7 +171,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
      *
      * TODO: could also write to render buffer directly if no transparent shadows?
      * Could save a kernel execution for the common case. */
-    INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
+    integrator_shadow_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
                                 DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
     return;
   }
 
@@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat
   }
 #endif
 
-  INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+  integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
 }
 
 CCL_NAMESPACE_END
@@ -222,7 +222,7 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
   }
   else {
     /* Volume stack init for camera rays, continue with intersection of camera ray. */
-    INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
+    integrator_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
                          DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
   }
 }
@@ -213,7 +213,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
   }
 #endif
 
-  INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+  integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
 }
 
 CCL_NAMESPACE_END
@@ -99,11 +99,11 @@ ccl_device void integrator_shade_light(KernelGlobals kg,
   INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
 
   if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
-    INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+    integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
     return;
   }
   else {
-    INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
+    integrator_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
                          DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
     return;
   }
@@ -158,20 +158,20 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
   /* Evaluate transparent shadows. */
   const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
   if (opaque) {
-    INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+    integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
     return;
   }
 #endif
 
   if (shadow_intersections_has_remaining(num_hits)) {
     /* More intersections to find, continue shadow ray. */
-    INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
+    integrator_shadow_path_next(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
                                 DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
     return;
   }
   else {
     kernel_accum_light(kg, state, render_buffer);
-    INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+    integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
     return;
   }
 }
@@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   const bool is_light = light_sample_is_light(&ls);
 
   /* Branch off shadow kernel. */
-  INTEGRATOR_SHADOW_PATH_INIT(
-      shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+  IntegratorShadowState shadow_state = integrator_shadow_path_init(
+      kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
 
   /* Copy volume stack and enter/exit volume. */
   integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -442,7 +442,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
   ray.dD = differential_zero_compact();
 
   /* Branch off shadow kernel. */
-  INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
+  IntegratorShadowState shadow_state = integrator_shadow_path_init(
+      kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
 
   /* Copy volume stack and enter/exit volume. */
   integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -604,22 +605,23 @@ ccl_device bool integrate_surface(KernelGlobals kg,
 }
 
 template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
-         int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
+         DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
 ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
                                                      IntegratorState state,
                                                      ccl_global float *ccl_restrict render_buffer)
 {
   if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
     if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+      integrator_path_next(
+          kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
     }
     else {
       kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
-      INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+      integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
     }
   }
   else {
-    INTEGRATOR_PATH_TERMINATE(current_kernel);
+    integrator_path_terminate(kg, state, current_kernel);
   }
 }
 
@@ -774,8 +774,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
   const bool is_light = light_sample_is_light(ls);
 
   /* Branch off shadow kernel. */
-  INTEGRATOR_SHADOW_PATH_INIT(
-      shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+  IntegratorShadowState shadow_state = integrator_shadow_path_init(
+      kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
 
   /* Write shadow ray and associated state to global memory. */
   integrator_state_write_shadow_ray(kg, shadow_state, &ray);
@@ -1032,13 +1032,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
 
   if (event == VOLUME_PATH_SCATTERED) {
     /* Queue intersect_closest kernel. */
-    INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+    integrator_path_next(kg,
+                         state,
+                         DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
                          DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
     return;
   }
   else if (event == VOLUME_PATH_MISSED) {
     /* End path. */
-    INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+    integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
     return;
   }
   else {
@@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
 ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
                                                             ConstIntegratorState state)
 {
-  if (INTEGRATOR_PATH_IS_TERMINATED) {
+  if (integrator_path_is_terminated(state)) {
     return false;
   }
 
@@ -10,62 +10,94 @@ CCL_NAMESPACE_BEGIN
 
 /* Control Flow
  *
- * Utilities for control flow between kernels. The implementation may differ per device
- * or even be handled on the host side. To abstract such differences, experiment with
- * different implementations and for debugging, this is abstracted using macros.
+ * Utilities for control flow between kernels. The implementation is different between CPU and
+ * GPU devices. For the latter part of the logic is handled on the host side with wavefronts.
  *
  * There is a main path for regular path tracing camera for path tracing. Shadows for next
  * event estimation branch off from this into their own path, that may be computed in
- * parallel while the main path continues.
+ * parallel while the main path continues. Additionally, shading kernels are sorted using
+ * a key for coherence.
  *
  * Each kernel on the main path must call one of these functions. These may not be called
  * multiple times from the same kernel.
  *
- * INTEGRATOR_PATH_INIT(next_kernel)
- * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
- * INTEGRATOR_PATH_TERMINATE(current_kernel)
+ * integrator_path_init(kg, state, next_kernel)
+ * integrator_path_next(kg, state, current_kernel, next_kernel)
+ * integrator_path_terminate(kg, state, current_kernel)
  *
  * For the shadow path similar functions are used, and again each shadow kernel must call
 * one of them, and only once.
 */
 
-#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
-#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
-  (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
+ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
+{
+  return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
+}
+
+ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
+{
+  return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
+}
 
 #ifdef __KERNEL_GPU__
 
-# define INTEGRATOR_PATH_INIT(next_kernel) \
-  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
-                              1); \
-  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
-  atomic_fetch_and_sub_uint32( \
-      &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
-                              1); \
-  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
-  atomic_fetch_and_sub_uint32( \
-      &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+                                                 IntegratorState state,
+                                                 const DeviceKernel next_kernel)
+{
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
 
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
-  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \
-      &kernel_integrator_state.next_shadow_path_index[0], 1); \
-  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
-                              1); \
-  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
-  atomic_fetch_and_sub_uint32( \
-      &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
-                              1); \
-  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
-  atomic_fetch_and_sub_uint32( \
-      &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+                                                 IntegratorState state,
+                                                 const DeviceKernel current_kernel,
+                                                 const DeviceKernel next_kernel)
+{
+  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+                              1);
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+                                                      IntegratorState state,
+                                                      const DeviceKernel current_kernel)
+{
+  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+                              1);
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+}
+
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
+      &kernel_integrator_state.next_shadow_path_index[0], 1);
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+  return shadow_state;
+}
+
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+                                                        IntegratorShadowState state,
+                                                        const DeviceKernel current_kernel,
+                                                        const DeviceKernel next_kernel)
+{
+  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+                              1);
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+}
+
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+                                                             IntegratorShadowState state,
+                                                             const DeviceKernel current_kernel)
+{
+  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+                              1);
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+}
 
 # ifdef __KERNEL_SORT_PARTITIONING__
 /* Sort first by truncated state index (for good locality), then by key (for good coherence). */
@@ -75,68 +107,103 @@ CCL_NAMESPACE_BEGIN
 # define INTEGRATOR_SORT_KEY(key, state) (key)
 # endif
 
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
-  { \
-    const int key_ = INTEGRATOR_SORT_KEY(key, state); \
-    atomic_fetch_and_add_uint32( \
-        &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
-    INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
-    atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
-                                1); \
-  }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
-  { \
-    const int key_ = INTEGRATOR_SORT_KEY(key, state); \
-    atomic_fetch_and_sub_uint32( \
-        &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-    atomic_fetch_and_add_uint32( \
-        &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
-    INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
-    atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
-                                1); \
-  }
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+                                                        IntegratorState state,
+                                                        const DeviceKernel next_kernel,
+                                                        const uint32_t key)
+{
+  const int key_ = INTEGRATOR_SORT_KEY(key, state);
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
+
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+                                                        IntegratorState state,
+                                                        const DeviceKernel current_kernel,
+                                                        const DeviceKernel next_kernel,
+                                                        const uint32_t key)
+{
+  const int key_ = INTEGRATOR_SORT_KEY(key, state);
+  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+                              1);
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
 
 #else
 
-# define INTEGRATOR_PATH_INIT(next_kernel) \
-  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
-    (void)key; \
-  }
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
-    (void)current_kernel; \
-  }
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
-    (void)current_kernel; \
-  }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
-    (void)key; \
-    (void)current_kernel; \
-  }
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+                                                 IntegratorState state,
+                                                 const DeviceKernel next_kernel)
+{
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
 
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
-  IntegratorShadowState shadow_state = &state->shadow_type; \
-  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
-    (void)current_kernel; \
-  }
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
-  { \
-    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
-    (void)current_kernel; \
-  }
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+                                                        IntegratorState state,
+                                                        const DeviceKernel next_kernel,
+                                                        const uint32_t key)
+{
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+  (void)key;
+}
+
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+                                                 IntegratorState state,
+                                                 const DeviceKernel current_kernel,
+                                                 const DeviceKernel next_kernel)
+{
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+  (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+                                                      IntegratorState state,
+                                                      const DeviceKernel current_kernel)
+{
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+  (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+                                                        IntegratorState state,
+                                                        const DeviceKernel current_kernel,
+                                                        const DeviceKernel next_kernel,
+                                                        const uint32_t key)
+{
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+  (void)key;
+  (void)current_kernel;
+}
+
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+  IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
+  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+  return shadow_state;
+}
+
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+                                                        IntegratorShadowState state,
+                                                        const DeviceKernel current_kernel,
+                                                        const DeviceKernel next_kernel)
+{
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+  (void)current_kernel;
+}
+
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+                                                             IntegratorShadowState state,
+                                                             const DeviceKernel current_kernel)
+{
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+  (void)current_kernel;
+}
 
 #endif
 
@@ -177,17 +177,17 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
   const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
 
   if (use_caustics) {
-    INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+    integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
                                 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
                                 shader);
   }
   else if (use_raytrace_kernel) {
-    INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+    integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
                                 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
                                 shader);
   }
   else {
-    INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+    integrator_path_next_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
                                 DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
                                 shader);
   }
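Taken together, the new control-flow API means each kernel schedules its
successor through a plain function call instead of a macro. A minimal sketch
of the calling convention under the functions introduced in this commit
(DEVICE_KERNEL_INTEGRATOR_EXAMPLE and done() are hypothetical placeholders,
not part of the commit):

ccl_device void integrator_example_kernel(KernelGlobals kg, IntegratorState state)
{
  /* Each main-path kernel must call exactly one of these, exactly once. */
  if (done()) {
    /* Ends the path; on the GPU this atomically decrements the queue counter
     * for the current kernel and clears queued_kernel. */
    integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_EXAMPLE);
  }
  else {
    /* Hands off to the next kernel; on the GPU this moves the path between
     * the two kernels' queue counters. */
    integrator_path_next(
        kg, state, DEVICE_KERNEL_INTEGRATOR_EXAMPLE, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
  }
}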