Cycles: Split path initialization into own kernel
This makes it easier to initialize things correctly in the data_init kernel before they are needed by path tracing.
This commit is contained in:
parent
5b8f1c8d34
commit
4cf501b835
|
@ -41,6 +41,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
|
|||
device->mem_free(queue_index);
|
||||
device->mem_free(work_pool_wgs);
|
||||
|
||||
delete kernel_path_init;
|
||||
delete kernel_scene_intersect;
|
||||
delete kernel_lamp_emission;
|
||||
delete kernel_queue_enqueue;
|
||||
|
@ -61,6 +62,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
|
|||
return false; \
|
||||
}
|
||||
|
||||
LOAD_KERNEL(path_init);
|
||||
LOAD_KERNEL(scene_intersect);
|
||||
LOAD_KERNEL(lamp_emission);
|
||||
LOAD_KERNEL(queue_enqueue);
|
||||
|
@ -200,6 +202,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
|||
return false;
|
||||
}
|
||||
|
||||
ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
|
||||
|
||||
bool activeRaysAvailable = true;
|
||||
|
||||
while(activeRaysAvailable) {
|
||||
|
|
|
@ -55,6 +55,7 @@ class DeviceSplitKernel {
|
|||
private:
|
||||
Device *device;
|
||||
|
||||
SplitKernelFunction *kernel_path_init;
|
||||
SplitKernelFunction *kernel_scene_intersect;
|
||||
SplitKernelFunction *kernel_lamp_emission;
|
||||
SplitKernelFunction *kernel_queue_enqueue;
|
||||
|
|
|
@ -16,6 +16,7 @@ set(SRC
|
|||
kernels/cpu/kernel_split.cpp
|
||||
kernels/opencl/kernel.cl
|
||||
kernels/opencl/kernel_data_init.cl
|
||||
kernels/opencl/kernel_path_init.cl
|
||||
kernels/opencl/kernel_queue_enqueue.cl
|
||||
kernels/opencl/kernel_scene_intersect.cl
|
||||
kernels/opencl/kernel_lamp_emission.cl
|
||||
|
@ -201,6 +202,7 @@ set(SRC_SPLIT_HEADERS
|
|||
split/kernel_holdout_emission_blurring_pathtermination_ao.h
|
||||
split/kernel_lamp_emission.h
|
||||
split/kernel_next_iteration_setup.h
|
||||
split/kernel_path_init.h
|
||||
split/kernel_queue_enqueue.h
|
||||
split/kernel_scene_intersect.h
|
||||
split/kernel_shader_eval.h
|
||||
|
@ -400,6 +402,7 @@ endif()
|
|||
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
|
||||
|
|
|
@ -71,6 +71,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||
#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
|
||||
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
# include "split/kernel_split_common.h"
|
||||
|
||||
# include "split/kernel_data_init.h"
|
||||
# include "split/kernel_path_init.h"
|
||||
# include "split/kernel_scene_intersect.h"
|
||||
# include "split/kernel_lamp_emission.h"
|
||||
# include "split/kernel_queue_enqueue.h"
|
||||
|
@ -163,6 +164,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
|
|||
kernel_##name(kg); \
|
||||
}
|
||||
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
|
@ -186,6 +188,7 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
|
|||
REGISTER(shader);
|
||||
|
||||
REGISTER(data_init);
|
||||
REGISTER(path_init);
|
||||
REGISTER(scene_intersect);
|
||||
REGISTER(lamp_emission);
|
||||
REGISTER(queue_enqueue);
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "../../split/kernel_split_common.h"
|
||||
#include "../../split/kernel_data_init.h"
|
||||
#include "../../split/kernel_path_init.h"
|
||||
#include "../../split/kernel_scene_intersect.h"
|
||||
#include "../../split/kernel_lamp_emission.h"
|
||||
#include "../../split/kernel_queue_enqueue.h"
|
||||
|
@ -81,6 +82,7 @@ kernel_cuda_path_trace_data_init(
|
|||
kernel_##name(NULL); \
|
||||
}
|
||||
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
|
||||
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel_compat_opencl.h"
|
||||
#include "split/kernel_split_common.h"
|
||||
#include "split/kernel_path_init.h"
|
||||
|
||||
/* OpenCL entry point for the split-kernel path initialization step.
 *
 * Forwards directly to kernel_path_init(kg), which performs all of the work.
 * The `data` argument is not referenced in this body.
 * NOTE(review): presumably `data` is kept so every split kernel shares the
 * same (kg, data) signature - confirm against the host-side argument setup.
 */
__kernel void kernel_ocl_path_trace_path_init(
|
||||
KernelGlobals *kg,
|
||||
ccl_constant KernelData *data)
|
||||
{
|
||||
kernel_path_init(kg);
|
||||
}
|
|
@ -18,33 +18,6 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* Note on kernel_data_initialization kernel
|
||||
* This kernel initializes structures needed in path-iteration kernels.
|
||||
* This is the first kernel in ray-tracing logic.
|
||||
*
|
||||
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
|
||||
*
|
||||
* Its input and output are as follows,
|
||||
*
|
||||
* Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
|
||||
* Un-initialized throughput -------| |--- Initialized throughput
|
||||
* Un-initialized L_transparent ----| |--- Initialized L_transparent
|
||||
* Un-initialized PathRadiance -----| |--- Initialized PathRadiance
|
||||
* Un-initialized Ray --------------| |--- Initialized Ray
|
||||
* Un-initialized PathState --------| |--- Initialized PathState
|
||||
* Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
|
||||
* Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0)
|
||||
* Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false)
|
||||
* Un-initialized ray_state --------| |--- Initialized ray_state
|
||||
* parallel_samples --------------- | |--- Initialized per_sample_output_buffers
|
||||
* rng_state -----------------------| |--- Initialized work_array
|
||||
* data ----------------------------| |--- Initialized work_pool_wgs
|
||||
* start_sample --------------------| |
|
||||
* sx ------------------------------| |
|
||||
* sy ------------------------------| |
|
||||
* sw ------------------------------| |
|
||||
* sh ------------------------------| |
|
||||
* stride --------------------------| |
|
||||
* queuesize -----------------------| |
|
||||
* num_samples ---------------------| |
|
||||
*
|
||||
* Note on Queues :
|
||||
* All slots in queues are initialized to queue empty slot;
|
||||
|
@ -137,80 +110,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
|
|||
*/
|
||||
*use_queues_flag = 0;
|
||||
}
|
||||
|
||||
int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
|
||||
|
||||
/* This is the first assignment to ray_state;
|
||||
* So we don't use the ASSIGN_RAY_STATE macro.
|
||||
*/
|
||||
kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
|
||||
|
||||
unsigned int my_sample;
|
||||
unsigned int pixel_x;
|
||||
unsigned int pixel_y;
|
||||
unsigned int tile_x;
|
||||
unsigned int tile_y;
|
||||
unsigned int my_sample_tile;
|
||||
|
||||
unsigned int work_index = 0;
|
||||
/* Get work. */
|
||||
if(!get_next_work(kg, &work_index, ray_index)) {
|
||||
/* No more work, mark ray as inactive */
|
||||
kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get the sample associated with the work. */
|
||||
my_sample = get_work_sample(kg, work_index, ray_index) + start_sample;
|
||||
|
||||
my_sample_tile = 0;
|
||||
|
||||
/* Get pixel and tile position associated with the work. */
|
||||
get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
|
||||
&tile_x, &tile_y,
|
||||
work_index,
|
||||
ray_index);
|
||||
kernel_split_state.work_array[ray_index] = work_index;
|
||||
|
||||
rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
|
||||
|
||||
ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
|
||||
per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
|
||||
|
||||
/* Initialize random numbers and ray. */
|
||||
kernel_path_trace_setup(kg,
|
||||
rng_state,
|
||||
my_sample,
|
||||
pixel_x, pixel_y,
|
||||
&kernel_split_state.rng[ray_index],
|
||||
&kernel_split_state.ray[ray_index]);
|
||||
|
||||
if(kernel_split_state.ray[ray_index].t != 0.0f) {
|
||||
/* Initialize throughput, L_transparent, Ray, PathState;
|
||||
* These rays proceed with path-iteration.
|
||||
*/
|
||||
kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
|
||||
kernel_split_state.L_transparent[ray_index] = 0.0f;
|
||||
path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
|
||||
path_state_init(kg,
|
||||
&kernel_split_state.sd_DL_shadow[ray_index],
|
||||
&kernel_split_state.path_state[ray_index],
|
||||
&kernel_split_state.rng[ray_index],
|
||||
my_sample,
|
||||
&kernel_split_state.ray[ray_index]);
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
debug_data_init(&kernel_split_state.debug_data[ray_index]);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
/* These rays do not participate in path-iteration. */
|
||||
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
/* Accumulate result in output buffer. */
|
||||
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
||||
path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
|
||||
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright 2011-2017 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* This kernel initializes structures needed in path-iteration kernels.
|
||||
* This is the first kernel in ray-tracing logic.
|
||||
*
|
||||
* Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
|
||||
*/
|
||||
|
||||
ccl_device void kernel_path_init(KernelGlobals *kg) {
|
||||
int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
|
||||
|
||||
/* This is the first assignment to ray_state;
|
||||
* So we dont use ASSIGN_RAY_STATE macro.
|
||||
*/
|
||||
kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
|
||||
|
||||
unsigned int my_sample;
|
||||
unsigned int pixel_x;
|
||||
unsigned int pixel_y;
|
||||
unsigned int tile_x;
|
||||
unsigned int tile_y;
|
||||
unsigned int my_sample_tile;
|
||||
|
||||
unsigned int work_index = 0;
|
||||
/* Get work. */
|
||||
if(!get_next_work(kg, &work_index, ray_index)) {
|
||||
/* No more work, mark ray as inactive */
|
||||
kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get the sample associated with the work. */
|
||||
my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
|
||||
|
||||
my_sample_tile = 0;
|
||||
|
||||
/* Get pixel and tile position associated with the work. */
|
||||
get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
|
||||
&tile_x, &tile_y,
|
||||
work_index,
|
||||
ray_index);
|
||||
kernel_split_state.work_array[ray_index] = work_index;
|
||||
|
||||
ccl_global uint *rng_state = kernel_split_params.rng_state;
|
||||
rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
|
||||
|
||||
ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
|
||||
per_sample_output_buffers += (tile_x + tile_y * kernel_split_params.stride + my_sample_tile)
|
||||
* kernel_data.film.pass_stride;
|
||||
|
||||
/* Initialize random numbers and ray. */
|
||||
kernel_path_trace_setup(kg,
|
||||
rng_state,
|
||||
my_sample,
|
||||
pixel_x, pixel_y,
|
||||
&kernel_split_state.rng[ray_index],
|
||||
&kernel_split_state.ray[ray_index]);
|
||||
|
||||
if(kernel_split_state.ray[ray_index].t != 0.0f) {
|
||||
/* Initialize throughput, L_transparent, Ray, PathState;
|
||||
* These rays proceed with path-iteration.
|
||||
*/
|
||||
kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
|
||||
kernel_split_state.L_transparent[ray_index] = 0.0f;
|
||||
path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
|
||||
path_state_init(kg,
|
||||
&kernel_split_state.sd_DL_shadow[ray_index],
|
||||
&kernel_split_state.path_state[ray_index],
|
||||
&kernel_split_state.rng[ray_index],
|
||||
my_sample,
|
||||
&kernel_split_state.ray[ray_index]);
|
||||
#ifdef __KERNEL_DEBUG__
|
||||
debug_data_init(&kernel_split_state.debug_data[ray_index]);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
/* These rays do not participate in path-iteration. */
|
||||
float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
/* Accumulate result in output buffer. */
|
||||
kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
|
||||
path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
|
||||
ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
Loading…
Reference in New Issue