Cleanup: some renaming to better distinguish main and shadow paths

commit 52c5300214
parent cccfa597ba
@@ -80,24 +80,23 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
           device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE),
       integrator_shader_sort_prefix_sum_(
           device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
+      integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
       integrator_next_shadow_path_index_(
           device, "integrator_next_shadow_path_index", MEM_READ_WRITE),
-      integrator_next_shadow_catcher_path_index_(
-          device, "integrator_next_shadow_catcher_path_index", MEM_READ_WRITE),
       queued_paths_(device, "queued_paths", MEM_READ_WRITE),
       num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
       work_tiles_(device, "work_tiles", MEM_READ_WRITE),
       display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
       max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
-      min_num_active_paths_(queue_->num_concurrent_busy_states()),
-      max_active_path_index_(0)
+      min_num_active_main_paths_(queue_->num_concurrent_busy_states()),
+      max_active_main_path_index_(0)
 {
   memset(&integrator_state_gpu_, 0, sizeof(integrator_state_gpu_));

   /* Limit number of active paths to the half of the overall state. This is due to the logic in the
    * path compaction which relies on the fact that regeneration does not happen sooner than half of
    * the states are available again. */
-  min_num_active_paths_ = min(min_num_active_paths_, max_num_paths_ / 2);
+  min_num_active_main_paths_ = min(min_num_active_main_paths_, max_num_paths_ / 2);
 }

 void PathTraceWorkGPU::alloc_integrator_soa()
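The clamp at the end of the constructor is worth spelling out: path compaction only regenerates paths once at least half of the state slots are free again, so the scheduler must never hold out for more active main paths than half the state size. A minimal self-contained sketch of that rule (standalone function with an illustrative name; the real code uses the class members above):

#include <algorithm>

// Sketch of the min_num_active_main_paths_ clamp from the constructor above.
int clamp_min_active_main_paths(const int min_num_active_main_paths, const int max_num_paths)
{
  // Never wait for more active main paths than half of the overall state,
  // otherwise regeneration could stall before compaction frees enough slots.
  return std::min(min_num_active_main_paths, max_num_paths / 2);
}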
@@ -222,13 +221,13 @@ void PathTraceWorkGPU::alloc_integrator_path_split()
         (int *)integrator_next_shadow_path_index_.device_pointer;
   }

-  if (integrator_next_shadow_catcher_path_index_.size() == 0) {
-    integrator_next_shadow_catcher_path_index_.alloc(1);
+  if (integrator_next_main_path_index_.size() == 0) {
+    integrator_next_main_path_index_.alloc(1);
     integrator_next_shadow_path_index_.data()[0] = 0;
-    integrator_next_shadow_catcher_path_index_.zero_to_device();
+    integrator_next_main_path_index_.zero_to_device();

-    integrator_state_gpu_.next_shadow_catcher_path_index =
-        (int *)integrator_next_shadow_catcher_path_index_.device_pointer;
+    integrator_state_gpu_.next_main_path_index =
+        (int *)integrator_next_main_path_index_.device_pointer;
   }
 }
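The hunk above shows the setup pattern shared by the split counters: a one-element device vector is allocated on first use, zeroed on the device, and its raw device pointer is published into integrator_state_gpu_ so kernels can advance it atomically. A minimal sketch with stub types (DeviceCounterStub and ensure_next_main_path_index are illustrative names, not Cycles API):

// Stub standing in for a one-int device_vector<int>; illustrative only.
struct DeviceCounterStub {
  int value = 0;
  bool allocated = false;
  int size() const { return allocated ? 1 : 0; }
  void alloc_one() { allocated = true; }
  void zero_to_device() { value = 0; } /* Device-side zeroing in the real code. */
};

// Allocate-once setup mirroring alloc_integrator_path_split() above.
void ensure_next_main_path_index(DeviceCounterStub &counter, int *&published_pointer)
{
  if (counter.size() == 0) {
    counter.alloc_one();
    counter.zero_to_device();           /* Main paths are allocated from index 0. */
    published_pointer = &counter.value; /* Kernels advance this cursor atomically. */
  }
}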
@@ -303,7 +302,7 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
       break;
     }

-    num_busy_accum += get_num_active_paths();
+    num_busy_accum += num_active_main_paths_paths();
     ++num_iterations;
   }

@@ -416,7 +415,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
   void *d_path_index = (void *)NULL;

   /* Create array of path indices for which this kernel is queued to be executed. */
-  int work_size = kernel_max_active_path_index(kernel);
+  int work_size = kernel_max_active_main_path_index(kernel);

   IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data();
   int num_queued = queue_counter->num_queued[kernel];
@@ -505,7 +504,7 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
    *
    * Also, when the number paths is limited it may be better to prefer paths from the
    * end of the array since compaction would need to do less work. */
-  const int work_size = kernel_max_active_path_index(queued_kernel);
+  const int work_size = kernel_max_active_main_path_index(queued_kernel);

   void *d_queued_paths = (void *)queued_paths_.device_pointer;
   void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
@@ -526,7 +525,7 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
   int d_queued_kernel = queued_kernel;

   /* Launch kernel to fill the active paths arrays. */
-  const int work_size = kernel_max_active_path_index(queued_kernel);
+  const int work_size = kernel_max_active_main_path_index(queued_kernel);
   void *d_queued_paths = (void *)queued_paths_.device_pointer;
   void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
   void *args[] = {
@@ -539,12 +538,12 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
 void PathTraceWorkGPU::compact_states(const int num_active_paths)
 {
   if (num_active_paths == 0) {
-    max_active_path_index_ = 0;
+    max_active_main_path_index_ = 0;
   }

   /* Compact fragmented path states into the start of the array, moving any paths
    * with index higher than the number of active paths into the gaps. */
-  if (max_active_path_index_ == num_active_paths) {
+  if (max_active_main_path_index_ == num_active_paths) {
     return;
   }

@@ -564,7 +563,7 @@ void PathTraceWorkGPU::compact_states(const int num_active_paths)
   /* Create array of paths that we need to compact, where the path index is bigger
    * than the number of active paths. */
   {
-    int work_size = max_active_path_index_;
+    int work_size = max_active_main_path_index_;
     void *args[] = {
         &work_size, &d_compact_paths, &d_num_queued_paths, const_cast<int *>(&num_active_paths)};
     queue_->zero_to_device(num_queued_paths_);
@@ -589,7 +588,7 @@ void PathTraceWorkGPU::compact_states(const int num_active_paths)
   queue_->synchronize();

   /* Adjust max active path index now we know which part of the array is actually used. */
-  max_active_path_index_ = num_active_paths;
+  max_active_main_path_index_ = num_active_paths;
 }

 bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
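Taken together, the compact_states() hunks implement the strategy the comments describe: alive states stranded above num_active_paths are gathered and moved into the holes below it, after which the maximum active index can be lowered to num_active_paths. A CPU-side sketch of that idea, assuming a simple per-slot alive flag (the real code gathers both index lists with kernels and moves whole SoA states on the device):

#include <vector>

// Sketch of the compaction strategy; alive[i] marks a state slot in use.
void compact_states_sketch(std::vector<char> &alive,
                           const int num_active_paths,
                           int &max_active_main_path_index)
{
  // Gather alive states stranded above the active-path watermark.
  std::vector<int> src;
  for (int i = num_active_paths; i < max_active_main_path_index; i++) {
    if (alive[i]) {
      src.push_back(i);
    }
  }

  // Gather an equal number of holes below the watermark.
  std::vector<int> dst;
  for (int i = 0; i < num_active_paths && dst.size() < src.size(); i++) {
    if (!alive[i]) {
      dst.push_back(i);
    }
  }

  // Move each stranded state into a hole (a copy kernel on the GPU).
  for (size_t k = 0; k < src.size(); k++) {
    alive[dst[k]] = 1;
    alive[src[k]] = 0;
  }

  // Every alive state now sits in [0, num_active_paths).
  max_active_main_path_index = num_active_paths;
}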
@@ -603,7 +602,7 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
     return false;
   }

-  int num_active_paths = get_num_active_paths();
+  int num_active_paths = num_active_main_paths_paths();

   /* Don't schedule more work if canceling. */
   if (is_cancel_requested()) {
@@ -643,7 +642,7 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
   /* Schedule when we're out of paths or there are too few paths to keep the
    * device occupied. */
   int num_paths = num_active_paths;
-  if (num_paths == 0 || num_paths < min_num_active_paths_) {
+  if (num_paths == 0 || num_paths < min_num_active_main_paths_) {
     /* Get work tiles until the maximum number of path is reached. */
     while (num_paths < max_num_camera_paths) {
       KernelWorkTile work_tile;
@@ -673,8 +672,8 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
   compact_states(num_active_paths);

   if (has_shadow_catcher()) {
-    integrator_next_shadow_catcher_path_index_.data()[0] = num_paths;
-    queue_->copy_to_device(integrator_next_shadow_catcher_path_index_);
+    integrator_next_main_path_index_.data()[0] = num_paths;
+    queue_->copy_to_device(integrator_next_main_path_index_);
   }

   enqueue_work_tiles((device_scene_->data.bake.use) ? DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE :
@@ -727,10 +726,10 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,

   queue_->enqueue(kernel, max_tile_work_size * num_work_tiles, args);

-  max_active_path_index_ = path_index_offset + num_predicted_splits;
+  max_active_main_path_index_ = path_index_offset + num_predicted_splits;
 }

-int PathTraceWorkGPU::get_num_active_paths()
+int PathTraceWorkGPU::num_active_main_paths_paths()
 {
   /* TODO: this is wrong, does not account for duplicates with shadow! */
   IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data();
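The renamed num_active_main_paths_paths() sums the per-kernel queue counters, and the TODO kept in the hunk above records the caveat: a main path whose shadow path is also queued can be counted twice. A hedged sketch of the computation (struct layout and kernel count are placeholders, not the exact Cycles definitions):

// Illustrative counter block: one queued-path count per integrator kernel.
struct QueueCounterSketch {
  static const int kNumKernels = 16; /* Placeholder size. */
  int num_queued[kNumKernels];
};

// Sum queued paths across kernels; per the TODO above, paths queued for
// both a main kernel and a shadow kernel are not deduplicated here.
int num_active_main_paths_sketch(const QueueCounterSketch &counter)
{
  int num_paths = 0;
  for (int i = 0; i < QueueCounterSketch::kNumKernels; i++) {
    num_paths += counter.num_queued[i];
  }
  return num_paths;
}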
@@ -1005,7 +1004,7 @@ bool PathTraceWorkGPU::has_shadow_catcher() const

 int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
 {
-  if (max_active_path_index_ == 0) {
+  if (max_active_main_path_index_ == 0) {
     return 0;
   }

@@ -1015,7 +1014,7 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()

   queue_->zero_to_device(num_queued_paths_);

-  const int work_size = max_active_path_index_;
+  const int work_size = max_active_main_path_index_;
   void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
   void *args[] = {const_cast<int *>(&work_size), &d_num_queued_paths};

@@ -1052,10 +1051,10 @@ bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel)
           kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
 }

-int PathTraceWorkGPU::kernel_max_active_path_index(DeviceKernel kernel)
+int PathTraceWorkGPU::kernel_max_active_main_path_index(DeviceKernel kernel)
 {
   return (kernel_is_shadow_path(kernel)) ? integrator_next_shadow_path_index_.data()[0] :
-                                           max_active_path_index_;
+                                           max_active_main_path_index_;
 }

 CCL_NAMESPACE_END

@@ -88,7 +88,7 @@ class PathTraceWorkGPU : public PathTraceWork {

   void compact_states(const int num_active_paths);

-  int get_num_active_paths();
+  int num_active_main_paths_paths();

   /* Check whether graphics interop can be used for the PathTraceDisplay update. */
   bool should_use_graphics_interop();
@@ -120,7 +120,7 @@ class PathTraceWorkGPU : public PathTraceWork {
   bool kernel_creates_shadow_paths(DeviceKernel kernel);
   bool kernel_creates_ao_paths(DeviceKernel kernel);
   bool kernel_is_shadow_path(DeviceKernel kernel);
-  int kernel_max_active_path_index(DeviceKernel kernel);
+  int kernel_max_active_main_path_index(DeviceKernel kernel);

   /* Integrator queue. */
   unique_ptr<DeviceQueue> queue_;
@@ -141,8 +141,8 @@ class PathTraceWorkGPU : public PathTraceWork {
   device_vector<int> integrator_shader_raytrace_sort_counter_;
   device_vector<int> integrator_shader_sort_prefix_sum_;
   /* Path split. */
+  device_vector<int> integrator_next_main_path_index_;
   device_vector<int> integrator_next_shadow_path_index_;
-  device_vector<int> integrator_next_shadow_catcher_path_index_;

   /* Temporary buffer to get an array of queued path for a particular kernel. */
   device_vector<int> queued_paths_;
@@ -166,12 +166,12 @@ class PathTraceWorkGPU : public PathTraceWork {

   /* Minimum number of paths which keeps the device bust. If the actual number of paths falls below
    * this value more work will be scheduled. */
-  int min_num_active_paths_;
+  int min_num_active_main_paths_;

   /* Maximum path index, effective number of paths used may be smaller than
    * the size of the integrator_state_ buffer so can avoid iterating over the
    * full buffer. */
-  int max_active_path_index_;
+  int max_active_main_path_index_;
 };

 CCL_NAMESPACE_END

@@ -139,7 +139,7 @@ typedef struct IntegratorStateGPU {
   ccl_global int *next_shadow_path_index;

   /* Index of main path which will be used by a next shadow catcher split. */
-  ccl_global int *next_shadow_catcher_path_index;
+  ccl_global int *next_main_path_index;
 } IntegratorStateGPU;

 /* Abstraction

@@ -274,7 +274,7 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
 {
 #if defined(__KERNEL_GPU__)
   ConstIntegratorState to_state = atomic_fetch_and_add_uint32(
-      &kernel_integrator_state.next_shadow_catcher_path_index[0], 1);
+      &kernel_integrator_state.next_main_path_index[0], 1);

   integrator_state_copy_only(kg, to_state, state);
 #else
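This last hunk is the consumer of next_main_path_index: on the GPU, each shadow-catcher split claims a fresh main-path slot with an atomic fetch-and-add, and enqueue_work_tiles() seeds the counter with the current path count so that splits land above the freshly generated states. A CPU analogue of that cursor pattern, with std::atomic standing in for the device atomic (names are illustrative):

#include <atomic>

// Shared allocation cursor, the CPU analogue of next_main_path_index.
static std::atomic<int> next_main_path_index{0};

// Host side: seed the cursor after work tiles are enqueued, so splits
// allocate slots above the num_paths freshly generated main paths.
void seed_split_cursor(const int num_paths)
{
  next_main_path_index.store(num_paths);
}

// Device side (sketch): claim a unique state slot for the split copy.
int allocate_split_slot()
{
  return next_main_path_index.fetch_add(1);
}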