Cleanup: some renaming to better distinguish main and shadow paths

Brecht Van Lommel 2021-10-20 14:45:17 +02:00
parent cccfa597ba
commit 52c5300214
4 changed files with 34 additions and 35 deletions

View File

@@ -80,24 +80,23 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE),
integrator_shader_sort_prefix_sum_(
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
integrator_next_shadow_path_index_(
device, "integrator_next_shadow_path_index", MEM_READ_WRITE),
integrator_next_shadow_catcher_path_index_(
device, "integrator_next_shadow_catcher_path_index", MEM_READ_WRITE),
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
min_num_active_paths_(queue_->num_concurrent_busy_states()),
max_active_path_index_(0)
min_num_active_main_paths_(queue_->num_concurrent_busy_states()),
max_active_main_path_index_(0)
{
memset(&integrator_state_gpu_, 0, sizeof(integrator_state_gpu_));
/* Limit the number of active paths to half of the overall state. This is due to the logic in the
* path compaction which relies on the fact that regeneration does not happen sooner than half of
* the states are available again. */
min_num_active_paths_ = min(min_num_active_paths_, max_num_paths_ / 2);
min_num_active_main_paths_ = min(min_num_active_main_paths_, max_num_paths_ / 2);
}
void PathTraceWorkGPU::alloc_integrator_soa()
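A minimal sketch of the invariant behind the clamp in the constructor above; the helper name and the numbers are hypothetical, not Cycles API. Regeneration only starts once the active main paths drop below min(num_concurrent_busy_states(), max_num_paths_ / 2), so at least half of the state pool is free whenever new camera paths are scheduled.

/* Hypothetical sketch, not part of the Cycles sources: the clamp mirrors the
 * constructor above; the assert states the invariant compaction relies on. */
#include <algorithm>
#include <cassert>

static int clamp_min_active_main_paths(int num_concurrent_busy_states, int max_num_paths)
{
  return std::min(num_concurrent_busy_states, max_num_paths / 2);
}

int main()
{
  const int max_num_paths = 1 << 20; /* assumed size of the path state pool */
  const int busy_states = 700000;    /* assumed device occupancy hint */
  const int min_active = clamp_min_active_main_paths(busy_states, max_num_paths);
  assert(min_active <= max_num_paths / 2); /* at least half the pool is free at regeneration */
  return 0;
}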
@@ -222,13 +221,13 @@ void PathTraceWorkGPU::alloc_integrator_path_split()
(int *)integrator_next_shadow_path_index_.device_pointer;
}
if (integrator_next_shadow_catcher_path_index_.size() == 0) {
integrator_next_shadow_catcher_path_index_.alloc(1);
if (integrator_next_main_path_index_.size() == 0) {
integrator_next_main_path_index_.alloc(1);
integrator_next_shadow_path_index_.data()[0] = 0;
integrator_next_shadow_catcher_path_index_.zero_to_device();
integrator_next_main_path_index_.zero_to_device();
integrator_state_gpu_.next_shadow_catcher_path_index =
(int *)integrator_next_shadow_catcher_path_index_.device_pointer;
integrator_state_gpu_.next_main_path_index =
(int *)integrator_next_main_path_index_.device_pointer;
}
}
@@ -303,7 +302,7 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
break;
}
num_busy_accum += get_num_active_paths();
num_busy_accum += num_active_main_paths_paths();
++num_iterations;
}
@@ -416,7 +415,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
void *d_path_index = (void *)NULL;
/* Create array of path indices for which this kernel is queued to be executed. */
int work_size = kernel_max_active_path_index(kernel);
int work_size = kernel_max_active_main_path_index(kernel);
IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data();
int num_queued = queue_counter->num_queued[kernel];
@@ -505,7 +504,7 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
*
* Also, when the number of paths is limited it may be better to prefer paths from the
* end of the array since compaction would need to do less work. */
const int work_size = kernel_max_active_path_index(queued_kernel);
const int work_size = kernel_max_active_main_path_index(queued_kernel);
void *d_queued_paths = (void *)queued_paths_.device_pointer;
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
@@ -526,7 +525,7 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
int d_queued_kernel = queued_kernel;
/* Launch kernel to fill the active paths arrays. */
const int work_size = kernel_max_active_path_index(queued_kernel);
const int work_size = kernel_max_active_main_path_index(queued_kernel);
void *d_queued_paths = (void *)queued_paths_.device_pointer;
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
void *args[] = {
@@ -539,12 +538,12 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
void PathTraceWorkGPU::compact_states(const int num_active_paths)
{
if (num_active_paths == 0) {
max_active_path_index_ = 0;
max_active_main_path_index_ = 0;
}
/* Compact fragmented path states into the start of the array, moving any paths
* with index higher than the number of active paths into the gaps. */
if (max_active_path_index_ == num_active_paths) {
if (max_active_main_path_index_ == num_active_paths) {
return;
}
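As a rough CPU-side sketch of the compaction idea described in the comment above, surviving states above num_active are moved into the gaps below it. The data layout and names here are invented for illustration; the real code performs this on the device via kernels and device_vector buffers, which this diff only shows in part.

/* Hypothetical sketch, not the Cycles implementation: compact active states so
 * that indices [0, num_active) hold all surviving paths. */
#include <vector>

struct PathStateSketch {
  bool active = false;
  /* ... per-path data would live here ... */
};

static void compact_states_sketch(std::vector<PathStateSketch> &states, const int num_active)
{
  int free_slot = 0;
  for (int from = num_active; from < (int)states.size(); ++from) {
    if (!states[from].active) {
      continue;
    }
    /* Find the next gap in the low part of the array. */
    while (free_slot < num_active && states[free_slot].active) {
      ++free_slot;
    }
    if (free_slot >= num_active) {
      break;
    }
    states[free_slot] = states[from];
    states[from].active = false;
  }
  /* Afterwards the maximum active main path index can be lowered to num_active. */
}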
@@ -564,7 +563,7 @@ void PathTraceWorkGPU::compact_states(const int num_active_paths)
/* Create array of paths that we need to compact, where the path index is bigger
* than the number of active paths. */
{
int work_size = max_active_path_index_;
int work_size = max_active_main_path_index_;
void *args[] = {
&work_size, &d_compact_paths, &d_num_queued_paths, const_cast<int *>(&num_active_paths)};
queue_->zero_to_device(num_queued_paths_);
@@ -589,7 +588,7 @@ void PathTraceWorkGPU::compact_states(const int num_active_paths)
queue_->synchronize();
/* Adjust max active path index now we know which part of the array is actually used. */
max_active_path_index_ = num_active_paths;
max_active_main_path_index_ = num_active_paths;
}
bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
@@ -603,7 +602,7 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
return false;
}
int num_active_paths = get_num_active_paths();
int num_active_paths = num_active_main_paths_paths();
/* Don't schedule more work if canceling. */
if (is_cancel_requested()) {
@@ -643,7 +642,7 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
/* Schedule when we're out of paths or there are too few paths to keep the
* device occupied. */
int num_paths = num_active_paths;
if (num_paths == 0 || num_paths < min_num_active_paths_) {
if (num_paths == 0 || num_paths < min_num_active_main_paths_) {
/* Get work tiles until the maximum number of paths is reached. */
while (num_paths < max_num_camera_paths) {
KernelWorkTile work_tile;
@@ -673,8 +672,8 @@ bool PathTraceWorkGPU::enqueue_work_tiles(bool &finished)
compact_states(num_active_paths);
if (has_shadow_catcher()) {
integrator_next_shadow_catcher_path_index_.data()[0] = num_paths;
queue_->copy_to_device(integrator_next_shadow_catcher_path_index_);
integrator_next_main_path_index_.data()[0] = num_paths;
queue_->copy_to_device(integrator_next_main_path_index_);
}
enqueue_work_tiles((device_scene_->data.bake.use) ? DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE :
@@ -727,10 +726,10 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,
queue_->enqueue(kernel, max_tile_work_size * num_work_tiles, args);
max_active_path_index_ = path_index_offset + num_predicted_splits;
max_active_main_path_index_ = path_index_offset + num_predicted_splits;
}
int PathTraceWorkGPU::get_num_active_paths()
int PathTraceWorkGPU::num_active_main_paths_paths()
{
/* TODO: this is wrong, does not account for duplicates with shadow! */
IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data();
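The rest of this function is not shown in the hunk; conceptually it sums the per-kernel queue counters. A hedged sketch of such a sum, with the counter layout assumed rather than taken from this diff, makes the TODO concrete: a main path that also has a shadow kernel queued contributes to two counters and is counted twice.

/* Hypothetical sketch, assuming a plain array of per-kernel queued counts. */
static int sum_queued_paths_sketch(const int *num_queued, const int num_kernels)
{
  int num_paths = 0;
  for (int i = 0; i < num_kernels; i++) {
    /* As the TODO notes, shadow kernels are included here, so a main path with a
     * queued shadow kernel is double-counted by this plain sum. */
    num_paths += num_queued[i];
  }
  return num_paths;
}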
@@ -1005,7 +1004,7 @@ bool PathTraceWorkGPU::has_shadow_catcher() const
int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
{
if (max_active_path_index_ == 0) {
if (max_active_main_path_index_ == 0) {
return 0;
}
@@ -1015,7 +1014,7 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
queue_->zero_to_device(num_queued_paths_);
const int work_size = max_active_path_index_;
const int work_size = max_active_main_path_index_;
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
void *args[] = {const_cast<int *>(&work_size), &d_num_queued_paths};
@@ -1052,10 +1051,10 @@ bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel)
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
}
int PathTraceWorkGPU::kernel_max_active_path_index(DeviceKernel kernel)
int PathTraceWorkGPU::kernel_max_active_main_path_index(DeviceKernel kernel)
{
return (kernel_is_shadow_path(kernel)) ? integrator_next_shadow_path_index_.data()[0] :
max_active_path_index_;
max_active_main_path_index_;
}
CCL_NAMESPACE_END

View File

@@ -88,7 +88,7 @@ class PathTraceWorkGPU : public PathTraceWork {
void compact_states(const int num_active_paths);
int get_num_active_paths();
int num_active_main_paths_paths();
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
bool should_use_graphics_interop();
@@ -120,7 +120,7 @@ class PathTraceWorkGPU : public PathTraceWork {
bool kernel_creates_shadow_paths(DeviceKernel kernel);
bool kernel_creates_ao_paths(DeviceKernel kernel);
bool kernel_is_shadow_path(DeviceKernel kernel);
int kernel_max_active_path_index(DeviceKernel kernel);
int kernel_max_active_main_path_index(DeviceKernel kernel);
/* Integrator queue. */
unique_ptr<DeviceQueue> queue_;
@@ -141,8 +141,8 @@ class PathTraceWorkGPU : public PathTraceWork {
device_vector<int> integrator_shader_raytrace_sort_counter_;
device_vector<int> integrator_shader_sort_prefix_sum_;
/* Path split. */
device_vector<int> integrator_next_main_path_index_;
device_vector<int> integrator_next_shadow_path_index_;
device_vector<int> integrator_next_shadow_catcher_path_index_;
/* Temporary buffer to get an array of queued path for a particular kernel. */
device_vector<int> queued_paths_;
@@ -166,12 +166,12 @@ class PathTraceWorkGPU : public PathTraceWork {
/* Minimum number of paths which keeps the device busy. If the actual number of paths falls below
* this value more work will be scheduled. */
int min_num_active_paths_;
int min_num_active_main_paths_;
/* Maximum path index; the effective number of paths used may be smaller than
* the size of the integrator_state_ buffer, so we can avoid iterating over the
* full buffer. */
int max_active_path_index_;
int max_active_main_path_index_;
};
CCL_NAMESPACE_END

View File

@@ -139,7 +139,7 @@ typedef struct IntegratorStateGPU {
ccl_global int *next_shadow_path_index;
/* Index of main path which will be used by the next shadow catcher split. */
ccl_global int *next_shadow_catcher_path_index;
ccl_global int *next_main_path_index;
} IntegratorStateGPU;
/* Abstraction

View File

@@ -274,7 +273,7 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
{
#if defined(__KERNEL_GPU__)
ConstIntegratorState to_state = atomic_fetch_and_add_uint32(
&kernel_integrator_state.next_shadow_catcher_path_index[0], 1);
&kernel_integrator_state.next_main_path_index[0], 1);
integrator_state_copy_only(kg, to_state, state);
#else
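For reference, a minimal host-side illustration of the reservation pattern used by the GPU branch above: each shadow-catcher split atomically claims the next unused main path slot. The names here are invented for the sketch; the kernel itself uses atomic_fetch_and_add_uint32 on the device-side counter.

/* Hypothetical sketch, not Cycles code: atomically reserve the next main path
 * index so concurrent splits each get a unique slot. */
#include <atomic>

static std::atomic<unsigned int> next_main_path_index{0};

static unsigned int reserve_main_path_slot()
{
  /* fetch_add returns the previous value, mirroring
   * atomic_fetch_and_add_uint32(&kernel_integrator_state.next_main_path_index[0], 1). */
  return next_main_path_index.fetch_add(1u, std::memory_order_relaxed);
}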