Cleanup: use lambdas instead of functors for task pools, remove threadid

Brecht Van Lommel 2020-06-05 14:18:02 +02:00
parent d8c2092b15
commit b10b7cdb43
11 changed files with 113 additions and 216 deletions
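The heart of the change: instead of allocating a Task subclass that wraps function_bind, callers now hand the pool a lambda directly, and tasks no longer receive a thread ID. A minimal, self-contained sketch of the new pattern (MiniTaskPool is a hypothetical stand-in for Cycles' TaskPool, not the real API):

#include <functional>
#include <queue>
#include <utility>

typedef std::function<void(void)> TaskRunFunction;

class MiniTaskPool {
 public:
  /* Tasks are plain callables now, moved into the queue; the caller no
   * longer heap-allocates a Task subclass. */
  void push(TaskRunFunction &&task)
  {
    queue_.push(std::move(task));
  }

  void wait_work()
  {
    while (!queue_.empty()) {
      TaskRunFunction task = std::move(queue_.front());
      queue_.pop();
      task(); /* invoked without a thread_id argument */
    }
  }

 private:
  std::queue<TaskRunFunction> queue_;
};

int main()
{
  MiniTaskPool pool;
  int result = 0;
  /* Previously: pool.push(new SomeTask(...)) with function_bind inside. */
  pool.push([&result] { result += 42; });
  pool.wait_work();
  return (result == 42) ? 0 : 1;
}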

View File

@@ -39,48 +39,6 @@
CCL_NAMESPACE_BEGIN
/* BVH Build Task */
class BVHBuildTask : public Task {
public:
BVHBuildTask(
BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
: range_(range)
{
run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
}
private:
BVHObjectBinning range_;
};
class BVHSpatialSplitBuildTask : public Task {
public:
BVHSpatialSplitBuildTask(BVHBuild *build,
InnerNode *node,
int child,
const BVHRange &range,
const vector<BVHReference> &references,
int level)
: range_(range),
references_(references.begin() + range.start(), references.begin() + range.end())
{
range_.set_start(0);
run = function_bind(&BVHBuild::thread_build_spatial_split_node,
build,
node,
child,
&range_,
&references_,
level,
_1);
}
private:
BVHRange range_;
vector<BVHReference> references_;
};
/* Constructor / Destructor */
BVHBuild::BVHBuild(const vector<Object *> &objects_,
@@ -449,7 +407,8 @@ BVHNode *BVHBuild::run()
if (params.use_spatial_split) {
/* Perform multithreaded spatial split build. */
rootnode = build_node(root, &references, 0, 0);
BVHSpatialStorage *local_storage = &spatial_storage.local();
rootnode = build_node(root, references, 0, local_storage);
task_pool.wait_work();
}
else {
@@ -516,30 +475,36 @@ void BVHBuild::progress_update()
progress_start_time = time_dt();
}
void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
void BVHBuild::thread_build_node(InnerNode *inner,
int child,
const BVHObjectBinning &range,
int level)
{
if (progress.get_cancel())
return;
/* build nodes */
BVHNode *node = build_node(*range, level);
BVHNode *node = build_node(range, level);
/* set child in inner node */
inner->children[child] = node;
/* update progress */
if (range->size() < THREAD_TASK_SIZE) {
if (range.size() < THREAD_TASK_SIZE) {
/*rotate(node, INT_MAX, 5);*/
thread_scoped_lock lock(build_mutex);
progress_count += range->size();
progress_count += range.size();
progress_update();
}
}
void BVHBuild::thread_build_spatial_split_node(
InnerNode *inner, int child, BVHRange *range, vector<BVHReference> *references, int level)
void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
int child,
const BVHRange &range,
vector<BVHReference> &references,
int level)
{
if (progress.get_cancel()) {
return;
@@ -549,7 +514,7 @@ void BVHBuild::thread_build_spatial_split_node(
BVHSpatialStorage *local_storage = &spatial_storage.local();
/* build nodes */
BVHNode *node = build_node(*range, references, level, local_storage);
BVHNode *node = build_node(range, references, level, local_storage);
/* set child in inner node */
inner->children[child] = node;
@@ -661,8 +626,8 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* Threaded build */
inner = new InnerNode(bounds);
task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); }, true);
task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); }, true);
}
if (do_unalinged_split) {
@@ -674,7 +639,7 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* multithreaded spatial split builder */
BVHNode *BVHBuild::build_node(const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
int level,
BVHSpatialStorage *storage)
{
@@ -693,7 +658,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (params.small_enough_for_leaf(range.size(), level)) {
progress_count += range.size();
return create_leaf_node(range, *references);
return create_leaf_node(range, references);
}
}
@@ -703,7 +668,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (split.no_split) {
progress_count += range.size();
return create_leaf_node(range, *references);
return create_leaf_node(range, references);
}
}
float leafSAH = params.sah_primitive_cost * split.leafSAH;
@@ -716,7 +681,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
Transform aligned_space;
bool do_unalinged_split = false;
if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0));
unaligned_split = BVHMixedSplit(
this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
@@ -742,8 +707,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
BoundBox bounds;
if (do_unalinged_split) {
bounds = unaligned_heuristic.compute_aligned_boundbox(
range, &references->at(0), aligned_space);
bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space);
}
else {
bounds = range.bounds();
@@ -755,24 +719,39 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
/* Local build. */
/* Build left node. */
vector<BVHReference> copy(references->begin() + right.start(),
references->begin() + right.end());
vector<BVHReference> right_references(references.begin() + right.start(),
references.begin() + right.end());
right.set_start(0);
BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
BVHNode *leftnode = build_node(left, references, level + 1, storage);
/* Build right node. */
BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
BVHNode *rightnode = build_node(right, right_references, level + 1, storage);
inner = new InnerNode(bounds, leftnode, rightnode);
}
else {
/* Threaded build. */
inner = new InnerNode(bounds);
task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
true);
task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
true);
vector<BVHReference> left_references(references.begin() + left.start(),
references.begin() + left.end());
vector<BVHReference> right_references(references.begin() + right.start(),
references.begin() + right.end());
right.set_start(0);
/* Create tasks for left and right nodes, using copy for most arguments and
* move for references to avoid memory copies. */
task_pool.push(
[=, refs = std::move(left_references)]() mutable {
thread_build_spatial_split_node(inner, 0, left, refs, level + 1);
},
true);
task_pool.push(
[=, refs = std::move(right_references)]() mutable {
thread_build_spatial_split_node(inner, 1, right, refs, level + 1);
},
true);
}
if (do_unalinged_split) {
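The init-captures above are the one subtle spot in this file: the reference vectors must outlive the current stack frame, so they are moved into the closure, and `mutable` is required so the captured copy can bind to the non-const `vector<BVHReference> &` parameter. A minimal sketch of the same technique with illustrative names (not the Cycles API):

#include <utility>
#include <vector>

/* Stands in for thread_build_spatial_split_node, which mutates the vector. */
static void consume(std::vector<int> &refs)
{
  refs.push_back(0);
}

int main()
{
  std::vector<int> references = {1, 2, 3};
  /* C++14 init-capture: move the vector into the closure rather than
   * copying it; `mutable` makes the captured member modifiable so it can
   * be passed by non-const reference. */
  auto task = [refs = std::move(references)]() mutable { consume(refs); };
  task();
  return 0;
}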

View File

@@ -74,7 +74,7 @@ class BVHBuild {
/* Building. */
BVHNode *build_node(const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
int level,
BVHSpatialStorage *storage);
BVHNode *build_node(const BVHObjectBinning &range, int level);
@@ -86,9 +86,12 @@ class BVHBuild {
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
void thread_build_spatial_split_node(
InnerNode *node, int child, BVHRange *range, vector<BVHReference> *references, int level);
void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
const BVHRange &range,
vector<BVHReference> &references,
int level);
thread_mutex build_mutex;
/* Progress. */

View File

@@ -88,18 +88,6 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
const int job_end,
const BVHReferenceCompare &compare);
class BVHSortTask : public Task {
public:
BVHSortTask(TaskPool *task_pool,
BVHReference *data,
const int job_start,
const int job_end,
const BVHReferenceCompare &compare)
{
run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
}
};
/* Multi-threaded reference sort. */
static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
@@ -158,7 +146,8 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
have_work = false;
if (left < end) {
if (start < right) {
task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
task_pool->push(
function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare), true);
}
else {
start = left;
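Here the wrapper class is dropped without even introducing a lambda, since function_bind (Cycles' alias for std::bind) already yields something convertible to the pool's new callable type. A rough illustration with stand-in names, using std::bind directly:

#include <functional>

static void sort_range(int *data, int start, int end)
{
  /* placeholder for the threaded partition-and-sort work */
  (void)data;
  (void)start;
  (void)end;
}

int main()
{
  int data[8] = {0};
  /* The bound call converts directly to std::function<void(void)>,
   * which is what TaskPool::push now accepts. */
  std::function<void(void)> task = std::bind(sort_range, data, 0, 8);
  task();
  return 0;
}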

View File

@@ -33,7 +33,7 @@ CCL_NAMESPACE_BEGIN
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -43,7 +43,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
left_bounds(BoundBox::empty),
right_bounds(BoundBox::empty),
storage_(storage),
references_(references),
references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -133,7 +133,7 @@ void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &rang
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -141,7 +141,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
dim(0),
pos(0.0f),
storage_(storage),
references_(references),
references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -152,7 +152,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
}
else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox(
range, &references->at(0), *aligned_space);
range, &references_->at(0), *aligned_space);
}
float3 origin = range_bounds.min;

View File

@@ -44,7 +44,7 @@ class BVHObjectSplit {
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -82,7 +82,7 @@ class BVHSpatialSplit {
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -187,7 +187,7 @@ class BVHMixedSplit {
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
vector<BVHReference> *references,
vector<BVHReference> &references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
@@ -197,7 +197,7 @@
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
range, &references->at(0), *aligned_space);
range, &references.at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
@@ -220,7 +220,7 @@
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references));
}
__forceinline void split(BVHBuild *builder,

View File

@@ -2401,16 +2401,6 @@ void CUDADevice::thread_run(DeviceTask &task)
}
}
class CUDADeviceTask : public Task {
public:
CUDADeviceTask(CUDADevice *device, DeviceTask &task) : task(task)
{
run = function_bind(&CUDADevice::thread_run, device, task);
}
DeviceTask task;
};
void CUDADevice::task_add(DeviceTask &task)
{
CUDAContextScope scope(this);
@@ -2426,7 +2416,10 @@ void CUDADevice::task_add(DeviceTask &task)
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
else {
task_pool.push(new CUDADeviceTask(this, task));
task_pool.push([=] {
DeviceTask task_copy = task;
thread_run(task_copy);
});
}
}
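The local `task_copy` is needed because a non-mutable `[=]` lambda captures `task` as const, while thread_run takes a non-const `DeviceTask &`. A small sketch of just that mechanic (DeviceTaskLike and thread_run_like are hypothetical names):

struct DeviceTaskLike {
  int value = 0;
};

/* Like CUDADevice::thread_run, takes a mutable reference. */
static void thread_run_like(DeviceTaskLike &task)
{
  task.value += 1;
}

int main()
{
  DeviceTaskLike task;
  auto runner = [=] {
    /* The captured `task` is const inside a non-mutable lambda, so a
     * local copy provides the mutable lvalue the callee requires. */
    DeviceTaskLike task_copy = task;
    thread_run_like(task_copy);
  };
  runner();
  return 0;
}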

View File

@@ -540,16 +540,6 @@ class CPUDevice : public Device {
thread_denoise(task);
}
class CPUDeviceTask : public Task {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask &task) : task(task)
{
run = function_bind(&CPUDevice::thread_run, device, task);
}
DeviceTask task;
};
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
@@ -1163,8 +1153,12 @@ class CPUDevice : public Device {
else
task.split(tasks, info.cpu_threads);
foreach (DeviceTask &task, tasks)
task_pool.push(new CPUDeviceTask(this, task));
foreach (DeviceTask &task, tasks) {
task_pool.push([=] {
DeviceTask task_copy = task;
thread_run(task_copy);
});
}
}
void task_wait()

View File

@@ -1463,17 +1463,6 @@ class OptiXDevice : public CUDADevice {
void task_add(DeviceTask &task) override
{
struct OptiXDeviceTask : public Task {
OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : task(task)
{
// Using task index parameter instead of thread index, since number of CUDA streams may
// differ from number of threads
run = function_bind(&OptiXDevice::thread_run, device, task, task_index);
}
DeviceTask task;
};
// Upload texture information to device if it has changed since last launch
load_texture_info();
@@ -1485,7 +1474,10 @@ class OptiXDevice : public CUDADevice {
if (task.type == DeviceTask::DENOISE_BUFFER) {
// Execute denoising in a single thread (e.g. to avoid race conditions during creation)
task_pool.push(new OptiXDeviceTask(this, task, 0));
task_pool.push([=] {
DeviceTask task_copy = task;
thread_run(task_copy, 0);
});
return;
}
@@ -1495,8 +1487,15 @@ class OptiXDevice : public CUDADevice {
// Queue tasks in internal task pool
int task_index = 0;
for (DeviceTask &task : tasks)
task_pool.push(new OptiXDeviceTask(this, task, task_index++));
for (DeviceTask &task : tasks) {
task_pool.push([=] {
// Using task index parameter instead of thread index, since number of CUDA streams may
// differ from number of threads
DeviceTask task_copy = task;
thread_run(task_copy, task_index);
});
task_index++;
}
}
void task_wait() override
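One detail worth noting in the loop above: `[=]` copies `task_index` at push time, so each queued lambda keeps the index it was created with even though the variable is incremented afterwards. A tiny sketch of that capture-by-value behaviour (illustrative names only):

#include <functional>
#include <vector>

int main()
{
  std::vector<std::function<void(void)>> tasks;
  int sum = 0;
  int task_index = 0;
  for (int i = 0; i < 4; i++) {
    /* [=] snapshots task_index now; the increment below is not seen. */
    tasks.push_back([=, &sum] { sum += task_index; });
    task_index++;
  }
  for (std::function<void(void)> &t : tasks)
    t();
  return (sum == 0 + 1 + 2 + 3) ? 0 : 1;
}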

View File

@@ -456,16 +456,6 @@ class OpenCLDevice : public Device {
void denoise(RenderTile &tile, DenoisingTask &denoising);
class OpenCLDeviceTask : public Task {
public:
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : task(task)
{
run = function_bind(&OpenCLDevice::thread_run, device, task);
}
DeviceTask task;
};
int get_split_task_count(DeviceTask & /*task*/)
{
return 1;
@@ -473,7 +463,10 @@ class OpenCLDevice : public Device {
void task_add(DeviceTask &task)
{
task_pool.push(new OpenCLDeviceTask(this, task));
task_pool.push([=] {
DeviceTask task_copy = task;
thread_run(task_copy);
});
}
void task_wait()

View File

@@ -49,21 +49,16 @@ TaskPool::~TaskPool()
stop();
}
void TaskPool::push(Task *task, bool front)
void TaskPool::push(TaskRunFunction &&task, bool front)
{
TaskScheduler::Entry entry;
entry.task = task;
entry.task = new TaskRunFunction(std::move(task));
entry.pool = this;
TaskScheduler::push(entry, front);
}
void TaskPool::push(TaskRunFunction &&run, bool front)
{
push(new Task(std::move(run)), front);
}
void TaskPool::wait_work(Summary *stats)
{
thread_scoped_lock num_lock(num_mutex);
@@ -95,7 +90,7 @@ void TaskPool::wait_work(Summary *stats)
/* if found task, do it, otherwise wait until other tasks are done */
if (found_entry) {
/* run task */
work_entry.task->run(0);
(*work_entry.task)();
/* delete task */
delete work_entry.task;
@@ -334,7 +329,7 @@ void TaskScheduler::init(int num_threads)
/* Launch threads that will be waiting for work. */
threads.resize(num_threads);
for (int thread_index = 0; thread_index < num_threads; ++thread_index) {
threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, thread_index + 1),
threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run),
thread_nodes[thread_index]);
}
}
@@ -384,7 +379,7 @@ bool TaskScheduler::thread_wait_pop(Entry &entry)
return true;
}
void TaskScheduler::thread_run(int thread_id)
void TaskScheduler::thread_run()
{
Entry entry;
@@ -393,7 +388,7 @@ void TaskScheduler::thread_run(int thread_id)
/* keep popping off tasks */
while (thread_wait_pop(entry)) {
/* run task */
entry.task->run(thread_id);
(*entry.task)();
/* delete task */
delete entry.task;
@@ -463,26 +458,21 @@ DedicatedTaskPool::~DedicatedTaskPool()
delete worker_thread;
}
void DedicatedTaskPool::push(Task *task, bool front)
void DedicatedTaskPool::push(TaskRunFunction &&task, bool front)
{
num_increase();
/* add task to queue */
queue_mutex.lock();
if (front)
queue.push_front(task);
queue.emplace_front(std::move(task));
else
queue.push_back(task);
queue.emplace_back(std::move(task));
queue_cond.notify_one();
queue_mutex.unlock();
}
void DedicatedTaskPool::push(TaskRunFunction &&run, bool front)
{
push(new Task(std::move(run)), front);
}
void DedicatedTaskPool::wait()
{
thread_scoped_lock num_lock(num_mutex);
@@ -535,7 +525,7 @@ void DedicatedTaskPool::num_increase()
num_cond.notify_all();
}
bool DedicatedTaskPool::thread_wait_pop(Task *&task)
bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task)
{
thread_scoped_lock queue_lock(queue_mutex);
@@ -555,15 +545,15 @@ bool DedicatedTaskPool::thread_wait_pop(Task *&task)
void DedicatedTaskPool::thread_run()
{
Task *task;
TaskRunFunction task;
/* keep popping off tasks */
while (thread_wait_pop(task)) {
/* run task */
task->run(0);
task();
/* delete task */
delete task;
task = nullptr;
/* notify task was done */
num_decrease(1);
@@ -575,15 +565,8 @@ void DedicatedTaskPool::clear()
thread_scoped_lock queue_lock(queue_mutex);
/* erase all tasks from the queue */
list<Task *>::iterator it = queue.begin();
int done = 0;
while (it != queue.end()) {
done++;
delete *it;
it = queue.erase(it);
}
int done = queue.size();
queue.clear();
queue_lock.unlock();
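With the queue holding callables by value, cleanup becomes trivial: destructors run when the list is cleared, and no manual delete loop is needed. A simplified stand-in for the new DedicatedTaskPool queue handling (not the real class):

#include <functional>
#include <list>
#include <utility>

typedef std::function<void(void)> TaskRunFunction;

int main()
{
  std::list<TaskRunFunction> queue;
  int runs = 0;
  TaskRunFunction task = [&runs] { runs++; };
  queue.emplace_back(std::move(task));      /* push(..., front = false) */
  queue.emplace_front([&runs] { runs++; }); /* push(..., front = true) */
  int done = (int)queue.size(); /* count of skipped tasks, as in clear() */
  queue.clear();                /* std::function destructors do the rest */
  return (done == 2 && runs == 0) ? 0 : 1;
}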

View File

@@ -31,43 +31,10 @@ using tbb::blocked_range;
using tbb::enumerable_thread_specific;
using tbb::parallel_for;
class Task;
class TaskPool;
class TaskScheduler;
/* Notes on Thread ID
*
* Thread ID argument reports the 0-based ID of a working thread from which
* the run() callback is being invoked. Thread ID of 0 denotes the thread from
* which wait_work() was called.
*
* DO NOT use this ID to control execution flow, use it only for things like
* emulating TLS which does not affect scheduling. Don't use this ID to make
* any decisions.
*
* It is to be noted here that dedicated task pool will always report thread ID
* of 0.
*/
typedef function<void(int thread_id)> TaskRunFunction;
/* Task
*
* Base class for tasks to be executed in threads. */
class Task {
public:
Task(){};
explicit Task(TaskRunFunction &&run_) : run(run_)
{
}
virtual ~Task()
{
}
TaskRunFunction run;
};
typedef function<void(void)> TaskRunFunction;
/* Task Pool
*
@@ -75,8 +42,7 @@ class Task {
* pool, we can wait for all tasks to be done, or cancel them before they are
* done.
*
* The run callback that actually executes the task may be created like this:
* function_bind(&MyClass::task_execute, this, _1, _2) */
* TaskRunFunction may be created with std::bind or lambda expressions. */
class TaskPool {
public:
@@ -96,8 +62,7 @@ class TaskPool {
TaskPool();
~TaskPool();
void push(Task *task, bool front = false);
void push(TaskRunFunction &&run, bool front = false);
void push(TaskRunFunction &&task, bool front = false);
void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker threads running */
@@ -154,7 +119,7 @@ class TaskScheduler {
friend class TaskPool;
struct Entry {
Task *task;
TaskRunFunction *task;
TaskPool *pool;
};
@@ -167,7 +132,7 @@ class TaskScheduler {
static thread_mutex queue_mutex;
static thread_condition_variable queue_cond;
static void thread_run(int thread_id);
static void thread_run();
static bool thread_wait_pop(Entry &entry);
static void push(Entry &entry, bool front);
@@ -186,7 +151,6 @@ class DedicatedTaskPool {
DedicatedTaskPool();
~DedicatedTaskPool();
void push(Task *task, bool front = false);
void push(TaskRunFunction &&run, bool front = false);
void wait(); /* wait until all tasks are done */
@@ -200,14 +164,14 @@ class DedicatedTaskPool {
void num_increase();
void thread_run();
bool thread_wait_pop(Task *&entry);
bool thread_wait_pop(TaskRunFunction &task);
void clear();
thread_mutex num_mutex;
thread_condition_variable num_cond;
list<Task *> queue;
list<TaskRunFunction> queue;
thread_mutex queue_mutex;
thread_condition_variable queue_cond;
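Per the updated comment in this header, a TaskRunFunction can now come from std::bind or a lambda interchangeably, since the thread_id parameter is gone. A short sketch (MyClass is a hypothetical example, echoing the old header comment):

#include <functional>

typedef std::function<void(void)> TaskRunFunction;

class MyClass {
 public:
  void task_execute()
  {
    count++;
  }
  int count = 0;
};

int main()
{
  MyClass object;
  /* Both forms produce the same zero-argument callable. */
  TaskRunFunction a = std::bind(&MyClass::task_execute, &object);
  TaskRunFunction b = [&object] { object.task_execute(); };
  a();
  b();
  return (object.count == 2) ? 0 : 1;
}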