BLI_task: add support for listbase parallelized for loops.

Code by @sergey, with small edits and doc by @mont29.
This commit is contained in:
Bastien Montagne 2016-05-13 11:03:04 +02:00
parent 990fab73ea
commit 868cfc5a4a
2 changed files with 128 additions and 5 deletions


@@ -21,6 +21,9 @@
#ifndef __BLI_TASK_H__
#define __BLI_TASK_H__
struct Link;
struct ListBase;
/** \file BLI_task.h
* \ingroup bli
*/
@@ -129,6 +132,15 @@ void BLI_task_parallel_range(
TaskParallelRangeFunc func,
const bool use_threading);
typedef void (*TaskParallelListbaseFunc)(void *userdata,
struct Link *iter,
int index);
void BLI_task_parallel_listbase(
struct ListBase *listbase,
void *userdata,
TaskParallelListbaseFunc func,
const bool use_threading);
#ifdef __cplusplus
}
#endif
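
A minimal usage sketch of the new API (the callback, data names and the 100-item threshold are illustrative, not part of this commit): the callback receives the shared userdata, the current Link and its index, and the caller decides whether threading is worthwhile.

#include "DNA_listBase.h"
#include "BLI_listbase.h"
#include "BLI_task.h"

/* Hypothetical callback: gets the shared userdata, the current link and its index in the list. */
static void gather_cb(void *userdata, struct Link *iter, int index)
{
	int *flags = userdata;
	flags[index] = (iter->next != NULL);  /* Any per-item work goes here. */
}

void process_list(struct ListBase *lb, int *flags)
{
	/* Caller may use any heuristic to decide whether threading is worth it. */
	const bool use_threading = (BLI_listbase_count(lb) > 100);
	BLI_task_parallel_listbase(lb, flags, gather_cb, use_threading);
}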


@@ -28,6 +28,8 @@
#include "MEM_guardedalloc.h"
#include "DNA_listBase.h"
#include "BLI_listbase.h"
#include "BLI_math.h"
#include "BLI_task.h"
@@ -750,16 +752,13 @@ size_t BLI_task_pool_tasks_done(TaskPool *pool)
*
* Main functions:
* - #BLI_task_parallel_range
* - #BLI_task_parallel_listbase (#ListBase - double linked list)
*
* TODO:
* - #BLI_task_parallel_foreach_listbase (#ListBase - double linked list)
* - #BLI_task_parallel_foreach_link (#Link - single linked list)
* - #BLI_task_parallel_foreach_ghash/gset (#GHash/#GSet - hash & set)
* - #BLI_task_parallel_foreach_mempool (#BLI_mempool - iterate over mempools)
*
* Possible improvements:
*
* - Chunk iterations to reduce number of spin locks.
*/
/* Allows to avoid using malloc for userdata_chunk in tasks, when small enough. */
@@ -934,7 +933,7 @@ static void task_parallel_range_ex(
}
/**
* This function allows to parallelized for loops in a similar way to OpenMP's 'parallel for' statement.
* This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement.
*
* \param start First index to process.
* \param stop Index to stop looping (excluded).
@@ -985,3 +984,115 @@ void BLI_task_parallel_range(
#undef MALLOCA
#undef MALLOCA_FREE
typedef struct ParallelListbaseState {
void *userdata;
TaskParallelListbaseFunc func;
int chunk_size;
int index;
Link *link;
SpinLock lock;
} ParallelListState;
BLI_INLINE Link *parallel_listbase_next_iter_get(
ParallelListState * __restrict state,
int * __restrict index,
int * __restrict count)
{
int task_count = 0;
BLI_spin_lock(&state->lock);
Link *result = state->link;
if (LIKELY(result != NULL)) {
*index = state->index;
while (state->link != NULL && task_count < state->chunk_size) {
++task_count;
state->link = state->link->next;
}
state->index += task_count;
}
BLI_spin_unlock(&state->lock);
*count = task_count;
return result;
}
static void parallel_listbase_func(
TaskPool * __restrict pool,
void *UNUSED(taskdata),
int UNUSED(threadid))
{
ParallelListState * __restrict state = BLI_task_pool_userdata(pool);
Link *link;
int index, count;
while ((link = parallel_listbase_next_iter_get(state, &index, &count)) != NULL) {
for (int i = 0; i < count; ++i) {
state->func(state->userdata, link, index + i);
link = link->next;
}
}
}
/**
* This function allows to parallelize for loops over ListBase items.
*
* \param listbase The double linked list to loop over.
* \param userdata Common userdata passed to all instances of \a func.
* \param func Callback function.
* \param use_threading If \a true, actually split-execute the loop in threads, else just do a sequential for loop
* (allows caller to use any kind of test to switch on parallelization or not).
*
* \note There is no static scheduling here, since it would need another full loop over items to count them...
*/
void BLI_task_parallel_listbase(
struct ListBase *listbase,
void *userdata,
TaskParallelListbaseFunc func,
const bool use_threading)
{
TaskScheduler *task_scheduler;
TaskPool *task_pool;
ParallelListState state;
int i, num_threads, num_tasks;
if (BLI_listbase_is_empty(listbase)) {
return;
}
if (!use_threading) {
i = 0;
for (Link *link = listbase->first; link != NULL; link = link->next, ++i) {
func(userdata, link, i);
}
return;
}
task_scheduler = BLI_task_scheduler_get();
task_pool = BLI_task_pool_create(task_scheduler, &state);
num_threads = BLI_task_scheduler_num_threads(task_scheduler);
/* The idea here is to prevent creating a task for each of the loop iterations,
* and instead have tasks which are evenly distributed across CPU cores and
* pull the next iter to be crunched using the queue.
*/
num_tasks = num_threads * 2;
state.index = 0;
state.link = listbase->first;
state.userdata = userdata;
state.func = func;
state.chunk_size = 32;
BLI_spin_init(&state.lock);
for (i = 0; i < num_tasks; i++) {
/* Use this pool's pre-allocated tasks. */
BLI_task_pool_push_from_thread(task_pool,
parallel_listbase_func,
NULL, false,
TASK_PRIORITY_HIGH, 0);
}
BLI_task_pool_work_and_wait(task_pool);
BLI_task_pool_free(task_pool);
BLI_spin_end(&state.lock);
}
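
For a sense of the scheduling (illustrative numbers, not from the commit): on a machine where BLI_task_scheduler_num_threads() reports 8, num_tasks = 2 * 8 = 16 worker tasks are pushed, and each call to parallel_listbase_next_iter_get() claims up to chunk_size = 32 consecutive links under the spin lock. A 1000-item list is thus consumed in ceil(1000 / 32) = 32 lock-protected grabs rather than 1000 per-item tasks, which is what the "prevent creating a task for each of the loop iterations" comment above refers to.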