BLI_task: add support for listbase parallelized for loops.

Code by @sergey, with small edits and doc by @mont29.
This commit is contained in:
Bastien Montagne 2016-05-13 11:03:04 +02:00
parent 990fab73ea
commit 868cfc5a4a
2 changed files with 128 additions and 5 deletions


@@ -21,6 +21,9 @@
#ifndef __BLI_TASK_H__
#define __BLI_TASK_H__
struct Link;
struct ListBase;
/** \file BLI_task.h
* \ingroup bli
*/
@@ -129,6 +132,15 @@ void BLI_task_parallel_range(
TaskParallelRangeFunc func,
const bool use_threading);
typedef void (*TaskParallelListbaseFunc)(void *userdata,
struct Link *iter,
int index);
void BLI_task_parallel_listbase(
struct ListBase *listbase,
void *userdata,
TaskParallelListbaseFunc func,
const bool use_threading);
#ifdef __cplusplus
}
#endif
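
A minimal usage sketch of the new API (the callback, data names and the 100-item threshold are illustrative, not part of this commit): the callback receives the shared userdata, the current Link and its index, and the caller decides whether threading is worthwhile.

#include "DNA_listBase.h"
#include "BLI_listbase.h"
#include "BLI_task.h"

/* Hypothetical callback: gets the shared userdata, the current link and its index in the list. */
static void gather_cb(void *userdata, struct Link *iter, int index)
{
	int *flags = userdata;
	flags[index] = (iter->next != NULL);  /* Any per-item work goes here. */
}

void process_list(struct ListBase *lb, int *flags)
{
	/* Caller may use any heuristic to decide whether threading is worth it. */
	const bool use_threading = (BLI_listbase_count(lb) > 100);
	BLI_task_parallel_listbase(lb, flags, gather_cb, use_threading);
}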


@@ -28,6 +28,8 @@
#include "MEM_guardedalloc.h"
#include "DNA_listBase.h"
#include "BLI_listbase.h"
#include "BLI_math.h"
#include "BLI_task.h"
@@ -750,16 +752,13 @@ size_t BLI_task_pool_tasks_done(TaskPool *pool)
*
* Main functions:
* - #BLI_task_parallel_range
* - #BLI_task_parallel_listbase (#ListBase - double linked list)
*
* TODO:
* - #BLI_task_parallel_foreach_listbase (#ListBase - double linked list)
* - #BLI_task_parallel_foreach_link (#Link - single linked list)
* - #BLI_task_parallel_foreach_ghash/gset (#GHash/#GSet - hash & set)
* - #BLI_task_parallel_foreach_mempool (#BLI_mempool - iterate over mempools)
*
* Possible improvements:
*
* - Chunk iterations to reduce number of spin locks.
*/
/* Allows to avoid using malloc for userdata_chunk in tasks, when small enough. */
@@ -934,7 +933,7 @@ static void task_parallel_range_ex(
}
/**
* This function allows to parallelized for loops in a similar way to OpenMP's 'parallel for' statement.
* This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement.
*
* \param start First index to process.
* \param stop Index to stop looping (excluded).
@@ -985,3 +984,115 @@ void BLI_task_parallel_range(
#undef MALLOCA
#undef MALLOCA_FREE
typedef struct ParallelListbaseState {
void *userdata;
TaskParallelListbaseFunc func;
int chunk_size;
int index;
Link *link;
SpinLock lock;
} ParallelListState;
BLI_INLINE Link *parallel_listbase_next_iter_get(
ParallelListState * __restrict state,
int * __restrict index,
int * __restrict count)
{
int task_count = 0;
BLI_spin_lock(&state->lock);
Link *result = state->link;
if (LIKELY(result != NULL)) {
*index = state->index;
while (state->link != NULL && task_count < state->chunk_size) {
++task_count;
state->link = state->link->next;
}
state->index += task_count;
}
BLI_spin_unlock(&state->lock);
*count = task_count;
return result;
}
static void parallel_listbase_func(
TaskPool * __restrict pool,
void *UNUSED(taskdata),
int UNUSED(threadid))
{
ParallelListState * __restrict state = BLI_task_pool_userdata(pool);
Link *link;
int index, count;
while ((link = parallel_listbase_next_iter_get(state, &index, &count)) != NULL) {
for (int i = 0; i < count; ++i) {
state->func(state->userdata, link, index + i);
link = link->next;
}
}
}
/**
* This function allows to parallelize for loops over ListBase items.
*
* \param listbase The double linked list to loop over.
* \param userdata Common userdata passed to all instances of \a func.
* \param func Callback function.
* \param use_threading If \a true, actually split-execute the loop in threads, else just do a sequential for loop
* (allows caller to use any kind of test to switch on parallelization or not).
*
* \note There is no static scheduling here, since it would need another full loop over items to count them...
*/
void BLI_task_parallel_listbase(
struct ListBase *listbase,
void *userdata,
TaskParallelListbaseFunc func,
const bool use_threading)
{
TaskScheduler *task_scheduler;
TaskPool *task_pool;
ParallelListState state;
int i, num_threads, num_tasks;
if (BLI_listbase_is_empty(listbase)) {
return;
}
if (!use_threading) {
i = 0;
for (Link *link = listbase->first; link != NULL; link = link->next, ++i) {
func(userdata, link, i);
}
return;
}
task_scheduler = BLI_task_scheduler_get();
task_pool = BLI_task_pool_create(task_scheduler, &state);
num_threads = BLI_task_scheduler_num_threads(task_scheduler);
/* The idea here is to prevent creating a task for each of the loop iterations,
* and instead have tasks which are evenly distributed across CPU cores and
* pull the next iter to be crunched using the queue.
*/
num_tasks = num_threads * 2;
state.index = 0;
state.link = listbase->first;
state.userdata = userdata;
state.func = func;
state.chunk_size = 32;
BLI_spin_init(&state.lock);
for (i = 0; i < num_tasks; i++) {
/* Use this pool's pre-allocated tasks. */
BLI_task_pool_push_from_thread(task_pool,
parallel_listbase_func,
NULL, false,
TASK_PRIORITY_HIGH, 0);
}
BLI_task_pool_work_and_wait(task_pool);
BLI_task_pool_free(task_pool);
BLI_spin_end(&state.lock);
}
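
For a sense of the scheduling (illustrative numbers, not from the commit): on a machine where BLI_task_scheduler_num_threads() reports 8, num_tasks = 2 * 8 = 16 worker tasks are pushed, and each call to parallel_listbase_next_iter_get() claims up to chunk_size = 32 consecutive links under the spin lock. A 1000-item list is thus consumed in ceil(1000 / 32) = 32 lock-protected grabs rather than 1000 per-item tasks, which is what the "prevent creating a task for each of the loop iterations" comment above refers to.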