BLI_mempool: split thread-safe iteration into the private API

Splitting out the thread-safe iteration logic means regular iteration
no longer checks for the thread-safe pointer on each step.

This gives a small but measurable overall performance gain of 2-3%
when redrawing a high-poly mesh.

Ref D11564

Reviewed By: mont29
This commit is contained in:
Campbell Barton 2021-06-10 18:16:17 +10:00
parent 9df1e0cad5
commit bcefce33f2
Notes: blender-bot 2023-02-14 01:35:49 +01:00
Referenced by issue #88550, Mesh Optimization Project Progress
4 changed files with 69 additions and 38 deletions

View File

@ -68,8 +68,6 @@ typedef struct BLI_mempool_iter {
BLI_mempool *pool;
struct BLI_mempool_chunk *curchunk;
unsigned int curindex;
struct BLI_mempool_chunk **curchunk_threaded_shared;
} BLI_mempool_iter;
/* flag */

View File

@ -542,8 +542,12 @@ void BLI_mempool_iternew(BLI_mempool *pool, BLI_mempool_iter *iter)
iter->pool = pool;
iter->curchunk = pool->chunks;
iter->curindex = 0;
}
iter->curchunk_threaded_shared = NULL;
/**
 * Initialize \a ts_iter for iterating over \a pool.
 *
 * The wrapped iterator is set up with #BLI_mempool_iternew and the shared chunk
 * pointer is cleared; #mempool_iter_threadsafe_create assigns it afterwards.
 */
static void mempool_threadsafe_iternew(BLI_mempool *pool, BLI_mempool_threadsafe_iter *ts_iter)
{
BLI_mempool_iternew(pool, &ts_iter->iter);
/* No shared chunk pointer yet, the caller is responsible for setting it. */
ts_iter->curchunk_threaded_shared = NULL;
}
/**
@ -566,13 +570,13 @@ ParallelMempoolTaskData *mempool_iter_threadsafe_create(BLI_mempool *pool, const
ParallelMempoolTaskData *iter_arr = MEM_mallocN(sizeof(*iter_arr) * num_iter, __func__);
BLI_mempool_chunk **curchunk_threaded_shared = MEM_mallocN(sizeof(void *), __func__);
BLI_mempool_iternew(pool, &iter_arr->iter);
mempool_threadsafe_iternew(pool, &iter_arr->ts_iter);
*curchunk_threaded_shared = iter_arr->iter.curchunk;
iter_arr->iter.curchunk_threaded_shared = curchunk_threaded_shared;
*curchunk_threaded_shared = iter_arr->ts_iter.iter.curchunk;
iter_arr->ts_iter.curchunk_threaded_shared = curchunk_threaded_shared;
for (size_t i = 1; i < num_iter; i++) {
iter_arr[i].iter = iter_arr[0].iter;
*curchunk_threaded_shared = iter_arr[i].iter.curchunk =
iter_arr[i].ts_iter = iter_arr[0].ts_iter;
*curchunk_threaded_shared = iter_arr[i].ts_iter.iter.curchunk =
((*curchunk_threaded_shared) ? (*curchunk_threaded_shared)->next : NULL);
}
@ -581,9 +585,9 @@ ParallelMempoolTaskData *mempool_iter_threadsafe_create(BLI_mempool *pool, const
void mempool_iter_threadsafe_destroy(ParallelMempoolTaskData *iter_arr)
{
BLI_assert(iter_arr->iter.curchunk_threaded_shared != NULL);
BLI_assert(iter_arr->ts_iter.curchunk_threaded_shared != NULL);
MEM_freeN(iter_arr->iter.curchunk_threaded_shared);
MEM_freeN(iter_arr->ts_iter.curchunk_threaded_shared);
MEM_freeN(iter_arr);
}
@ -604,19 +608,6 @@ static void *bli_mempool_iternext(BLI_mempool_iter *iter)
if (iter->curindex == iter->pool->pchunk) {
iter->curindex = 0;
if (iter->curchunk_threaded_shared) {
while (1) {
iter->curchunk = *iter->curchunk_threaded_shared;
if (iter->curchunk == NULL) {
return ret;
}
if (atomic_cas_ptr((void **)iter->curchunk_threaded_shared,
iter->curchunk,
iter->curchunk->next) == iter->curchunk) {
break;
}
}
}
iter->curchunk = iter->curchunk->next;
}
@ -658,19 +649,54 @@ void *BLI_mempool_iterstep(BLI_mempool_iter *iter)
}
else {
iter->curindex = 0;
if (iter->curchunk_threaded_shared) {
for (iter->curchunk = *iter->curchunk_threaded_shared;
(iter->curchunk != NULL) && (atomic_cas_ptr((void **)iter->curchunk_threaded_shared,
iter->curchunk,
iter->curchunk->next) != iter->curchunk);
iter->curchunk = *iter->curchunk_threaded_shared) {
/* pass. */
}
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
}
iter->curchunk = iter->curchunk->next;
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
}
curnode = CHUNK_DATA(iter->curchunk);
}
} while (ret->freeword == FREEWORD);
return ret;
}
/**
* A version of #BLI_mempool_iterstep that uses
* #BLI_mempool_threadsafe_iter.curchunk_threaded_shared for threaded iteration support.
* (threaded section noted in comments).
*/
void *mempool_iter_threadsafe_step(BLI_mempool_threadsafe_iter *ts_iter)
{
BLI_mempool_iter *iter = &ts_iter->iter;
if (UNLIKELY(iter->curchunk == NULL)) {
return NULL;
}
const uint esize = iter->pool->esize;
BLI_freenode *curnode = POINTER_OFFSET(CHUNK_DATA(iter->curchunk), (esize * iter->curindex));
BLI_freenode *ret;
do {
ret = curnode;
if (++iter->curindex != iter->pool->pchunk) {
curnode = POINTER_OFFSET(curnode, esize);
}
else {
iter->curindex = 0;
/* Begin unique to the `threadsafe` version of this function. */
for (iter->curchunk = *ts_iter->curchunk_threaded_shared;
(iter->curchunk != NULL) && (atomic_cas_ptr((void **)ts_iter->curchunk_threaded_shared,
iter->curchunk,
iter->curchunk->next) != iter->curchunk);
iter->curchunk = *ts_iter->curchunk_threaded_shared) {
/* pass. */
}
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
}
/* End `threadsafe` exception. */
iter->curchunk = iter->curchunk->next;
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;

View File

@ -31,8 +31,13 @@
#include "BLI_mempool.h"
#include "BLI_task.h"
typedef struct ParallelMempoolTaskData {
/**
 * A #BLI_mempool_iter paired with the extra state used by
 * #mempool_iter_threadsafe_step.
 */
typedef struct BLI_mempool_threadsafe_iter {
/* The regular iterator state (pool, current chunk & index). */
BLI_mempool_iter iter;
/* Allocated once by #mempool_iter_threadsafe_create and copied into every iterator of the
 * array it returns; updated with an atomic compare-and-swap when stepping to another chunk. */
struct BLI_mempool_chunk **curchunk_threaded_shared;
} BLI_mempool_threadsafe_iter;
/**
 * Element type of the array returned by #mempool_iter_threadsafe_create
 * (one element per requested iterator).
 */
typedef struct ParallelMempoolTaskData {
/* Iterator stepped with #mempool_iter_threadsafe_step. */
BLI_mempool_threadsafe_iter ts_iter;
/* Passed to the task callback together with each item. */
TaskParallelTLS tls;
} ParallelMempoolTaskData;
@ -40,6 +45,8 @@ ParallelMempoolTaskData *mempool_iter_threadsafe_create(BLI_mempool *pool, const
ATTR_WARN_UNUSED_RESULT ATTR_NONNULL();
void mempool_iter_threadsafe_destroy(ParallelMempoolTaskData *iter_arr) ATTR_NONNULL();
void *mempool_iter_threadsafe_step(BLI_mempool_threadsafe_iter *iter);
#ifdef __cplusplus
}
#endif

View File

@ -379,11 +379,11 @@ typedef struct ParallelMempoolState {
static void parallel_mempool_func(TaskPool *__restrict pool, void *taskdata)
{
ParallelMempoolState *__restrict state = BLI_task_pool_user_data(pool);
BLI_mempool_iter *iter = &((ParallelMempoolTaskData *)taskdata)->iter;
BLI_mempool_threadsafe_iter *iter = &((ParallelMempoolTaskData *)taskdata)->ts_iter;
TaskParallelTLS *tls = &((ParallelMempoolTaskData *)taskdata)->tls;
MempoolIterData *item;
while ((item = BLI_mempool_iterstep(iter)) != NULL) {
while ((item = mempool_iter_threadsafe_step(iter)) != NULL) {
state->func(state->userdata, item, tls);
}
}