Sculpt: experiment with lock-free mempools and fine-grained locks.

Joseph Eagar 2021-10-20 14:14:12 -07:00
parent 6ddd95f15a
commit 5de8134abc
15 changed files with 578 additions and 16 deletions

View File

@ -81,6 +81,12 @@ extern short (*MEM_testN)(void *vmemh);
* newly allocated block. */
extern void *(*MEM_dupallocN)(const void *vmemh) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT;
/**
* Duplicates a block of memory, and returns a pointer to the
* newly allocated block. */
extern void *(*MEM_dupallocN_id)(const void *vmemh,
const char *str) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT;
/**
* Reallocates a block of memory, and returns pointer to the newly
* allocated block, the old one is freed. this is not as optimized
@ -253,6 +259,8 @@ void MEM_use_lockfree_allocator(void);
* NOTE: The switch between allocator types can only happen before any allocation did happen. */
void MEM_use_guarded_allocator(void);
#define MEM_dupallocN(vmemh) MEM_dupallocN_id(vmemh, __func__)
#ifdef __cplusplus
}
#endif /* __cplusplus */
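
For reference, a minimal usage sketch of the new MEM_dupallocN_id entry point and the MEM_dupallocN convenience macro; the wrapper function and allocation name below are illustrative only, not part of the patch:

/* Illustrative sketch (not part of the commit): duplicate an allocation under
 * an explicit name so it shows up with that label in memory statistics. */
static float *duplicate_weights(const float *weights)
{
  /* Same size and contents as `weights`, tracked under the given string. */
  float *copy = MEM_dupallocN_id(weights, "duplicate_weights");

  /* Equivalent shorthand: the MEM_dupallocN() macro now forwards __func__
   * as the allocation name, i.e. MEM_dupallocN_id(weights, __func__). */
  /* float *copy2 = MEM_dupallocN(weights); */

  return copy;
}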

View File

@ -40,6 +40,7 @@ const char *malloc_conf = "background_thread:true,dirty_decay_ms:4000";
size_t (*MEM_allocN_len)(const void *vmemh) = MEM_lockfree_allocN_len;
void (*MEM_freeN)(void *vmemh) = MEM_lockfree_freeN;
void *(*MEM_dupallocN)(const void *vmemh) = MEM_lockfree_dupallocN;
void *(*MEM_dupallocN_id)(const void *vmemh, const char *str) = MEM_lockfree_dupallocN_id;
void *(*MEM_reallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_reallocN_id;
void *(*MEM_recallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_recallocN_id;
void *(*MEM_callocN)(size_t len, const char *str) = MEM_lockfree_callocN;
@ -121,6 +122,7 @@ void MEM_use_lockfree_allocator(void)
MEM_allocN_len = MEM_lockfree_allocN_len;
MEM_freeN = MEM_lockfree_freeN;
MEM_dupallocN = MEM_lockfree_dupallocN;
MEM_dupallocN_id = MEM_lockfree_dupallocN_id;
MEM_reallocN_id = MEM_lockfree_reallocN_id;
MEM_recallocN_id = MEM_lockfree_recallocN_id;
MEM_callocN = MEM_lockfree_callocN;
@ -152,6 +154,7 @@ void MEM_use_guarded_allocator(void)
MEM_allocN_len = MEM_guarded_allocN_len;
MEM_freeN = MEM_guarded_freeN;
MEM_dupallocN = MEM_guarded_dupallocN;
MEM_dupallocN_id = MEM_guarded_dupallocN_id;
MEM_reallocN_id = MEM_guarded_reallocN_id;
MEM_recallocN_id = MEM_guarded_recallocN_id;
MEM_callocN = MEM_guarded_callocN;

View File

@ -303,6 +303,31 @@ void *MEM_guarded_dupallocN(const void *vmemh)
  return newp;
}

void *MEM_guarded_dupallocN_id(const void *vmemh, const char *str)
{
  void *newp = NULL;

  if (vmemh) {
    const MemHead *memh = vmemh;
    memh--;

    if (LIKELY(memh->alignment == 0)) {
      newp = MEM_guarded_mallocN(memh->len, str);
    }
    else {
      newp = MEM_guarded_mallocN_aligned(memh->len, (size_t)memh->alignment, str);
    }

    if (newp == NULL) {
      return NULL;
    }

    memcpy(newp, vmemh, memh->len);
  }

  return newp;
}
void *MEM_guarded_reallocN_id(void *vmemh, size_t len, const char *str)
{
void *newp = NULL;
@ -408,7 +433,7 @@ static void print_memhead_backtrace(MemHead *memh)
(void)memh; /* Ignored. */
}
# endif /* defined(__linux__) || defined(__APPLE__) */
#endif /* DEBUG_BACKTRACE */
#endif /* DEBUG_BACKTRACE */
static void make_memhead_header(MemHead *memh, size_t len, const char *str)
{

View File

@ -110,6 +110,8 @@ extern char free_after_leak_detection_message[];
size_t MEM_lockfree_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT;
void MEM_lockfree_freeN(void *vmemh);
void *MEM_lockfree_dupallocN(const void *vmemh) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void *MEM_lockfree_dupallocN_id(const void *vmemh,
const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void *MEM_lockfree_reallocN_id(void *vmemh,
size_t len,
const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
@ -153,6 +155,8 @@ const char *MEM_lockfree_name_ptr(void *vmemh);
size_t MEM_guarded_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT;
void MEM_guarded_freeN(void *vmemh);
void *MEM_guarded_dupallocN(const void *vmemh) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void *MEM_guarded_dupallocN_id(const void *vmemh,
const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void *MEM_guarded_reallocN_id(void *vmemh,
size_t len,
const char *str) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT

View File

@ -29,8 +29,8 @@
#include "MEM_guardedalloc.h"
/* to ensure strict conversions */
#include "../../source/blender/blenlib/BLI_strict_flags.h"
#include "../../source/blender/blenlib/BLI_asan.h"
#include "../../source/blender/blenlib/BLI_strict_flags.h"
#include "atomic_ops.h"
#include "mallocn_intern.h"
@ -167,6 +167,29 @@ void *MEM_lockfree_dupallocN(const void *vmemh)
  return newp;
}

void *MEM_lockfree_dupallocN_id(const void *vmemh, const char *str)
{
  void *newp = NULL;

  if (vmemh) {
    MemHead *memh = MEMHEAD_FROM_PTR(vmemh);
    const size_t prev_size = MEM_lockfree_allocN_len(vmemh);

    MEM_UNPOISON_MEMHEAD(vmemh);

    if (UNLIKELY(MEMHEAD_IS_ALIGNED(memh))) {
      MemHeadAligned *memh_aligned = MEMHEAD_ALIGNED_FROM_PTR(vmemh);
      newp = MEM_lockfree_mallocN_aligned(prev_size, (size_t)memh_aligned->alignment, str);
    }
    else {
      newp = MEM_lockfree_mallocN(prev_size, str);
    }

    MEM_POISON_MEMHEAD(vmemh);

    /* Guard against allocation failure before copying. */
    if (newp != NULL) {
      memcpy(newp, vmemh, prev_size);
    }
  }

  return newp;
}
void *MEM_lockfree_reallocN_id(void *vmemh, size_t len, const char *str)
{
void *newp = NULL;

@ -1 +1 @@
Subproject commit 75e46177f36a49ad36b917e641ee1586ddef7092
Subproject commit 80d9e7ee122c626cbbcd1da554683bce79f8d3df

View File

@ -163,6 +163,15 @@ void BKE_curvemapping_copy_data_tag_ex(CurveMapping *target,
}
}
static void *debug_dupalloc_id(const void *mem, const char *id)
{
  const size_t len = MEM_allocN_len(mem);
  void *cpy = MEM_mallocN(len, id);

  memcpy(cpy, mem, len);
  return cpy;
}
void BKE_curvemapping_copy_data(CurveMapping *target, const CurveMapping *cumap)
{
int a;
@ -177,13 +186,14 @@ void BKE_curvemapping_copy_data(CurveMapping *target, const CurveMapping *cumap)
for (a = 0; a < CM_TOT; a++) {
if (cumap->cm[a].curve) {
target->cm[a].curve = MEM_dupallocN(cumap->cm[a].curve);
target->cm[a].curve = debug_dupalloc_id(cumap->cm[a].curve, "curvemapping.curve");
}
if (cumap->cm[a].table) {
target->cm[a].table = MEM_dupallocN(cumap->cm[a].table);
target->cm[a].table = debug_dupalloc_id(cumap->cm[a].table, "curvemapping.table");
}
if (cumap->cm[a].premultable) {
target->cm[a].premultable = MEM_dupallocN(cumap->cm[a].premultable);
target->cm[a].premultable = debug_dupalloc_id(cumap->cm[a].premultable,
"curvemapping.premultable");
}
}
}
@ -191,7 +201,7 @@ void BKE_curvemapping_copy_data(CurveMapping *target, const CurveMapping *cumap)
CurveMapping *BKE_curvemapping_copy(const CurveMapping *cumap)
{
if (cumap) {
CurveMapping *cumapn = MEM_dupallocN(cumap);
CurveMapping *cumapn = debug_dupalloc_id(cumap, "CurveMapping");
BKE_curvemapping_copy_data(cumapn, cumap);
cumapn->flag &= ~CUMA_PART_OF_CACHE;
return cumapn;

View File

@ -34,6 +34,7 @@
#include <stdio.h>
#define DYNTOPO_EDGE_LOCKS
//#define DYNTOPO_REPORT
//#define WITH_ADAPTIVE_CURVATURE
@ -190,6 +191,169 @@ void bmesh_radial_loop_append(BMEdge *e, BMLoop *l);
void bm_kill_only_edge(BMesh *bm, BMEdge *e);
void bm_kill_only_loop(BMesh *bm, BMLoop *l);
void bm_kill_only_face(BMesh *bm, BMFace *f);
static bool bm_elem_is_free(BMElem *elem, int htype);
extern char dyntopop_node_idx_layer_id[];
extern char dyntopop_faces_areas_layer_id[];
#ifdef DYNTOPO_EDGE_LOCKS
char *cdlayer_lock_attr_name = "__bm_lock";
static int cdlayer_lock_begin(PBVH *pbvh, BMesh *bm)
{
  int idx = CustomData_get_named_layer_index(&bm->edata, CD_PROP_INT32, cdlayer_lock_attr_name);

  if (idx == -1) {
    BM_data_layer_add_named(bm, &bm->edata, CD_PROP_INT32, cdlayer_lock_attr_name);
    idx = CustomData_get_named_layer_index(&bm->edata, CD_PROP_INT32, cdlayer_lock_attr_name);

    /* The lock layer lives in edge data, not vertex data. */
    bm->edata.layers[idx].flag |= CD_FLAG_TEMPORARY | CD_FLAG_ELEM_NOCOPY | CD_FLAG_ELEM_NOINTERP;

    /* Adding a layer may shift existing layer offsets; refresh the cached ones. */
    pbvh->cd_vert_node_offset = CustomData_get_named_layer_index(
        &pbvh->bm->vdata, CD_PROP_INT32, dyntopop_node_idx_layer_id);
    pbvh->cd_face_node_offset = CustomData_get_named_layer_index(
        &pbvh->bm->pdata, CD_PROP_INT32, dyntopop_node_idx_layer_id);

    pbvh->cd_vert_node_offset = bm->vdata.layers[pbvh->cd_vert_node_offset].offset;
    pbvh->cd_face_node_offset = bm->pdata.layers[pbvh->cd_face_node_offset].offset;
  }

  return bm->edata.layers[idx].offset;
}
static bool cdlayer_elem_lock(BMElem *elem, int cd_lock, int thread_nr)
{
  thread_nr++;

  int *lock = BM_ELEM_CD_GET_VOID_P(elem, cd_lock);
  int old = *lock;

  if (old == thread_nr) {
    /* This thread already owns the lock. */
    return true;
  }

  int prev;
  while ((prev = atomic_cas_int32(lock, old, thread_nr)) != old) {
    if (elem->head.htype != BM_EDGE) {
      /* Element was freed while spinning. */
      return false;
    }

    /* The lock value changed under us; retry against the value we just read. */
    old = prev;
  }

  return true;
}
static void cdlayer_elem_unlock(BMElem *elem, int cd_lock, int thread_nr)
{
  thread_nr++;

  int *lock = BM_ELEM_CD_GET_VOID_P(elem, cd_lock);
  *lock = 0;
}
static bool cdlayer_lock_edge(BMEdge *e, int cd_lock, int thread_nr)
{
  if (BM_ELEM_CD_GET_INT(e, cd_lock) == thread_nr + 1) {
    return true;
  }

  if (!cdlayer_elem_lock((BMElem *)e, cd_lock, thread_nr)) {
    return false;
  }

  /* Lock the edges of the faces surrounding both endpoints. */
  for (int i = 0; i < 2; i++) {
    BMVert *v = i ? e->v2 : e->v1;
    BMEdge *e2 = v->e;

    do {
      BMLoop *l = e2->l;

      if (!l) {
        /* Wire edge. */
        cdlayer_elem_lock((BMElem *)e2, cd_lock, thread_nr);
        continue;
      }

      do {
        BMLoop *l2 = l;
        do {
          cdlayer_elem_lock((BMElem *)l2->e, cd_lock, thread_nr);
        } while ((l2 = l2->next) != l);
      } while ((l = l->next) != e2->l);
    } while ((e2 = BM_DISK_EDGE_NEXT(e2, v)) != v->e);
  }

  return true;
}
static void cdlayer_unlock_edge(BMEdge *e, int cd_lock, int thread_nr)
{
  /* Nothing to unlock if this thread does not own the edge's lock. */
  if (BM_ELEM_CD_GET_INT(e, cd_lock) != thread_nr + 1) {
    return;
  }

  BMEdge **es = NULL;
  BLI_array_staticdeclare(es, 32);

  const int tag = BM_ELEM_TAG_ALT;

  /* First pass: clear the tag on all edges of the surrounding faces. */
  for (int i = 0; i < 2; i++) {
    BMVert *v = i ? e->v2 : e->v1;
    BMEdge *e2 = v->e;

    do {
      BMLoop *l = e2->l;

      if (!l) {
        BLI_array_append(es, e2);
        continue;
      }

      do {
        BMLoop *l2 = l;
        do {
          l2->e->head.hflag &= ~tag;
        } while ((l2 = l2->next) != l);
      } while ((l = l->next) != e2->l);
    } while ((e2 = BM_DISK_EDGE_NEXT(e2, v)) != v->e);
  }

  /* Second pass: collect each edge once, using the tag to avoid duplicates. */
  for (int i = 0; i < 2; i++) {
    BMVert *v = i ? e->v2 : e->v1;
    BMEdge *e2 = v->e;

    do {
      BMLoop *l = e2->l;

      if (!l) {
        BLI_array_append(es, e2);
        continue;
      }

      do {
        BMLoop *l2 = l;
        do {
          if (!(l2->e->head.hflag & tag)) {
            l2->e->head.hflag |= tag;
            BLI_array_append(es, l2->e);
          }
        } while ((l2 = l2->next) != l);
      } while ((l = l->next) != e2->l);
    } while ((e2 = BM_DISK_EDGE_NEXT(e2, v)) != v->e);
  }

  /* Release every collected edge that is still alive and owned by this thread. */
  for (int i = 0; i < BLI_array_len(es); i++) {
    BMEdge *e2 = es[i];

    if (!bm_elem_is_free((BMElem *)e2, BM_EDGE) &&
        BM_ELEM_CD_GET_INT(e2, cd_lock) == thread_nr + 1) {
      cdlayer_elem_unlock((BMElem *)e2, cd_lock, thread_nr);
    }
  }

  BLI_array_free(es);
}
#endif
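
The functions above form the experimental fine-grained edge locking referenced in the commit title. A hedged sketch of how a worker thread might use them around a topology edit; the wrapper function and its arguments are assumptions for illustration, not part of the patch:

/* Illustrative sketch only: claim the lock region around an edge, edit it,
 * then release. Uses cdlayer_lock_begin()/cdlayer_lock_edge()/
 * cdlayer_unlock_edge() as defined above. */
static void example_locked_edge_edit(PBVH *pbvh, BMesh *bm, BMEdge *e, int thread_nr)
{
  /* Ensure the per-edge integer lock layer exists and get its offset. */
  const int cd_lock = cdlayer_lock_begin(pbvh, bm);

  /* Claim the edge and the edges of its surrounding faces for this thread. */
  if (!cdlayer_lock_edge(e, cd_lock, thread_nr)) {
    return; /* The edge was freed by another thread; skip it. */
  }

  /* ... perform the topology edit on `e` here ... */

  cdlayer_unlock_edge(e, cd_lock, thread_nr);
}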
static void fix_mesh(PBVH *pbvh, BMesh *bm)
{
@ -1749,6 +1913,7 @@ typedef struct EdgeQueueThreadData {
int size;
bool is_collapse;
int seed;
int n;
} EdgeQueueThreadData;
static void edge_thread_data_insert(EdgeQueueThreadData *tdata, BMEdge *e)
@ -3537,8 +3702,10 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
return NULL;
}
#ifndef DYNTOPO_EDGE_LOCKS
pbvh_check_vert_boundary(pbvh, v1);
pbvh_check_vert_boundary(pbvh, v2);
#endif
const int mupdateflag = SCULPTVERT_NEED_VALENCE | SCULPTVERT_NEED_BOUNDARY |
SCULPTVERT_NEED_DISK_SORT;
@ -3546,8 +3713,10 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
validate_edge(pbvh, pbvh->bm, e, true, true);
#ifndef DYNTOPO_EDGE_LOCKS
check_vert_fan_are_tris(pbvh, e->v1);
check_vert_fan_are_tris(pbvh, e->v2);
#endif
MSculptVert *mv1 = BKE_PBVH_SCULPTVERT(pbvh->cd_sculpt_vert, v1);
MSculptVert *mv2 = BKE_PBVH_SCULPTVERT(pbvh->cd_sculpt_vert, v2);
@ -3785,7 +3954,9 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
BMLoop *l = e2->l;
if (e2 != e && !(e2->head.hflag & tag)) {
#ifndef DYNTOPO_EDGE_LOCKS
BM_log_edge_topo_pre(pbvh->bm_log, e2);
#endif
}
e2->head.hflag |= tag;
@ -3797,7 +3968,9 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
do {
if (BM_ELEM_CD_GET_INT(l->f, pbvh->cd_face_node_offset) != DYNTOPO_NODE_NONE) {
pbvh_bmesh_face_remove(pbvh, l->f, false, false, false);
#ifndef DYNTOPO_EDGE_LOCKS
BM_log_face_topo_pre(pbvh->bm_log, l->f);
#endif
}
} while ((l = l->radial_next) != e2->l);
} while ((e2 = BM_DISK_EDGE_NEXT(e2, v_step)) != v_step->e);
@ -3805,8 +3978,10 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
pbvh_bmesh_vert_remove(pbvh, v_del);
#ifndef DYNTOPO_EDGE_LOCKS
BM_log_edge_topo_pre(pbvh->bm_log, e);
BM_log_vert_removed(pbvh->bm_log, v_del, pbvh->cd_vert_mask_offset);
#endif
BLI_ghash_insert(deleted_verts, (void *)v_del, NULL);
@ -3931,7 +4106,9 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
if (e2->head.hflag & tag) {
e2->head.hflag &= ~tag;
#ifndef DYNTOPO_EDGE_LOCKS
BM_log_edge_topo_post(pbvh->bm_log, e2);
#endif
}
BMLoop *lnext;
@ -3962,7 +4139,9 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
if (!fbad && BM_ELEM_CD_GET_INT(l->f, pbvh->cd_face_node_offset) == DYNTOPO_NODE_NONE) {
BKE_pbvh_bmesh_add_face(pbvh, l->f, false, false);
#ifndef DYNTOPO_EDGE_LOCKS
BM_log_face_topo_post(pbvh->bm_log, l->f);
#endif
}
if (!lnext) {
@ -4007,6 +4186,20 @@ static BMVert *pbvh_bmesh_collapse_edge(PBVH *pbvh,
return v_conn;
}
#ifdef DYNTOPO_EDGE_LOCKS
static void pbvh_bmesh_collapse_short_edges_cb(void *__restrict userdata,
                                               const int n,
                                               const TaskParallelTLS *__restrict UNUSED(tls))
{
  EdgeQueueThreadData *tdata = ((EdgeQueueThreadData *)userdata) + n;
  int thread_nr = n;

  for (int i = 0; i < tdata->totedge; i++) {
    BMEdge *e = tdata->edges[i];

    /* Experimental stub: per-thread processing of the queued edges is not
     * implemented yet. */
    (void)e;
    (void)thread_nr;
  }
}
#endif
static bool pbvh_bmesh_collapse_short_edges(EdgeQueueContext *eq_ctx,
PBVH *pbvh,
BLI_Buffer *deleted_faces,
@ -4035,6 +4228,23 @@ static bool pbvh_bmesh_collapse_short_edges(EdgeQueueContext *eq_ctx,
BMVert **checkvs = NULL;
BLI_array_declare(checkvs);
#ifdef DYNTOPO_EDGE_LOCKS
  const int totthread = 8;

  /* One EdgeQueueThreadData per worker thread. */
  EdgeQueueThreadData *tdata = MEM_callocN(sizeof(EdgeQueueThreadData) * totthread,
                                           "EdgeQueueThreadData");
  int totedge = max_steps / totthread + 1;
  int curthread = 0;

  if (totedge * totthread < max_steps) {
    totedge += ((totedge * totthread) % max_steps) + 100;
  }

  for (int i = 0; i < totthread; i++) {
    tdata[i].edges = MEM_mallocN(sizeof(void *) * totedge, "edge queue thread data edges");
  }
#endif
while (!BLI_heapsimple_is_empty(eq_ctx->q->heap)) {
if (step++ > max_steps) {
break;
@ -4085,9 +4295,14 @@ static bool pbvh_bmesh_collapse_short_edges(EdgeQueueContext *eq_ctx,
continue;
}
#ifdef USE_EDGEQUEUE_TAG
#ifdef DYNTOPO_EDGE_LOCKS
tdata[curthread].edges[tdata[curthread].totedge++] = e;
curthread = (curthread + 1) % totthread;
#else
# ifdef USE_EDGEQUEUE_TAG
EDGE_QUEUE_DISABLE(e);
#endif
# endif
if (calc_weighted_edge_collapse(eq_ctx, v1, v2) >= limit_len_squared) {
continue;
@ -4112,13 +4327,25 @@ static bool pbvh_bmesh_collapse_short_edges(EdgeQueueContext *eq_ctx,
BLI_array_append(checkvs, v_conn);
}
#ifdef TEST_COLLAPSE
# ifdef TEST_COLLAPSE
if (_i++ > 10) {
break;
}
# endif
#endif
}
  TaskParallelSettings settings;
  BLI_parallel_range_settings_defaults(&settings);
  BLI_task_parallel_range(0, totthread, tdata, pbvh_bmesh_collapse_short_edges_cb, &settings);

  for (int i = 0; i < totthread; i++) {
    MEM_SAFE_FREE(tdata[i].edges);
  }

  MEM_SAFE_FREE(tdata);
// add log subentry
BM_log_entry_add_ex(pbvh->bm, pbvh->bm_log, true);
@ -5660,9 +5887,6 @@ static void pbvh_split_edges(EdgeQueueContext *eq_ctx,
#endif
}
extern char dyntopop_node_idx_layer_id[];
extern char dyntopop_faces_areas_layer_id[];
typedef struct DynTopoState {
PBVH *pbvh;
bool is_fake_pbvh;

View File

@ -0,0 +1,13 @@
#pragma once
/* Opaque pool handle; the implementation lives in intern/lockfree_mempool.cc. */
typedef struct BLI_lfmempool BLI_lfmempool;

typedef struct BLI_lfmempool_iter {
  void *chunk;
  BLI_lfmempool *pool;
  int i;
} BLI_lfmempool_iter;

BLI_lfmempool *BLI_lfmempool_create(int esize, int psize);
void BLI_lfmempool_destroy(BLI_lfmempool *pool);
void *BLI_lfmempool_alloc(BLI_lfmempool *pool);
void BLI_lfmempool_free(BLI_lfmempool *pool, void *mem);
void BLI_lfmempool_iternew(BLI_lfmempool *_pool, BLI_lfmempool_iter *iter);
void *BLI_lfmempool_iterstep(BLI_lfmempool_iter *iter);
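
A minimal usage sketch of the new lock-free pool API declared above; the element size and chunk size values are illustrative:

/* Illustrative sketch (not part of the commit): create a pool of 3D
 * coordinates, allocate one element, release it, and destroy the pool. */
static void example_lfmempool_usage(void)
{
  BLI_lfmempool *pool = BLI_lfmempool_create((int)sizeof(float[3]), 1024);

  float *co = BLI_lfmempool_alloc(pool);
  co[0] = co[1] = co[2] = 0.0f;

  BLI_lfmempool_free(pool, co);
  BLI_lfmempool_destroy(pool);
}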

View File

@ -90,6 +90,7 @@ set(SRC
intern/kdtree_2d.c
intern/kdtree_3d.c
intern/kdtree_4d.c
intern/lockfree_mempool.cc
intern/lasso_2d.c
intern/listbase.c
intern/math_base.c

View File

@ -0,0 +1,241 @@
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <atomic>
#include "MEM_guardedalloc.h"
/* NOTE: copied from BLO_blend_defs.h, don't use here because we're in BLI. */
#ifdef __BIG_ENDIAN__
/* Big Endian */
# define MAKE_ID(a, b, c, d) ((int)(a) << 24 | (int)(b) << 16 | (c) << 8 | (d))
# define MAKE_ID_8(a, b, c, d, e, f, g, h) \
((int64_t)(a) << 56 | (int64_t)(b) << 48 | (int64_t)(c) << 40 | (int64_t)(d) << 32 | \
(int64_t)(e) << 24 | (int64_t)(f) << 16 | (int64_t)(g) << 8 | (h))
#else
/* Little Endian */
# define MAKE_ID(a, b, c, d) ((int)(d) << 24 | (int)(c) << 16 | (b) << 8 | (a))
# define MAKE_ID_8(a, b, c, d, e, f, g, h) \
((int64_t)(h) << 56 | (int64_t)(g) << 48 | (int64_t)(f) << 40 | (int64_t)(e) << 32 | \
(int64_t)(d) << 24 | (int64_t)(c) << 16 | (int64_t)(b) << 8 | (a))
#endif
/**
* Important that this value is _not_ aligned with `sizeof(void *)`.
* So having a pointer to 2/4/8... aligned memory is enough to ensure
* the `freeword` will never be used.
* To be safe, use a word that's the same in both directions.
*/
#define FREEWORD \
((sizeof(void *) > sizeof(int32_t)) ? MAKE_ID_8('e', 'e', 'r', 'f', 'f', 'r', 'e', 'e') : \
MAKE_ID('e', 'f', 'f', 'e'))
/**
* The 'used' word just needs to be set to something besides FREEWORD.
*/
#define USEDWORD MAKE_ID('u', 's', 'e', 'd')
typedef struct BLI_lfmempool BLI_lfmempool;
namespace blender {
struct LockFreeElem {
  struct LockFreeElem *next;
  uintptr_t freeword;
};

struct LockFreeChunk {
  struct LockFreeChunk *next, *prev;

  /* Element data follows the chunk header; we are conveniently aligned to
   * 16 bytes here. */
};
static void *data_from_chunk(LockFreeChunk *chunk)
{
  return reinterpret_cast<void *>(chunk + 1);
}

static LockFreeElem *elem_from_chunk(LockFreeChunk *chunk)
{
  return reinterpret_cast<LockFreeElem *>(data_from_chunk(chunk));
}

static LockFreeElem *elem_next(LockFreeElem *elem, int esize)
{
  char *ptr = reinterpret_cast<char *>(elem);
  ptr += esize;

  return reinterpret_cast<LockFreeElem *>(ptr);
}

static_assert(sizeof(std::atomic<void *>) == sizeof(void *), "std::atomic has space overhead!");
struct LockFreePool {
  struct {
    std::atomic<LockFreeChunk *> first;
    std::atomic<LockFreeChunk *> last;
  } chunks;

  std::atomic<int> totchunk;
  std::atomic<int> totused;

  std::atomic<LockFreeElem *> free_elem;

  int esize, psize, csize;

  LockFreePool(int esize, int psize) : psize(psize)
  {
    /* Every element must have room for the freelist link and the freeword. */
    this->esize = std::max(esize, (int)(sizeof(void *) * 2));
    csize = this->esize * psize + (int)sizeof(LockFreeChunk);

    chunks.first.store(nullptr);
    chunks.last.store(nullptr);
    free_elem.store(nullptr);
    totchunk.store(0);
    totused.store(0);
  }

  ~LockFreePool()
  {
    LockFreeChunk *chunk, *next;

    for (chunk = chunks.first; chunk; chunk = next) {
      next = chunk->next;
      MEM_freeN(chunk);
    }
  }

  void add_chunk()
  {
    /* Allocate the chunk header plus room for psize elements of esize bytes. */
    LockFreeChunk *chunk = static_cast<LockFreeChunk *>(
        MEM_mallocN((size_t)csize, "BLI_lfmempool chunk"));

    chunk->next = chunk->prev = nullptr;

    /* Chain the new elements together and mark them as free. */
    LockFreeElem *first = elem_from_chunk(chunk);
    LockFreeElem *elem = first, *last = first;

    for (int i = 0; i < psize; i++, elem = elem_next(elem, esize)) {
      elem->next = (i == psize - 1) ? nullptr : elem_next(elem, esize);
      elem->freeword = FREEWORD;

      if (i == psize - 1) {
        last = elem;
      }
    }

    /* Atomically push the whole chain onto the free list:
     *   last->next = free_elem;
     *   free_elem = first;
     */
    while (1) {
      last->next = free_elem.load();
      if (free_elem.compare_exchange_strong(last->next, first)) {
        break;
      }
    }

    /* Append the chunk to the chunk list. */
    while (1) {
      chunk->prev = chunks.last.load();
      if (chunks.last.compare_exchange_strong(chunk->prev, chunk)) {
        if (chunk->prev) {
          /* Forward-link so the destructor can walk from chunks.first. */
          chunk->prev->next = chunk;
        }
        else {
          /* chunks.first is not accessed in threading cases, only when pool
           * is destroyed. */
          chunks.first.store(chunk);
        }
        break;
      }
    }
  }

  void *alloc()
  {
    /* Simple lock-free freelist pop; note this is not protected against the
     * ABA problem under heavy contention. */
    while (1) {
      if (!free_elem.load()) {
        add_chunk();
      }

      LockFreeElem *cur = free_elem.load();

      if (!cur) {
        /* Another thread emptied the list between the check and the load. */
        continue;
      }

      if (free_elem.compare_exchange_strong(cur, cur->next)) {
        cur->freeword = 0;
        return reinterpret_cast<void *>(cur);
      }
    }
  }

  void free(void *mem)
  {
    LockFreeElem *elem = reinterpret_cast<LockFreeElem *>(mem);

    elem->freeword = FREEWORD;
    elem->next = free_elem.load();

    while (!free_elem.compare_exchange_strong(elem->next, elem)) {
      /* elem->next now holds the current head; retry. */
    }
  }
};
static LockFreePool *cast_pool(BLI_lfmempool *pool)
{
  return reinterpret_cast<LockFreePool *>(pool);
}

extern "C" {

BLI_lfmempool *BLI_lfmempool_create(int esize, int psize)
{
  LockFreePool *pool = OBJECT_GUARDED_NEW(LockFreePool, esize, psize);

  return reinterpret_cast<BLI_lfmempool *>(pool);
}

typedef struct BLI_lfmempool_iter {
  void *chunk;
  BLI_lfmempool *pool;
  int i;
} BLI_lfmempool_iter;

void BLI_lfmempool_destroy(BLI_lfmempool *pool)
{
  OBJECT_GUARDED_DELETE(cast_pool(pool), LockFreePool);
}

void *BLI_lfmempool_alloc(BLI_lfmempool *pool)
{
  return cast_pool(pool)->alloc();
}

void BLI_lfmempool_free(BLI_lfmempool *pool, void *mem)
{
  cast_pool(pool)->free(mem);
}

void BLI_lfmempool_iternew(BLI_lfmempool *_pool, BLI_lfmempool_iter *iter)
{
  LockFreePool *pool = cast_pool(_pool);

  iter->pool = _pool;
  iter->chunk = reinterpret_cast<void *>(pool->chunks.first.load());
  iter->i = 0;
}

void *BLI_lfmempool_iterstep(BLI_lfmempool_iter *iter)
{
  if (!iter->chunk) {
    return nullptr;
  }

  LockFreePool *pool = cast_pool(iter->pool);
  LockFreeChunk *chunk = reinterpret_cast<LockFreeChunk *>(iter->chunk);

  char *data = reinterpret_cast<char *>(data_from_chunk(chunk));
  void *ret = reinterpret_cast<void *>(data + pool->esize * iter->i);

  iter->i++;

  if (iter->i >= pool->psize) {
    iter->i = 0;
    iter->chunk = reinterpret_cast<void *>(chunk->next);
  }

  LockFreeElem *elem = reinterpret_cast<LockFreeElem *>(ret);

  if (elem->freeword == FREEWORD) {
    return BLI_lfmempool_iterstep(iter);
  }

  return ret;
}
}
} // namespace blender
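
To round out the API, a hedged sketch of iterating a pool from C; BLI_lfmempool_iterstep() internally skips slots whose freeword marks them as free. The callback parameter is an assumption for illustration, not part of the patch:

/* Illustrative sketch only: visit every live element in a pool. */
static void example_lfmempool_iterate(BLI_lfmempool *pool, void (*visit)(void *elem))
{
  BLI_lfmempool_iter iter;
  BLI_lfmempool_iternew(pool, &iter);

  void *elem;
  while ((elem = BLI_lfmempool_iterstep(&iter))) {
    /* `elem` is a live allocation; freed slots are skipped internally. */
    visit(elem);
  }
}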

View File

@ -32,6 +32,7 @@
* Usage: msgfmt input.po output.po
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

View File

@ -746,7 +746,7 @@ static void sculpt_gesture_face_set_begin(bContext *C, SculptGestureContext *sgc
SCULPT_undo_push_node(sgcontext->vc.obact, NULL, SCULPT_UNDO_FACE_SETS);
}
static void face_set_gesture_apply_task_cb(void *__restrict userdata,
static void (void *__restrict userdata,
const int i,
const TaskParallelTLS *__restrict UNUSED(tls))
{

View File

@ -5059,6 +5059,7 @@ static void sculpt_topology_update(Sculpt *sd,
SCULPT_get_int(ss, dyntopo_disable_smooth, sd, brush));
SCULPT_dyntopo_automasking_end(mask_cb_data);
SCULPT_update_customdata_refs(ss);
/* Update average stroke position. */
copy_v3_v3(location, ss->cache->true_location);

View File

@ -587,6 +587,7 @@ int SCULPT_dyntopo_get_templayer(SculptSession *ss, int type, const char *name)
}
char dyntopop_faces_areas_layer_id[] = "__dyntopo_face_areas";
extern char *cdlayer_lock_attr_name;
void SCULPT_dyntopo_node_layers_add(SculptSession *ss)
{
@ -600,15 +601,22 @@ void SCULPT_dyntopo_node_layers_add(SculptSession *ss)
{CD_DYNTOPO_VERT, NULL, CD_FLAG_TEMPORARY | CD_FLAG_NOCOPY},
{CD_PROP_INT32, dyntopop_node_idx_layer_id, CD_FLAG_TEMPORARY | CD_FLAG_NOCOPY}};
BM_data_layers_ensure(ss->bm, &ss->bm->vdata, vlayers, 3);
BM_data_layers_ensure(ss->bm, &ss->bm->vdata, vlayers, ARRAY_SIZE(vlayers));
ss->cd_vert_mask_offset = CustomData_get_offset(&ss->bm->vdata, CD_PAINT_MASK);
  BMCustomLayerReq elayers[] = {
      {CD_PROP_INT32,
       cdlayer_lock_attr_name,
       CD_FLAG_TEMPORARY | CD_FLAG_ELEM_NOCOPY | CD_FLAG_ELEM_NOINTERP},
  };

  BM_data_layers_ensure(ss->bm, &ss->bm->edata, elayers, 1);
BMCustomLayerReq flayers[] = {
{CD_PROP_INT32, dyntopop_node_idx_layer_id, CD_FLAG_TEMPORARY | CD_FLAG_NOCOPY},
{CD_PROP_FLOAT, dyntopop_faces_areas_layer_id, CD_FLAG_TEMPORARY | CD_FLAG_NOCOPY},
};
BM_data_layers_ensure(ss->bm, &ss->bm->pdata, flayers, 2);
BM_data_layers_ensure(ss->bm, &ss->bm->pdata, flayers, ARRAY_SIZE(flayers));
// get indices again, as they might have changed after adding new layers
cd_node_layer_index = CustomData_get_named_layer_index(