Fix T96810: Bitmap race condition in PBVH normal calculation

The final normalization step of sculpt normal calculation iterates over
all unique vertices in each node and marks them as done. However,
storing the done mask in a bitmap meant that multiple threads could
write to a single byte at the same time, because the bits for separate
threads could be stored in the same byte. This is not threadsafe

Fixing this issue seems to improve performance as well. First I tested
just clearing the entire bitmap after the normal calculation. Then I
tested using an array of booleans instead, which turned out to be
slightly better, and simplifies code a bit.

I tested on a Ryzen 3800x, on an 8 million polygon subdivided
Suzanne by using the grab brush with a radius large enough to
affect most of the mesh.

| Original  | Clear Entire Bitmap | Boolean Array |
| --------- | ------------------- | ------------- |
| 67.9 ms   | 59.1 ms             | 57.9 ms       |
| 1.0x      | 1.15x               | 1.17x         |

Differential Revision: https://developer.blender.org/D14985
This commit is contained in:
Hans Goudey 2022-05-20 09:49:29 +02:00
parent d27f4e8493
commit b8bd20d7e0
Notes: blender-bot 2023-02-14 07:45:38 +01:00
Referenced by issue #96810, Regression: Shading artefacts in sculpt mode
2 changed files with 11 additions and 11 deletions

View File

@ -244,8 +244,8 @@ static int map_insert_vert(
key = POINTER_FROM_INT(vertex);
if (!BLI_ghash_ensure_p(map, key, &value_p)) {
int value_i;
if (BLI_BITMAP_TEST(pbvh->vert_bitmap, vertex) == 0) {
BLI_BITMAP_ENABLE(pbvh->vert_bitmap, vertex);
if (!pbvh->vert_bitmap[vertex]) {
pbvh->vert_bitmap[vertex] = true;
value_i = *uniq_verts;
(*uniq_verts)++;
}
@ -562,7 +562,7 @@ void BKE_pbvh_build_mesh(PBVH *pbvh,
pbvh->verts = verts;
BKE_mesh_vertex_normals_ensure(mesh);
pbvh->vert_normals = BKE_mesh_vertex_normals_for_write(mesh);
pbvh->vert_bitmap = BLI_BITMAP_NEW(totvert, "bvh->vert_bitmap");
pbvh->vert_bitmap = MEM_calloc_arrayN(totvert, sizeof(bool), "bvh->vert_bitmap");
pbvh->totvert = totvert;
pbvh->leaf_limit = LEAF_LIMIT;
pbvh->vdata = vdata;
@ -600,7 +600,7 @@ void BKE_pbvh_build_mesh(PBVH *pbvh,
MEM_freeN(prim_bbc);
/* Clear the bitmap so it can be used as an update tag later on. */
BLI_bitmap_set_all(pbvh->vert_bitmap, false, totvert);
memset(pbvh->vert_bitmap, 0, sizeof(bool) * totvert);
BKE_pbvh_update_active_vcol(pbvh, mesh);
}
@ -1021,7 +1021,7 @@ static void pbvh_update_normals_clear_task_cb(void *__restrict userdata,
const int totvert = node->uniq_verts;
for (int i = 0; i < totvert; i++) {
const int v = verts[i];
if (BLI_BITMAP_TEST(pbvh->vert_bitmap, v)) {
if (pbvh->vert_bitmap[v]) {
zero_v3(vnors[v]);
}
}
@ -1064,7 +1064,7 @@ static void pbvh_update_normals_accum_task_cb(void *__restrict userdata,
for (int j = sides; j--;) {
const int v = vtri[j];
if (BLI_BITMAP_TEST(pbvh->vert_bitmap, v)) {
if (pbvh->vert_bitmap[v]) {
/* NOTE: This avoids `lock, add_v3_v3, unlock`
* and is five to ten times quicker than a spin-lock.
* Not exact equivalent though, since atomicity is only ensured for one component
@ -1096,9 +1096,9 @@ static void pbvh_update_normals_store_task_cb(void *__restrict userdata,
/* No atomics necessary because we are iterating over uniq_verts only,
* so we know only this thread will handle this vertex. */
if (BLI_BITMAP_TEST(pbvh->vert_bitmap, v)) {
if (pbvh->vert_bitmap[v]) {
normalize_v3(vnors[v]);
BLI_BITMAP_DISABLE(pbvh->vert_bitmap, v);
pbvh->vert_bitmap[v] = false;
}
}
@ -1879,7 +1879,7 @@ bool BKE_pbvh_node_fully_unmasked_get(PBVHNode *node)
void BKE_pbvh_vert_mark_update(PBVH *pbvh, int index)
{
BLI_assert(pbvh->type == PBVH_FACES);
BLI_BITMAP_ENABLE(pbvh->vert_bitmap, index);
pbvh->vert_bitmap[index] = true;
}
void BKE_pbvh_node_get_loops(PBVH *pbvh,
@ -2044,7 +2044,7 @@ bool BKE_pbvh_node_vert_update_check_any(PBVH *pbvh, PBVHNode *node)
for (int i = 0; i < totvert; i++) {
const int v = verts[i];
if (BLI_BITMAP_TEST(pbvh->vert_bitmap, v)) {
if (pbvh->vert_bitmap[v]) {
return true;
}
}

View File

@ -165,7 +165,7 @@ struct PBVH {
/* Used during BVH build and later to mark that a vertex needs to update
* (its normal must be recalculated). */
BLI_bitmap *vert_bitmap;
bool *vert_bitmap;
#ifdef PERFCNTRS
int perf_modified;