BMesh: remove unit-length edge-vector cache from normal calculation

Bypass stored edge-vectors for ~16% performance gains.

While this increases unit-length edge-vector calculations by around 4x,
avoiding the overhead of a parallel loop over all edges makes it worthwhile.

Note that caching edge-vectors per-vertex performs better and may be
worth investigating further, although in my tests this increases code
complexity with barely measurable benefits over not using a cache at all.

Details about performance and possible optimizations are noted in
bm_vert_calc_normals_impl.
This commit is contained in:
Campbell Barton 2021-06-14 22:56:02 +10:00
parent 8083527f90
commit 6bef255904
Notes: blender-bot 2023-02-14 08:42:53 +01:00
Referenced by issue #88550, Mesh Optimization Project Progress
3 changed files with 69 additions and 202 deletions

View File

@ -49,67 +49,9 @@
* assuming no other tool using it would run concurrently to clnors editing. */
#define BM_LNORSPACE_UPDATE _FLAG_MF
typedef struct BMEdgesCalcVectorsData {
/* Read-only data. */
const float (*vcos)[3];
/* Read-write data, but no need to protect it, no concurrency to fear here. */
float (*edgevec)[3];
} BMEdgesCalcVectorsData;
/**
 * Parallel mempool-iterator callback: compute the unit-length direction
 * vector of a single edge, writing into the `float (*)[3]` array passed
 * as `userdata` (indexed by edge index).
 *
 * No locking is needed: every edge writes only into its own slot.
 */
static void bm_edge_calc_vectors_cb(void *userdata,
MempoolIterData *mp_e,
const TaskParallelTLS *__restrict UNUSED(tls))
{
BMEdge *e = (BMEdge *)mp_e;
/* The edge vector will not be needed when the edge has no radial. */
if (e->l != NULL) {
float(*edgevec)[3] = userdata;
/* Output slot for this edge. */
float *e_diff = edgevec[BM_elem_index_get(e)];
/* Direction runs from `v1` to `v2`; consumers adjust the sign per-loop. */
sub_v3_v3v3(e_diff, e->v2->co, e->v1->co);
normalize_v3(e_diff);
}
}
/**
 * Variant of #bm_edge_calc_vectors_cb that reads vertex positions from a
 * caller-supplied coordinate array (`vcos`) instead of the vertices' own
 * coordinates. `userdata` is a #BMEdgesCalcVectorsData.
 */
static void bm_edge_calc_vectors_with_coords_cb(void *userdata,
MempoolIterData *mp_e,
const TaskParallelTLS *__restrict UNUSED(tls))
{
BMEdge *e = (BMEdge *)mp_e;
/* The edge vector will not be needed when the edge has no radial. */
if (e->l != NULL) {
BMEdgesCalcVectorsData *data = userdata;
/* Output slot for this edge (indexed by edge index). */
float *e_diff = data->edgevec[BM_elem_index_get(e)];
/* Custom coordinates are looked up by vertex index. */
sub_v3_v3v3(
e_diff, data->vcos[BM_elem_index_get(e->v2)], data->vcos[BM_elem_index_get(e->v1)]);
normalize_v3(e_diff);
}
}
/**
 * Compute unit-length direction vectors for all edges of `bm`, in parallel.
 *
 * \param edgevec: Output array, indexed by edge index
 * (must hold at least `bm->totedge` items).
 * \param vcos: Optional custom vertex coordinates (indexed by vertex index);
 * pass NULL to use the vertices' own coordinates.
 */
static void bm_mesh_edges_calc_vectors(BMesh *bm, float (*edgevec)[3], const float (*vcos)[3])
{
/* Edge indices are needed to address `edgevec`; vertex indices only
 * when custom coordinates are looked up. */
BM_mesh_elem_index_ensure(bm, BM_EDGE | (vcos ? BM_VERT : 0));
TaskParallelSettings settings;
BLI_parallel_mempool_settings_defaults(&settings);
/* Skip threading for small meshes where scheduling overhead dominates. */
settings.use_threading = bm->totedge >= BM_OMP_LIMIT;
if (vcos == NULL) {
BM_iter_parallel(bm, BM_EDGES_OF_MESH, bm_edge_calc_vectors_cb, edgevec, &settings);
}
else {
BMEdgesCalcVectorsData data = {
.edgevec = edgevec,
.vcos = vcos,
};
BM_iter_parallel(bm, BM_EDGES_OF_MESH, bm_edge_calc_vectors_with_coords_cb, &data, &settings);
}
}
typedef struct BMVertsCalcNormalsWithCoordsData {
/* Read-only data. */
const float (*fnos)[3];
const float (*edgevec)[3];
const float (*vcos)[3];
/* Write data. */
@ -117,13 +59,12 @@ typedef struct BMVertsCalcNormalsWithCoordsData {
} BMVertsCalcNormalsWithCoordsData;
BLI_INLINE void bm_vert_calc_normals_accum_loop(const BMLoop *l_iter,
const float (*edgevec)[3],
const float e1diff[3],
const float e2diff[3],
const float f_no[3],
float v_no[3])
{
/* Calculate the dot product of the two edges that meet at the loop's vertex. */
const float *e1diff = edgevec[BM_elem_index_get(l_iter->prev->e)];
const float *e2diff = edgevec[BM_elem_index_get(l_iter->e)];
/* Edge vectors are calculated from e->v1 to e->v2, so adjust the dot product if one but not
* both loops actually runs from e->v2 to e->v1. */
float dotprod = dot_v3v3(e1diff, e2diff);
@ -132,25 +73,61 @@ BLI_INLINE void bm_vert_calc_normals_accum_loop(const BMLoop *l_iter,
}
const float fac = saacos(-dotprod);
/* NAN detection, otherwise this is a degenerated case, ignore that vertex in this case. */
if (fac == fac) { /* NAN detection. */
if (fac == fac) {
madd_v3_v3fl(v_no, f_no, fac);
}
}
static void bm_vert_calc_normals_impl(const float (*edgevec)[3], BMVert *v)
static void bm_vert_calc_normals_impl(BMVert *v)
{
/* Note on redundant unit-length edge-vector calculation:
*
* This functions calculates unit-length edge-vector for every loop edge
* in practice this means 2x `sqrt` calls per face-corner connected to each vertex.
*
* Previously (2.9x and older), the edge vectors were calculated and stored for reuse.
* However the overhead of doing so did not perform well (~16% slower - single & multi-threaded)
* when compared with calculating the values as they are needed.
*
* For simple grid topologies this function calculates the edge-vectors 4 times.
* There is some room for improved performance by storing the edge-vectors for reuse locally
* in this function, halving the number of redundant `sqrtf` calls (2x instead of 4x),
* so face loops that share an edge would not calculate it multiple times.
* From my tests the performance improvements are so small they're difficult to measure,
* the time saved removing `sqrtf` calls is lost on storing and looking up the information,
* even in the case of `BLI_smallhash.h` & small inline lookup tables.
*
* Further, local data structures would need to support cases where
* stack memory isn't sufficient - adding additional complexity for corner-cases
* (a vertex that has thousands of connected edges for example).
* Unless there are important use-cases that benefit from edge-vector caching,
* keep this simple and calculate ~4x as many edge-vectors.
*
* In conclusion, the cost of caching & looking up edge-vectors both globally or per-vertex
* doesn't save enough time to make it worthwhile.
* - Campbell. */
float *v_no = v->no;
zero_v3(v_no);
BMEdge *e_first = v->e;
if (e_first != NULL) {
float e1diff[3], e2diff[3];
BMEdge *e_iter = e_first;
do {
BMLoop *l_first = e_iter->l;
if (l_first != NULL) {
sub_v3_v3v3(e2diff, e_iter->v1->co, e_iter->v2->co);
normalize_v3(e2diff);
BMLoop *l_iter = l_first;
do {
if (l_iter->v == v) {
bm_vert_calc_normals_accum_loop(l_iter, edgevec, l_iter->f->no, v_no);
BMEdge *e_prev = l_iter->prev->e;
sub_v3_v3v3(e1diff, e_prev->v1->co, e_prev->v2->co);
normalize_v3(e1diff);
bm_vert_calc_normals_accum_loop(l_iter, e1diff, e2diff, l_iter->f->no, v_no);
}
} while ((l_iter = l_iter->radial_next) != l_first);
}
@ -164,32 +141,44 @@ static void bm_vert_calc_normals_impl(const float (*edgevec)[3], BMVert *v)
normalize_v3_v3(v_no, v->co);
}
static void bm_vert_calc_normals_cb(void *userdata,
static void bm_vert_calc_normals_cb(void *UNUSED(userdata),
MempoolIterData *mp_v,
const TaskParallelTLS *__restrict UNUSED(tls))
{
const float(*edgevec)[3] = userdata;
BMVert *v = (BMVert *)mp_v;
bm_vert_calc_normals_impl(edgevec, v);
bm_vert_calc_normals_impl(v);
}
static void bm_vert_calc_normals_with_coords(BMVert *v, BMVertsCalcNormalsWithCoordsData *data)
{
/* See #bm_vert_calc_normals_impl note on performance. */
float *v_no = data->vnos[BM_elem_index_get(v)];
zero_v3(v_no);
/* Loop over edges. */
BMEdge *e_first = v->e;
if (e_first != NULL) {
float e1diff[3], e2diff[3];
BMEdge *e_iter = e_first;
do {
BMLoop *l_first = e_iter->l;
if (l_first != NULL) {
sub_v3_v3v3(e2diff,
data->vcos[BM_elem_index_get(e_iter->v1)],
data->vcos[BM_elem_index_get(e_iter->v2)]);
normalize_v3(e2diff);
BMLoop *l_iter = l_first;
do {
if (l_iter->v == v) {
BMEdge *e_prev = l_iter->prev->e;
sub_v3_v3v3(e1diff,
data->vcos[BM_elem_index_get(e_prev->v1)],
data->vcos[BM_elem_index_get(e_prev->v2)]);
normalize_v3(e1diff);
bm_vert_calc_normals_accum_loop(
l_iter, data->edgevec, data->fnos[BM_elem_index_get(l_iter->f)], v_no);
l_iter, e1diff, e2diff, data->fnos[BM_elem_index_get(l_iter->f)], v_no);
}
} while ((l_iter = l_iter->radial_next) != l_first);
}
@ -213,24 +202,22 @@ static void bm_vert_calc_normals_with_coords_cb(void *userdata,
}
static void bm_mesh_verts_calc_normals(BMesh *bm,
const float (*edgevec)[3],
const float (*fnos)[3],
const float (*vcos)[3],
float (*vnos)[3])
{
BM_mesh_elem_index_ensure(bm, (BM_EDGE | BM_FACE) | ((vnos || vcos) ? BM_VERT : 0));
BM_mesh_elem_index_ensure(bm, BM_FACE | ((vnos || vcos) ? BM_VERT : 0));
TaskParallelSettings settings;
BLI_parallel_mempool_settings_defaults(&settings);
settings.use_threading = bm->totvert >= BM_OMP_LIMIT;
if (vcos == NULL) {
BM_iter_parallel(bm, BM_VERTS_OF_MESH, bm_vert_calc_normals_cb, (void *)edgevec, &settings);
BM_iter_parallel(bm, BM_VERTS_OF_MESH, bm_vert_calc_normals_cb, NULL, &settings);
}
else {
BLI_assert(!ELEM(NULL, fnos, vnos));
BMVertsCalcNormalsWithCoordsData data = {
.edgevec = edgevec,
.fnos = fnos,
.vcos = vcos,
.vnos = vnos,
@ -255,11 +242,6 @@ static void bm_face_calc_normals_cb(void *UNUSED(userdata),
*/
void BM_mesh_normals_update(BMesh *bm)
{
float(*edgevec)[3] = MEM_mallocN(sizeof(*edgevec) * bm->totedge, __func__);
/* Parallel mempool iteration does not allow generating indices inline anymore. */
BM_mesh_elem_index_ensure(bm, (BM_EDGE | BM_FACE));
/* Calculate all face normals. */
TaskParallelSettings settings;
BLI_parallel_mempool_settings_defaults(&settings);
@ -267,11 +249,8 @@ void BM_mesh_normals_update(BMesh *bm)
BM_iter_parallel(bm, BM_FACES_OF_MESH, bm_face_calc_normals_cb, NULL, &settings);
bm_mesh_edges_calc_vectors(bm, edgevec, NULL);
/* Add weighted face normals to vertices, and normalize vert normals. */
bm_mesh_verts_calc_normals(bm, edgevec, NULL, NULL, NULL);
MEM_freeN(edgevec);
bm_mesh_verts_calc_normals(bm, NULL, NULL, NULL);
}
/** \} */
@ -287,40 +266,26 @@ static void bm_partial_faces_parallel_range_calc_normals_cb(
BM_face_calc_normal(f, f->no);
}
/**
 * Parallel-range callback: compute the unit-length vector for the edge at
 * index `iter` of the partial-update edge array.
 *
 * `userdata` is an array of two pointers:
 * [0] the `BMEdge **` edge array, [1] the `float (*)[3]` output vectors.
 */
static void bm_partial_edges_parallel_range_calc_vectors_cb(
void *userdata, const int iter, const TaskParallelTLS *__restrict UNUSED(tls))
{
BMEdge *e = ((BMEdge **)((void **)userdata)[0])[iter];
float *r_edgevec = ((float(*)[3])((void **)userdata)[1])[iter];
/* NOTE(review): direction here is `v1 - v2`, the reverse of
 * #bm_edge_calc_vectors_cb — confirm consumers handle the sign consistently. */
sub_v3_v3v3(r_edgevec, e->v1->co, e->v2->co);
normalize_v3(r_edgevec);
}
static void bm_partial_verts_parallel_range_calc_normal_cb(
void *userdata, const int iter, const TaskParallelTLS *__restrict UNUSED(tls))
{
BMVert *v = ((BMVert **)((void **)userdata)[0])[iter];
const float(*edgevec)[3] = (const float(*)[3])((void **)userdata)[1];
bm_vert_calc_normals_impl(edgevec, v);
BMVert *v = ((BMVert **)userdata)[iter];
bm_vert_calc_normals_impl(v);
}
/**
* A version of #BM_mesh_normals_update that updates a subset of geometry,
* used to avoid the overhead of updating everything.
*/
void BM_mesh_normals_update_with_partial(BMesh *bm, const BMPartialUpdate *bmpinfo)
void BM_mesh_normals_update_with_partial(BMesh *UNUSED(bm), const BMPartialUpdate *bmpinfo)
{
BLI_assert(bmpinfo->params.do_normals);
BMVert **verts = bmpinfo->verts;
BMEdge **edges = bmpinfo->edges;
BMFace **faces = bmpinfo->faces;
const int verts_len = bmpinfo->verts_len;
const int edges_len = bmpinfo->edges_len;
const int faces_len = bmpinfo->faces_len;
float(*edgevec)[3] = MEM_mallocN(sizeof(*edgevec) * edges_len, __func__);
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
@ -328,58 +293,9 @@ void BM_mesh_normals_update_with_partial(BMesh *bm, const BMPartialUpdate *bmpin
BLI_task_parallel_range(
0, faces_len, faces, bm_partial_faces_parallel_range_calc_normals_cb, &settings);
/* Temporarily override the edge indices,
* storing the correct indices in the case they're not dirty.
*
* \note in most cases indices are modified and #BMesh.elem_index_dirty is set.
* This is an exceptional case where indices are restored because the worst case downside
* of marking the edge indices dirty would require a full loop over all edges to
* correct the indices in other functions which need them to be valid.
* When moving a few vertices on a high poly mesh setting and restoring connected
* edges has very little overhead compared with restoring all edge indices. */
int *edge_index_value = NULL;
if ((bm->elem_index_dirty & BM_EDGE) == 0) {
edge_index_value = MEM_mallocN(sizeof(*edge_index_value) * edges_len, __func__);
for (int i = 0; i < edges_len; i++) {
BMEdge *e = edges[i];
edge_index_value[i] = BM_elem_index_get(e);
BM_elem_index_set(e, i); /* set_dirty! (restore before this function exits). */
}
}
else {
for (int i = 0; i < edges_len; i++) {
BMEdge *e = edges[i];
BM_elem_index_set(e, i); /* set_dirty! (already dirty) */
}
}
{
/* Verts. */
/* Compute normalized direction vectors for each edge.
* Directions will be used for calculating the weights of the face normals on the vertex
* normals. */
void *data[2] = {edges, edgevec};
BLI_task_parallel_range(
0, edges_len, data, bm_partial_edges_parallel_range_calc_vectors_cb, &settings);
/* Calculate vertex normals. */
data[0] = verts;
BLI_task_parallel_range(
0, verts_len, data, bm_partial_verts_parallel_range_calc_normal_cb, &settings);
}
if (edge_index_value != NULL) {
for (int i = 0; i < edges_len; i++) {
BMEdge *e = edges[i];
BM_elem_index_set(e, edge_index_value[i]); /* set_ok (restore) */
}
MEM_freeN(edge_index_value);
}
MEM_freeN(edgevec);
/* Verts. */
BLI_task_parallel_range(
0, verts_len, verts, bm_partial_verts_parallel_range_calc_normal_cb, &settings);
}
/** \} */
@ -399,16 +315,8 @@ void BM_verts_calc_normal_vcos(BMesh *bm,
const float (*vcos)[3],
float (*vnos)[3])
{
float(*edgevec)[3] = MEM_mallocN(sizeof(*edgevec) * bm->totedge, __func__);
/* Compute normalized direction vectors for each edge.
* Directions will be used for calculating the weights of the face normals on the vertex normals.
*/
bm_mesh_edges_calc_vectors(bm, edgevec, vcos);
/* Add weighted face normals to vertices, and normalize vert normals. */
bm_mesh_verts_calc_normals(bm, edgevec, fnos, vcos, vnos);
MEM_freeN(edgevec);
bm_mesh_verts_calc_normals(bm, fnos, vcos, vnos);
}
/** \} */

View File

@ -79,20 +79,6 @@ BLI_INLINE bool partial_elem_vert_ensure(BMPartialUpdate *bmpinfo,
return false;
}
/**
 * Append edge `e` to the partial-update data if it hasn't been added yet.
 *
 * \param edges_tag: Bitmap indexed by edge index, tracking which edges
 * are already stored in `bmpinfo->edges`.
 * \return true when the edge was newly added, false if it was already present.
 */
BLI_INLINE bool partial_elem_edge_ensure(BMPartialUpdate *bmpinfo,
BLI_bitmap *edges_tag,
BMEdge *e)
{
const int i = BM_elem_index_get(e);
if (!BLI_BITMAP_TEST(edges_tag, i)) {
BLI_BITMAP_ENABLE(edges_tag, i);
/* Grow the storage before appending, if required. */
GROW_ARRAY_AS_NEEDED(bmpinfo->edges, bmpinfo->edges_len_alloc, bmpinfo->edges_len);
bmpinfo->edges[bmpinfo->edges_len++] = e;
return true;
}
return false;
}
BLI_INLINE bool partial_elem_face_ensure(BMPartialUpdate *bmpinfo,
BLI_bitmap *faces_tag,
BMFace *f)
@ -121,17 +107,15 @@ BMPartialUpdate *BM_mesh_partial_create_from_verts(BMesh *bm,
/* Reserve more edges than vertices since it's common for a grid topology
* to use around twice as many edges as vertices. */
const int default_verts_len_alloc = verts_len;
const int default_edges_len_alloc = min_ii(bm->totedge, verts_len * 2);
const int default_faces_len_alloc = min_ii(bm->totface, verts_len);
/* Allocate tags instead of using #BM_ELEM_TAG because the caller may already be using tags.
* Further, walking over all geometry to clear the tags isn't so efficient. */
BLI_bitmap *verts_tag = NULL;
BLI_bitmap *edges_tag = NULL;
BLI_bitmap *faces_tag = NULL;
/* Set vert inline. */
BM_mesh_elem_index_ensure(bm, (BM_EDGE | BM_FACE));
BM_mesh_elem_index_ensure(bm, BM_FACE);
if (params->do_normals || params->do_tessellate) {
/* - Extend to all vertices connected faces:
@ -197,29 +181,12 @@ BMPartialUpdate *BM_mesh_partial_create_from_verts(BMesh *bm,
verts_tag = BLI_BITMAP_NEW((size_t)bm->totvert, __func__);
}
/* Edges. */
if (bmpinfo->edges == NULL) {
bmpinfo->edges_len_alloc = default_edges_len_alloc;
bmpinfo->edges = MEM_mallocN((sizeof(BMEdge *) * bmpinfo->edges_len_alloc), __func__);
edges_tag = BLI_BITMAP_NEW((size_t)bm->totedge, __func__);
}
for (int i = 0; i < bmpinfo->faces_len; i++) {
BMFace *f = bmpinfo->faces[i];
BMLoop *l_iter, *l_first;
l_iter = l_first = BM_FACE_FIRST_LOOP(f);
do {
if (!partial_elem_vert_ensure(bmpinfo, verts_tag, l_iter->v)) {
continue;
}
BMVert *v = l_iter->v;
BMEdge *e_first = v->e;
BMEdge *e_iter = e_first;
do {
if (e_iter->l) {
partial_elem_edge_ensure(bmpinfo, edges_tag, e_iter);
}
} while ((e_iter = BM_DISK_EDGE_NEXT(e_iter, v)) != e_first);
partial_elem_vert_ensure(bmpinfo, verts_tag, l_iter->v);
} while ((l_iter = l_iter->next) != l_first);
}
}
@ -227,9 +194,6 @@ BMPartialUpdate *BM_mesh_partial_create_from_verts(BMesh *bm,
if (verts_tag) {
MEM_freeN(verts_tag);
}
if (edges_tag) {
MEM_freeN(edges_tag);
}
if (faces_tag) {
MEM_freeN(faces_tag);
}
@ -244,9 +208,6 @@ void BM_mesh_partial_destroy(BMPartialUpdate *bmpinfo)
if (bmpinfo->verts) {
MEM_freeN(bmpinfo->verts);
}
if (bmpinfo->edges) {
MEM_freeN(bmpinfo->edges);
}
if (bmpinfo->faces) {
MEM_freeN(bmpinfo->faces);
}

View File

@ -44,10 +44,8 @@ typedef struct BMPartialUpdate_Params {
*/
typedef struct BMPartialUpdate {
BMVert **verts;
BMEdge **edges;
BMFace **faces;
int verts_len, verts_len_alloc;
int edges_len, edges_len_alloc;
int faces_len, faces_len_alloc;
/** Store the parameters used in creation so invalid use can be asserted. */