Mesh: optimize normal calculation
Optimize mesh normal calculation. - Remove the intermediate `lnors_weighted` array, accumulate directly into the normal array using a spin-lock for thread safety. - Remove single threaded iteration over loops (normal calculation is now fully multi-threaded). - Remove stack array (alloca) for pre-calculating edge-directions. Summary of Performance Characteristics: - The largest gains are for single high poly meshes, with isolated normal-calculation benchmarks of meshes over ~1.5 million showing 2x+ speedup, ~25 million polygons are ~2.85x faster. - Single lower poly meshes (250k polys) can be ~2x slower. Since these meshes aren't normally a bottleneck, and this problem isn't noticeable on large scenes, we considered the performance trade-off reasonable. - The performance difference reduces with larger scenes, tests with production files from "Sprite Fight" showing the same or slightly better overall performance. NOTE: tested on a AMD Ryzen TR 3970X 32-Core. For more details & benchmarking scripts, see the patch description. Reviewed By: mont29 Ref D11993
This commit is contained in:
parent
1275ce61b1
commit
399b6ec76c
Notes:
blender-bot
2023-02-14 10:21:15 +01:00
Referenced by issue #90644, Inconsistency on toggling triangulate option of Decimate modifier Referenced by issue #88550, Mesh Optimization Project Progress
|
@ -50,6 +50,8 @@
|
|||
#include "BKE_global.h"
|
||||
#include "BKE_mesh.h"
|
||||
|
||||
#include "atomic_ops.h"
|
||||
|
||||
// #define DEBUG_TIME
|
||||
|
||||
#ifdef DEBUG_TIME
|
||||
|
@ -59,6 +61,46 @@
|
|||
|
||||
static CLG_LogRef LOG = {"bke.mesh_normals"};
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Private Utility Functions
|
||||
* \{ */
|
||||
|
||||
/**
|
||||
* A thread-safe version of #add_v3_v3 that uses a spin-lock.
|
||||
*
|
||||
* \note Avoid using this when the chance of contention is high.
|
||||
*/
|
||||
static void add_v3_v3_atomic(float r[3], const float a[3])
|
||||
{
|
||||
#define FLT_EQ_NONAN(_fa, _fb) (*((const uint32_t *)&_fa) == *((const uint32_t *)&_fb))
|
||||
|
||||
float virtual_lock = r[0];
|
||||
while (true) {
|
||||
/* This loops until following conditions are met:
|
||||
* - `r[0]` has same value as virtual_lock (i.e. it did not change since last try).
|
||||
* - `r[0]` was not `FLT_MAX`, i.e. it was not locked by another thread. */
|
||||
const float test_lock = atomic_cas_float(&r[0], virtual_lock, FLT_MAX);
|
||||
if (_ATOMIC_LIKELY(FLT_EQ_NONAN(test_lock, virtual_lock) && (test_lock != FLT_MAX))) {
|
||||
break;
|
||||
}
|
||||
virtual_lock = test_lock;
|
||||
}
|
||||
virtual_lock += a[0];
|
||||
r[1] += a[1];
|
||||
r[2] += a[2];
|
||||
|
||||
/* Second atomic operation to 'release'
|
||||
* our lock on that vector and set its first scalar value. */
|
||||
/* Note that we do not need to loop here, since we 'locked' `r[0]`,
|
||||
* nobody should have changed it in the mean time. */
|
||||
virtual_lock = atomic_cas_float(&r[0], FLT_MAX, virtual_lock);
|
||||
BLI_assert(virtual_lock == FLT_MAX);
|
||||
|
||||
#undef FLT_EQ_NONAN
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Mesh Normal Calculation
|
||||
* \{ */
|
||||
|
@ -210,7 +252,6 @@ struct MeshCalcNormalsData {
|
|||
const MLoop *mloop;
|
||||
MVert *mverts;
|
||||
float (*pnors)[3];
|
||||
float (*lnors_weighted)[3];
|
||||
float (*vnors)[3];
|
||||
};
|
||||
|
||||
|
@ -224,65 +265,65 @@ static void mesh_calc_normals_poly_cb(void *__restrict userdata,
|
|||
BKE_mesh_calc_poly_normal(mp, data->mloop + mp->loopstart, data->mverts, data->pnors[pidx]);
|
||||
}
|
||||
|
||||
static void mesh_calc_normals_poly_prepare_cb(void *__restrict userdata,
|
||||
const int pidx,
|
||||
const TaskParallelTLS *__restrict UNUSED(tls))
|
||||
static void mesh_calc_normals_poly_and_accum_cb(void *__restrict userdata,
|
||||
const int pidx,
|
||||
const TaskParallelTLS *__restrict UNUSED(tls))
|
||||
{
|
||||
MeshCalcNormalsData *data = (MeshCalcNormalsData *)userdata;
|
||||
const MeshCalcNormalsData *data = (MeshCalcNormalsData *)userdata;
|
||||
const MPoly *mp = &data->mpolys[pidx];
|
||||
const MLoop *ml = &data->mloop[mp->loopstart];
|
||||
const MVert *mverts = data->mverts;
|
||||
float(*vnors)[3] = data->vnors;
|
||||
|
||||
float pnor_temp[3];
|
||||
float *pnor = data->pnors ? data->pnors[pidx] : pnor_temp;
|
||||
float(*lnors_weighted)[3] = data->lnors_weighted;
|
||||
|
||||
const int nverts = mp->totloop;
|
||||
float(*edgevecbuf)[3] = (float(*)[3])BLI_array_alloca(edgevecbuf, (size_t)nverts);
|
||||
const int i_end = mp->totloop - 1;
|
||||
|
||||
/* Polygon Normal and edge-vector */
|
||||
/* inline version of #BKE_mesh_calc_poly_normal, also does edge-vectors */
|
||||
{
|
||||
int i_prev = nverts - 1;
|
||||
const float *v_prev = mverts[ml[i_prev].v].co;
|
||||
const float *v_curr;
|
||||
|
||||
zero_v3(pnor);
|
||||
/* Newell's Method */
|
||||
for (int i = 0; i < nverts; i++) {
|
||||
v_curr = mverts[ml[i].v].co;
|
||||
add_newell_cross_v3_v3v3(pnor, v_prev, v_curr);
|
||||
|
||||
/* Unrelated to normalize, calculate edge-vector */
|
||||
sub_v3_v3v3(edgevecbuf[i_prev], v_prev, v_curr);
|
||||
normalize_v3(edgevecbuf[i_prev]);
|
||||
i_prev = i;
|
||||
|
||||
v_prev = v_curr;
|
||||
const float *v_curr = mverts[ml[i_end].v].co;
|
||||
for (int i_next = 0; i_next <= i_end; i_next++) {
|
||||
const float *v_next = mverts[ml[i_next].v].co;
|
||||
add_newell_cross_v3_v3v3(pnor, v_curr, v_next);
|
||||
v_curr = v_next;
|
||||
}
|
||||
if (UNLIKELY(normalize_v3(pnor) == 0.0f)) {
|
||||
pnor[2] = 1.0f; /* other axes set to 0.0 */
|
||||
}
|
||||
}
|
||||
|
||||
/* accumulate angle weighted face normal */
|
||||
/* inline version of #accumulate_vertex_normals_poly_v3,
|
||||
* split between this threaded callback and #mesh_calc_normals_poly_accum_cb. */
|
||||
/* Accumulate angle weighted face normal into the vertex normal. */
|
||||
/* inline version of #accumulate_vertex_normals_poly_v3. */
|
||||
{
|
||||
const float *prev_edge = edgevecbuf[nverts - 1];
|
||||
float edvec_prev[3], edvec_next[3], edvec_end[3];
|
||||
const float *v_curr = mverts[ml[i_end].v].co;
|
||||
sub_v3_v3v3(edvec_prev, mverts[ml[i_end - 1].v].co, v_curr);
|
||||
normalize_v3(edvec_prev);
|
||||
copy_v3_v3(edvec_end, edvec_prev);
|
||||
|
||||
for (int i = 0; i < nverts; i++) {
|
||||
const int lidx = mp->loopstart + i;
|
||||
const float *cur_edge = edgevecbuf[i];
|
||||
for (int i_next = 0, i_curr = i_end; i_next <= i_end; i_curr = i_next++) {
|
||||
const float *v_next = mverts[ml[i_next].v].co;
|
||||
|
||||
/* calculate angle between the two poly edges incident on
|
||||
* this vertex */
|
||||
const float fac = saacos(-dot_v3v3(cur_edge, prev_edge));
|
||||
/* Skip an extra normalization by reusing the first calculated edge. */
|
||||
if (i_next != i_end) {
|
||||
sub_v3_v3v3(edvec_next, v_curr, v_next);
|
||||
normalize_v3(edvec_next);
|
||||
}
|
||||
else {
|
||||
copy_v3_v3(edvec_next, edvec_end);
|
||||
}
|
||||
|
||||
/* Store for later accumulation */
|
||||
mul_v3_v3fl(lnors_weighted[lidx], pnor, fac);
|
||||
/* Calculate angle between the two poly edges incident on this vertex. */
|
||||
const float fac = saacos(-dot_v3v3(edvec_prev, edvec_next));
|
||||
const float vnor_add[3] = {pnor[0] * fac, pnor[1] * fac, pnor[2] * fac};
|
||||
|
||||
prev_edge = cur_edge;
|
||||
add_v3_v3_atomic(vnors[ml[i_curr].v], vnor_add);
|
||||
v_curr = v_next;
|
||||
copy_v3_v3(edvec_prev, edvec_next);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -309,7 +350,7 @@ void BKE_mesh_calc_normals_poly(MVert *mverts,
|
|||
int numVerts,
|
||||
const MLoop *mloop,
|
||||
const MPoly *mpolys,
|
||||
int numLoops,
|
||||
int UNUSED(numLoops),
|
||||
int numPolys,
|
||||
float (*r_polynors)[3],
|
||||
const bool only_face_normals)
|
||||
|
@ -335,8 +376,6 @@ void BKE_mesh_calc_normals_poly(MVert *mverts,
|
|||
}
|
||||
|
||||
float(*vnors)[3] = r_vertnors;
|
||||
float(*lnors_weighted)[3] = (float(*)[3])MEM_malloc_arrayN(
|
||||
(size_t)numLoops, sizeof(*lnors_weighted), __func__);
|
||||
bool free_vnors = false;
|
||||
|
||||
/* first go through and calculate normals for all the polys */
|
||||
|
@ -353,27 +392,17 @@ void BKE_mesh_calc_normals_poly(MVert *mverts,
|
|||
data.mloop = mloop;
|
||||
data.mverts = mverts;
|
||||
data.pnors = pnors;
|
||||
data.lnors_weighted = lnors_weighted;
|
||||
data.vnors = vnors;
|
||||
|
||||
/* Compute poly normals, and prepare weighted loop normals. */
|
||||
BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, &settings);
|
||||
/* Compute poly normals (`pnors`), accumulating them into vertex normals (`vnors`). */
|
||||
BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_and_accum_cb, &settings);
|
||||
|
||||
/* Actually accumulate weighted loop normals into vertex ones. */
|
||||
/* Unfortunately, not possible to thread that
|
||||
* (not in a reasonable, totally lock- and barrier-free fashion),
|
||||
* since several loops will point to the same vertex... */
|
||||
for (int lidx = 0; lidx < numLoops; lidx++) {
|
||||
add_v3_v3(vnors[mloop[lidx].v], data.lnors_weighted[lidx]);
|
||||
}
|
||||
|
||||
/* Normalize and validate computed vertex normals. */
|
||||
/* Normalize and validate computed vertex normals (`vnors`). */
|
||||
BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, &settings);
|
||||
|
||||
if (free_vnors) {
|
||||
MEM_freeN(vnors);
|
||||
}
|
||||
MEM_freeN(lnors_weighted);
|
||||
}
|
||||
|
||||
void BKE_mesh_ensure_normals(Mesh *mesh)
|
||||
|
|
Loading…
Reference in New Issue