Mesh: optimize object mode face tessellation

- Multi-thread BKE_mesh_recalc_looptri.

- Add BKE_mesh_recalc_looptri_with_normals,
  this skips having to calculate normals for ngons.

Exact performance depends on number of faces, size of ngons and
available CPU cores.

For high poly meshes the isolated improvement to BKE_mesh_recalc_looptri
in my tests was between 6.7x .. 25.0x, with the largest gains seen in
meshes containing ngons with many sides.

The overall speedup for high poly meshes containing quads and triangles
is only ~20% although ngon heavy meshes can be much faster.
This commit is contained in:
Campbell Barton 2021-06-20 13:21:11 +10:00
parent b5e5fbcfc8
commit 513f566b40
Notes: blender-bot 2023-02-14 07:17:43 +01:00
Referenced by issue #88550, Mesh Optimization Project Progress
4 changed files with 323 additions and 133 deletions

View File

@ -297,6 +297,13 @@ void BKE_mesh_recalc_looptri(const struct MLoop *mloop,
int totloop,
int totpoly,
struct MLoopTri *mlooptri);
void BKE_mesh_recalc_looptri_with_normals(const struct MLoop *mloop,
const struct MPoly *mpoly,
const struct MVert *mvert,
int totloop,
int totpoly,
struct MLoopTri *mlooptri,
const float (*poly_normals)[3]);
/* *** mesh_evaluate.c *** */

View File

@ -36,6 +36,7 @@
#include "BLI_math.h"
#include "BLI_memarena.h"
#include "BLI_polyfill_2d.h"
#include "BLI_task.h"
#include "BLI_utildefines.h"
#include "BKE_customdata.h"
@ -43,6 +44,9 @@
#include "BLI_strict_flags.h"
/** Compared against total loops. */
#define MESH_FACE_TESSELLATE_THREADED_LIMIT 4096
/* -------------------------------------------------------------------- */
/** \name MFace Tessellation
* \{ */
@ -439,6 +443,265 @@ void BKE_mesh_tessface_calc(Mesh *mesh)
/** \name Loop Tessellation
* \{ */
/**
* \param face_normal: This will be optimized out as a constant.
*/
BLI_INLINE void mesh_calc_tessellation_for_face_impl(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
uint poly_index,
MLoopTri *mlt,
MemArena **pf_arena_p,
const bool face_normal,
const float normal_precalc[3])
{
const uint mp_loopstart = (uint)mpoly[poly_index].loopstart;
const uint mp_totloop = (uint)mpoly[poly_index].totloop;
#define ML_TO_MLT(i1, i2, i3) \
{ \
ARRAY_SET_ITEMS(mlt->tri, mp_loopstart + i1, mp_loopstart + i2, mp_loopstart + i3); \
mlt->poly = poly_index; \
} \
((void)0)
switch (mp_totloop) {
case 3: {
ML_TO_MLT(0, 1, 2);
break;
}
case 4: {
ML_TO_MLT(0, 1, 2);
MLoopTri *mlt_a = mlt++;
ML_TO_MLT(0, 2, 3);
MLoopTri *mlt_b = mlt;
if (UNLIKELY(is_quad_flip_v3_first_third_fast(mvert[mloop[mlt_a->tri[0]].v].co,
mvert[mloop[mlt_a->tri[1]].v].co,
mvert[mloop[mlt_a->tri[2]].v].co,
mvert[mloop[mlt_b->tri[2]].v].co))) {
/* Flip out of degenerate 0-2 state. */
mlt_a->tri[2] = mlt_b->tri[2];
mlt_b->tri[0] = mlt_a->tri[1];
}
break;
}
default: {
const MLoop *ml;
float axis_mat[3][3];
/* Calculate `axis_mat` to project verts to 2D. */
if (face_normal == false) {
float normal[3];
const float *co_curr, *co_prev;
zero_v3(normal);
/* Calc normal, flipped: to get a positive 2D cross product. */
ml = mloop + mp_loopstart;
co_prev = mvert[ml[mp_totloop - 1].v].co;
for (uint j = 0; j < mp_totloop; j++, ml++) {
co_curr = mvert[ml->v].co;
add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
co_prev = co_curr;
}
if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
normal[2] = 1.0f;
}
axis_dominant_v3_to_m3_negate(axis_mat, normal);
}
else {
axis_dominant_v3_to_m3_negate(axis_mat, normal_precalc);
}
const uint totfilltri = mp_totloop - 2;
MemArena *pf_arena = *pf_arena_p;
if (UNLIKELY(pf_arena == NULL)) {
pf_arena = *pf_arena_p = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
}
uint(*tris)[3] = tris = BLI_memarena_alloc(pf_arena, sizeof(*tris) * (size_t)totfilltri);
float(*projverts)[2] = projverts = BLI_memarena_alloc(
pf_arena, sizeof(*projverts) * (size_t)mp_totloop);
ml = mloop + mp_loopstart;
for (uint j = 0; j < mp_totloop; j++, ml++) {
mul_v2_m3v3(projverts[j], axis_mat, mvert[ml->v].co);
}
BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, pf_arena);
/* Apply fill. */
for (uint j = 0; j < totfilltri; j++, mlt++) {
const uint *tri = tris[j];
ML_TO_MLT(tri[0], tri[1], tri[2]);
}
BLI_memarena_clear(pf_arena);
break;
}
}
#undef ML_TO_MLT
}
static void mesh_calc_tessellation_for_face(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
uint poly_index,
MLoopTri *mlt,
MemArena **pf_arena_p)
{
mesh_calc_tessellation_for_face_impl(
mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, false, NULL);
}
static void mesh_calc_tessellation_for_face_with_normal(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
uint poly_index,
MLoopTri *mlt,
MemArena **pf_arena_p,
const float normal_precalc[3])
{
mesh_calc_tessellation_for_face_impl(
mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, true, normal_precalc);
}
static void mesh_recalc_looptri__single_threaded(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
int totloop,
int totpoly,
MLoopTri *mlooptri,
const float (*poly_normals)[3])
{
MemArena *pf_arena = NULL;
const MPoly *mp = mpoly;
uint tri_index = 0;
if (poly_normals != NULL) {
for (uint poly_index = 0; poly_index < (uint)totpoly; poly_index++, mp++) {
mesh_calc_tessellation_for_face_with_normal(mloop,
mpoly,
mvert,
poly_index,
&mlooptri[tri_index],
&pf_arena,
poly_normals[poly_index]);
tri_index += (uint)(mp->totloop - 2);
}
}
else {
for (uint poly_index = 0; poly_index < (uint)totpoly; poly_index++, mp++) {
mesh_calc_tessellation_for_face(
mloop, mpoly, mvert, poly_index, &mlooptri[tri_index], &pf_arena);
tri_index += (uint)(mp->totloop - 2);
}
}
if (pf_arena) {
BLI_memarena_free(pf_arena);
pf_arena = NULL;
}
BLI_assert(tri_index == (uint)poly_to_tri_count(totpoly, totloop));
UNUSED_VARS_NDEBUG(totloop);
}
struct TessellationUserData {
const MLoop *mloop;
const MPoly *mpoly;
const MVert *mvert;
/** Output array. */
MLoopTri *mlooptri;
/** Optional pre-calculated polygon normals array. */
const float (*poly_normals)[3];
};
struct TessellationUserTLS {
MemArena *pf_arena;
};
static void mesh_calc_tessellation_for_face_fn(void *__restrict userdata,
const int index,
const TaskParallelTLS *__restrict tls)
{
const struct TessellationUserData *data = userdata;
struct TessellationUserTLS *tls_data = tls->userdata_chunk;
const int tri_index = poly_to_tri_count(index, data->mpoly[index].loopstart);
mesh_calc_tessellation_for_face_impl(data->mloop,
data->mpoly,
data->mvert,
(uint)index,
&data->mlooptri[tri_index],
&tls_data->pf_arena,
false,
NULL);
}
static void mesh_calc_tessellation_for_face_with_normal_fn(void *__restrict userdata,
const int index,
const TaskParallelTLS *__restrict tls)
{
const struct TessellationUserData *data = userdata;
struct TessellationUserTLS *tls_data = tls->userdata_chunk;
const int tri_index = poly_to_tri_count(index, data->mpoly[index].loopstart);
mesh_calc_tessellation_for_face_impl(data->mloop,
data->mpoly,
data->mvert,
(uint)index,
&data->mlooptri[tri_index],
&tls_data->pf_arena,
true,
data->poly_normals[index]);
}
static void mesh_calc_tessellation_for_face_free_fn(const void *__restrict UNUSED(userdata),
void *__restrict tls_v)
{
struct TessellationUserTLS *tls_data = tls_v;
if (tls_data->pf_arena) {
BLI_memarena_free(tls_data->pf_arena);
}
}
static void mesh_recalc_looptri__multi_threaded(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
int UNUSED(totloop),
int totpoly,
MLoopTri *mlooptri,
const float (*poly_normals)[3])
{
struct TessellationUserTLS tls_data_dummy = {NULL};
struct TessellationUserData data = {
.mloop = mloop,
.mpoly = mpoly,
.mvert = mvert,
.mlooptri = mlooptri,
.poly_normals = poly_normals,
};
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.userdata_chunk = &tls_data_dummy;
settings.userdata_chunk_size = sizeof(tls_data_dummy);
settings.func_free = mesh_calc_tessellation_for_face_free_fn;
BLI_task_parallel_range(0,
totpoly,
&data,
poly_normals ? mesh_calc_tessellation_for_face_with_normal_fn :
mesh_calc_tessellation_for_face_fn,
&settings);
}
/**
* Calculate tessellation into #MLoopTri which exist only for this purpose.
*/
@ -449,136 +712,39 @@ void BKE_mesh_recalc_looptri(const MLoop *mloop,
int totpoly,
MLoopTri *mlooptri)
{
/* use this to avoid locking pthread for _every_ polygon
* and calling the fill function */
#define USE_TESSFACE_SPEEDUP
const MPoly *mp;
const MLoop *ml;
MLoopTri *mlt;
MemArena *arena = NULL;
int poly_index, mlooptri_index;
uint j;
mlooptri_index = 0;
mp = mpoly;
for (poly_index = 0; poly_index < totpoly; poly_index++, mp++) {
const uint mp_loopstart = (uint)mp->loopstart;
const uint mp_totloop = (uint)mp->totloop;
uint l1, l2, l3;
if (mp_totloop < 3) {
/* do nothing */
}
#ifdef USE_TESSFACE_SPEEDUP
# define ML_TO_MLT(i1, i2, i3) \
{ \
mlt = &mlooptri[mlooptri_index]; \
l1 = mp_loopstart + i1; \
l2 = mp_loopstart + i2; \
l3 = mp_loopstart + i3; \
ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3); \
mlt->poly = (uint)poly_index; \
} \
((void)0)
else if (mp_totloop == 3) {
ML_TO_MLT(0, 1, 2);
mlooptri_index++;
}
else if (mp_totloop == 4) {
ML_TO_MLT(0, 1, 2);
MLoopTri *mlt_a = mlt;
mlooptri_index++;
ML_TO_MLT(0, 2, 3);
MLoopTri *mlt_b = mlt;
mlooptri_index++;
if (UNLIKELY(is_quad_flip_v3_first_third_fast(mvert[mloop[mlt_a->tri[0]].v].co,
mvert[mloop[mlt_a->tri[1]].v].co,
mvert[mloop[mlt_a->tri[2]].v].co,
mvert[mloop[mlt_b->tri[2]].v].co))) {
/* flip out of degenerate 0-2 state. */
mlt_a->tri[2] = mlt_b->tri[2];
mlt_b->tri[0] = mlt_a->tri[1];
}
}
#endif /* USE_TESSFACE_SPEEDUP */
else {
const float *co_curr, *co_prev;
float normal[3];
float axis_mat[3][3];
float(*projverts)[2];
uint(*tris)[3];
const uint totfilltri = mp_totloop - 2;
if (UNLIKELY(arena == NULL)) {
arena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
}
tris = BLI_memarena_alloc(arena, sizeof(*tris) * (size_t)totfilltri);
projverts = BLI_memarena_alloc(arena, sizeof(*projverts) * (size_t)mp_totloop);
zero_v3(normal);
/* calc normal, flipped: to get a positive 2d cross product */
ml = mloop + mp_loopstart;
co_prev = mvert[ml[mp_totloop - 1].v].co;
for (j = 0; j < mp_totloop; j++, ml++) {
co_curr = mvert[ml->v].co;
add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
co_prev = co_curr;
}
if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
normal[2] = 1.0f;
}
/* project verts to 2d */
axis_dominant_v3_to_m3_negate(axis_mat, normal);
ml = mloop + mp_loopstart;
for (j = 0; j < mp_totloop; j++, ml++) {
mul_v2_m3v3(projverts[j], axis_mat, mvert[ml->v].co);
}
BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, arena);
/* apply fill */
for (j = 0; j < totfilltri; j++) {
uint *tri = tris[j];
mlt = &mlooptri[mlooptri_index];
/* set loop indices, transformed to vert indices later */
l1 = mp_loopstart + tri[0];
l2 = mp_loopstart + tri[1];
l3 = mp_loopstart + tri[2];
ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3);
mlt->poly = (uint)poly_index;
mlooptri_index++;
}
BLI_memarena_clear(arena);
}
if (totloop < MESH_FACE_TESSELLATE_THREADED_LIMIT) {
mesh_recalc_looptri__single_threaded(mloop, mpoly, mvert, totloop, totpoly, mlooptri, NULL);
}
if (arena) {
BLI_memarena_free(arena);
arena = NULL;
else {
mesh_recalc_looptri__multi_threaded(mloop, mpoly, mvert, totloop, totpoly, mlooptri, NULL);
}
}
BLI_assert(mlooptri_index == poly_to_tri_count(totpoly, totloop));
UNUSED_VARS_NDEBUG(totloop);
#undef USE_TESSFACE_SPEEDUP
#undef ML_TO_MLT
/**
* A version of #BKE_mesh_recalc_looptri which takes pre-calculated polygon normals
* (used to avoid having to calculate the face normal for NGON tessellation).
*
* \note Only use this function if normals have already been calculated, there is no need
* to calculate normals just to use this function as it will cause the normals for triangles
* to be calculated which aren't needed for tessellation.
*/
void BKE_mesh_recalc_looptri_with_normals(const MLoop *mloop,
const MPoly *mpoly,
const MVert *mvert,
int totloop,
int totpoly,
MLoopTri *mlooptri,
const float (*poly_normals)[3])
{
BLI_assert(poly_normals != NULL);
if (totloop < MESH_FACE_TESSELLATE_THREADED_LIMIT) {
mesh_recalc_looptri__single_threaded(
mloop, mpoly, mvert, totloop, totpoly, mlooptri, poly_normals);
}
else {
mesh_recalc_looptri__multi_threaded(
mloop, mpoly, mvert, totloop, totpoly, mlooptri, poly_normals);
}
}
/** \} */

View File

@ -349,8 +349,19 @@ void mesh_render_data_update_looptris(MeshRenderData *mr,
/* Mesh */
if ((iter_type & MR_ITER_LOOPTRI) || (data_flag & MR_DATA_LOOPTRI)) {
mr->mlooptri = MEM_mallocN(sizeof(*mr->mlooptri) * mr->tri_len, "MR_DATATYPE_LOOPTRI");
BKE_mesh_recalc_looptri(
me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri);
if (mr->poly_normals != NULL) {
BKE_mesh_recalc_looptri_with_normals(me->mloop,
me->mpoly,
me->mvert,
me->totloop,
me->totpoly,
mr->mlooptri,
mr->poly_normals);
}
else {
BKE_mesh_recalc_looptri(
me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri);
}
}
}
else {

View File

@ -466,7 +466,16 @@ static TriTessFace *mesh_calc_tri_tessface(Mesh *me, bool tangent, Mesh *me_eval
looptri = MEM_mallocN(sizeof(*looptri) * tottri, __func__);
triangles = MEM_callocN(sizeof(TriTessFace) * tottri, __func__);
BKE_mesh_recalc_looptri(me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri);
const float(*precomputed_normals)[3] = CustomData_get_layer(&me->pdata, CD_NORMAL);
const bool calculate_normal = precomputed_normals ? false : true;
if (precomputed_normals != NULL) {
BKE_mesh_recalc_looptri_with_normals(
me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri, precomputed_normals);
}
else {
BKE_mesh_recalc_looptri(me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri);
}
if (tangent) {
BKE_mesh_ensure_normals_for_display(me_eval);
@ -479,9 +488,6 @@ static TriTessFace *mesh_calc_tri_tessface(Mesh *me, bool tangent, Mesh *me_eval
loop_normals = CustomData_get_layer(&me_eval->ldata, CD_NORMAL);
}
const float(*precomputed_normals)[3] = CustomData_get_layer(&me->pdata, CD_NORMAL);
const bool calculate_normal = precomputed_normals ? false : true;
for (i = 0; i < tottri; i++) {
const MLoopTri *lt = &looptri[i];
const MPoly *mp = &me->mpoly[lt->poly];