Mesh: Reduce memory consumption when calculating corner normals

Similar to previous commits, avoid using pointers that are redundant to their corresponding index. Also avoid storing the edge vectors stack in the data-per-loop when it can just be a function argument. Face corner normals are calculated when auto smooth or custom per-corner normals are used. This brings the per-face-corner data from 64 to 24 bytes, as measured by `sizeof`. In a large test file I observed a 20% performance improvement, from 285 to 239 ms.
Referenced by commit 9fcfba4aae, Mesh: Reduce memory corner normals memory usage and simplify threading
2022-12-11 22:47:46 -06:00 · 2022-12-11 22:47:46 -06:00 · 9e9ebcdd72 · 2023-04-27 14:51:19 +02:00
parent f06d7c98db
commit 9e9ebcdd72
1 changed files with 16 additions and 29 deletions
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@ -27,6 +27,7 @@
 #include "BLI_stack.h"
 #include "BLI_task.h"
 #include "BLI_task.hh"
+#include "BLI_timeit.hh"
 #include "BLI_utildefines.h"

 #include "BKE_customdata.h"
@ -792,23 +793,20 @@ void BKE_lnor_space_custom_normal_to_data(const MLoopNorSpace *lnor_space,
 #define LOOP_SPLIT_TASK_BLOCK_SIZE 1024

 struct LoopSplitTaskData {
-  /* Specific to each instance (each task). */
+  enum class Type : int8_t {
+    BlockEnd = 0, /* Set implicitly by calloc. */
+    Fan = 1,
+    Single = 2,
+  };

  /** We have to create those outside of tasks, since #MemArena is not thread-safe. */
  MLoopNorSpace *lnor_space;
-  const MLoop *ml_curr;
-  const MLoop *ml_prev;
  int ml_curr_index;
-  int ml_prev_index;
  /** Also used a flag to switch between single or fan process! */
-  const int *e2l_prev;
+  int ml_prev_index;
  int mp_index;

-  /** This one is special, it's owned and managed by worker tasks,
-   * avoid to have to create it for each fan! */
-  BLI_Stack *edge_vectors;
-
-  char pad_c;
+  Type flag;
 };

 struct LoopSplitTaskDataCommon {
@ -1048,7 +1046,9 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
  }
 }

-static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSplitTaskData *data)
+static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
+                                  LoopSplitTaskData *data,
+                                  BLI_Stack *edge_vectors)
 {
  MLoopNorSpaceArray *lnors_spacearr = common_data->lnors_spacearr;
  MutableSpan<float3> loopnors = common_data->loopnors;
@ -1070,8 +1070,6 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
  const int ml_prev_index = data->ml_prev_index;
  const int mp_index = data->mp_index;

-  BLI_Stack *edge_vectors = data->edge_vectors;
-
  /* Sigh! we have to fan around current vertex, until we find the other non-smooth edge,
   * and accumulate face normals into the vertex!
   * Note in case this vertex has only one sharp edges, this is a waste because the normal is the
@ -1253,11 +1251,9 @@ static void loop_split_worker_do(LoopSplitTaskDataCommon *common_data,
                                 LoopSplitTaskData *data,
                                 BLI_Stack *edge_vectors)
 {
-  BLI_assert(data->ml_curr);
-  if (data->e2l_prev) {
+  if (data->flag == LoopSplitTaskData::Type::Fan) {
    BLI_assert((edge_vectors == nullptr) || BLI_stack_is_empty(edge_vectors));
-    data->edge_vectors = edge_vectors;
-    split_loop_nor_fan_do(common_data, data);
+    split_loop_nor_fan_do(common_data, data, edge_vectors);
  }
  else {
    /* No need for edge_vectors for 'single' case! */
@ -1276,8 +1272,7 @@ static void loop_split_worker(TaskPool *__restrict pool, void *taskdata)
                                nullptr;

  for (int i = 0; i < LOOP_SPLIT_TASK_BLOCK_SIZE; i++, data++) {
-    /* A nullptr ml_curr is used to tag ended data! */
-    if (data->ml_curr == nullptr) {
+    if (data->flag == LoopSplitTaskData::Type::BlockEnd) {
      break;
    }

@ -1448,13 +1443,9 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common

        if (IS_EDGE_SHARP(edge_to_loops[loops[ml_curr_index].e]) &&
            IS_EDGE_SHARP(edge_to_loops[loops[ml_prev_index].e])) {
-          data->ml_curr = &loops[ml_curr_index];
-          data->ml_prev = &loops[ml_prev_index];
          data->ml_curr_index = ml_curr_index;
          data->ml_prev_index = ml_prev_index;
-#if 0 /* Not needed for 'single' loop. */
-          data->e2l_prev = nullptr; /* Tag as 'single' task. */
-#endif
+          data->flag = LoopSplitTaskData::Type::Single;
          data->mp_index = mp_index;
          if (lnors_spacearr) {
            data->lnor_space = BKE_lnor_space_create(lnors_spacearr);
@ -1470,11 +1461,9 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
         * All this due/thanks to link between normals and loop ordering (i.e. winding).
         */
        else {
-          data->ml_curr = &loops[ml_curr_index];
-          data->ml_prev = &loops[ml_prev_index];
          data->ml_curr_index = ml_curr_index;
          data->ml_prev_index = ml_prev_index;
-          data->e2l_prev = edge_to_loops[loops[ml_prev_index].e]; /* Also tag as 'fan' task. */
+          data->flag = LoopSplitTaskData::Type::Fan;
          data->mp_index = mp_index;
          if (lnors_spacearr) {
            data->lnor_space = BKE_lnor_space_create(lnors_spacearr);
@ -1495,8 +1484,6 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
    }
  }

-  /* Last block of data. Since it is calloc'ed and we use first nullptr item as stopper,
-   * everything is fine. */
  if (pool && data_idx) {
    BLI_task_pool_push(pool, loop_split_worker, data_buff, true, nullptr);
  }