Draw Cache: extract tris in parallel ranges

Multithreaded extraction of `ibo.tris` is currently only done if the mesh has a single material. Now we cache a map that stores, for each polygon, the index of its first triangle after sorting by material, which allows the triangles of multi-material meshes to be extracted in multiple threads as well.

As building the cache is a heavy operation, and it was already being performed in multiple threads for the triangle offsets, no significant improvements are expected. The benefit will be much greater once we can skip updating the cache while a geometry is being transformed.

**Profiling:**

| | master: | PATCH: |
|---|---|---|
| large_mesh_editing_materials: | Average: 13.855380 FPS | Average: 15.525684 FPS |
| | rdata 9ms iter 36ms (frame 71ms) | rdata 9ms iter 29ms (frame 64ms) |
| subdiv_mesh_final_only_materials: | Average: 28.113742 FPS | Average: 28.633599 FPS |
| | rdata 0ms iter 1ms (frame 36ms) | rdata 0ms iter 1ms (frame 35ms) |

1.1x overall speedup

Differential Revision: https://developer.blender.org/D11445
Referenced by issue #88550, Mesh Optimization Project Progress
2021-07-20 11:43:38 -03:00 · 2021-07-20 11:43:38 -03:00 · 178086d581 · 2023-02-14 08:33:26 +01:00
parent 785d87ee42
commit 178086d581
6 changed files with 188 additions and 175 deletions
--- a/source/blender/draw/intern/draw_cache_extract.h
+++ b/source/blender/draw/intern/draw_cache_extract.h
@ -84,9 +84,9 @@ typedef enum eMRDataType {
  MR_DATA_LOOSE_GEOM = 1 << 4,
  /** Force loop normals calculation. */
  MR_DATA_TAN_LOOP_NOR = 1 << 5,
-  MR_DATA_MAT_OFFSETS = 1 << 6,
+  MR_DATA_POLYS_SORTED = 1 << 6,
 } eMRDataType;
-ENUM_OPERATORS(eMRDataType, MR_DATA_MAT_OFFSETS)
+ENUM_OPERATORS(eMRDataType, MR_DATA_POLYS_SORTED)

 #ifdef __cplusplus
 extern "C" {
@ -170,10 +170,10 @@ typedef struct MeshBufferExtractionCache {
  } loose_geom;

  struct {
-    int *tri;
+    int *tri_first_index;
+    int *mat_tri_len;
    int visible_tri_len;
-  } mat_offsets;
-
+  } poly_sorted;
 } MeshBufferExtractionCache;

 #define FOREACH_MESH_BUFFER_CACHE(batch_cache, mbc) \
--- a/source/blender/draw/intern/draw_cache_extract_mesh.cc
+++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc
@ -532,7 +532,7 @@ static void mesh_extract_render_data_node_exec(void *__restrict task_data)
  mesh_render_data_update_normals(mr, data_flag);
  mesh_render_data_update_looptris(mr, iter_type, data_flag);
  mesh_render_data_update_loose_geom(mr, update_task_data->cache, iter_type, data_flag);
-  mesh_render_data_update_mat_offsets(mr, update_task_data->cache, data_flag);
+  mesh_render_data_update_polys_sorted(mr, update_task_data->cache, data_flag);
 }

 static struct TaskNode *mesh_extract_render_data_node_create(struct TaskGraph *task_graph,
--- a/source/blender/draw/intern/draw_cache_extract_mesh_private.h
+++ b/source/blender/draw/intern/draw_cache_extract_mesh_private.h
@ -101,10 +101,12 @@ typedef struct MeshRenderData {
  float (*loop_normals)[3];
  float (*poly_normals)[3];
  int *lverts, *ledges;
+
  struct {
-    int *tri;
+    int *tri_first_index;
+    int *mat_tri_len;
    int visible_tri_len;
-  } mat_offsets;
+  } poly_sorted;
 } MeshRenderData;

 BLI_INLINE BMFace *bm_original_face_get(const MeshRenderData *mr, int idx)
@ -254,9 +256,9 @@ void mesh_render_data_update_loose_geom(MeshRenderData *mr,
                                        MeshBufferExtractionCache *cache,
                                        const eMRIterType iter_type,
                                        const eMRDataType data_flag);
-void mesh_render_data_update_mat_offsets(MeshRenderData *mr,
-                                         MeshBufferExtractionCache *cache,
-                                         const eMRDataType data_flag);
+void mesh_render_data_update_polys_sorted(MeshRenderData *mr,
+                                          MeshBufferExtractionCache *cache,
+                                          const eMRDataType data_flag);
 void mesh_render_data_update_looptris(MeshRenderData *mr,
                                      const eMRIterType iter_type,
                                      const eMRDataType data_flag);
--- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
+++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
@ -25,6 +25,7 @@

 #include "MEM_guardedalloc.h"

+#include "BLI_alloca.h"
 #include "BLI_bitmap.h"
 #include "BLI_math.h"
 #include "BLI_task.h"
@ -179,116 +180,104 @@ void mesh_render_data_update_loose_geom(MeshRenderData *mr,
 /** \} */

 /* ---------------------------------------------------------------------- */
-/** \name Material Offsets
+/** \name Polygons sorted per material
 *
- * Material offsets contains the offset of a material after sorting tris based on their material.
+ * For each polygon, the index of its first triangle after sorting by material (-1 if hidden).
 *
 * \{ */
-static void mesh_render_data_mat_offset_load(MeshRenderData *mr,
-                                             const MeshBufferExtractionCache *cache);
-static void mesh_render_data_mat_offset_ensure(MeshRenderData *mr,
-                                               MeshBufferExtractionCache *cache);
-static void mesh_render_data_mat_offset_build(MeshRenderData *mr,
-                                              MeshBufferExtractionCache *cache);
-static void mesh_render_data_mat_offset_build_bm(MeshRenderData *mr,
+static void mesh_render_data_polys_sorted_load(MeshRenderData *mr,
+                                               const MeshBufferExtractionCache *cache);
+static void mesh_render_data_polys_sorted_ensure(MeshRenderData *mr,
                                                 MeshBufferExtractionCache *cache);
-static void mesh_render_data_mat_offset_build_mesh(MeshRenderData *mr,
-                                                   MeshBufferExtractionCache *cache);
-static void mesh_render_data_mat_offset_apply_offset(MeshRenderData *mr,
-                                                     MeshBufferExtractionCache *cache);
+static void mesh_render_data_polys_sorted_build(MeshRenderData *mr,
+                                                MeshBufferExtractionCache *cache);
+static int *mesh_render_data_mat_tri_len_build(MeshRenderData *mr);

-void mesh_render_data_update_mat_offsets(MeshRenderData *mr,
-                                         MeshBufferExtractionCache *cache,
-                                         const eMRDataType data_flag)
+void mesh_render_data_update_polys_sorted(MeshRenderData *mr,
+                                          MeshBufferExtractionCache *cache,
+                                          const eMRDataType data_flag)
 {
-  if (data_flag & MR_DATA_MAT_OFFSETS) {
-    mesh_render_data_mat_offset_ensure(mr, cache);
-    mesh_render_data_mat_offset_load(mr, cache);
+  if (data_flag & MR_DATA_POLYS_SORTED) {
+    mesh_render_data_polys_sorted_ensure(mr, cache);
+    mesh_render_data_polys_sorted_load(mr, cache);
  }
 }

-static void mesh_render_data_mat_offset_load(MeshRenderData *mr,
-                                             const MeshBufferExtractionCache *cache)
+static void mesh_render_data_polys_sorted_load(MeshRenderData *mr,
+                                               const MeshBufferExtractionCache *cache)
 {
-  mr->mat_offsets.tri = cache->mat_offsets.tri;
-  mr->mat_offsets.visible_tri_len = cache->mat_offsets.visible_tri_len;
+  mr->poly_sorted.tri_first_index = cache->poly_sorted.tri_first_index;
+  mr->poly_sorted.mat_tri_len = cache->poly_sorted.mat_tri_len;
+  mr->poly_sorted.visible_tri_len = cache->poly_sorted.visible_tri_len;
 }

-static void mesh_render_data_mat_offset_ensure(MeshRenderData *mr,
-                                               MeshBufferExtractionCache *cache)
+static void mesh_render_data_polys_sorted_ensure(MeshRenderData *mr,
+                                                 MeshBufferExtractionCache *cache)
 {
-  if (cache->mat_offsets.tri) {
+  if (cache->poly_sorted.tri_first_index) {
    return;
  }
-  mesh_render_data_mat_offset_build(mr, cache);
+  mesh_render_data_polys_sorted_build(mr, cache);
 }

-static void mesh_render_data_mat_offset_build(MeshRenderData *mr, MeshBufferExtractionCache *cache)
+static void mesh_render_data_polys_sorted_build(MeshRenderData *mr,
+                                                MeshBufferExtractionCache *cache)
 {
-  size_t mat_tri_idx_size = sizeof(int) * mr->mat_len;
-  cache->mat_offsets.tri = MEM_callocN(mat_tri_idx_size, __func__);
+  int *tri_first_index = MEM_mallocN(sizeof(*tri_first_index) * mr->poly_len, __func__);
+  int *mat_tri_len = mesh_render_data_mat_tri_len_build(mr);

-  /* Count how many triangles for each material. */
+  /* Apply offset. */
+  int visible_tri_len = 0;
+  int *mat_tri_offs = BLI_array_alloca(mat_tri_offs, mr->mat_len);
+  {
+    for (int i = 0; i < mr->mat_len; i++) {
+      mat_tri_offs[i] = visible_tri_len;
+      visible_tri_len += mat_tri_len[i];
+    }
+  }
+
+  /* Sort per material. */
+  int mat_last = mr->mat_len - 1;
  if (mr->extract_type == MR_EXTRACT_BMESH) {
-    mesh_render_data_mat_offset_build_bm(mr, cache);
+    BMIter iter;
+    BMFace *f;
+    int i;
+    BM_ITER_MESH_INDEX (f, &iter, mr->bm, BM_FACES_OF_MESH, i) {
+      if (!BM_elem_flag_test(f, BM_ELEM_HIDDEN)) {
+        const int mat = min_ii(f->mat_nr, mat_last);
+        tri_first_index[i] = mat_tri_offs[mat];
+        mat_tri_offs[mat] += f->len - 2;
+      }
+      else {
+        tri_first_index[i] = -1;
+      }
+    }
  }
  else {
-    mesh_render_data_mat_offset_build_mesh(mr, cache);
+    const MPoly *mp = &mr->mpoly[0];
+    for (int i = 0; i < mr->poly_len; i++, mp++) {
+      if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
+        const int mat = min_ii(mp->mat_nr, mat_last);
+        tri_first_index[i] = mat_tri_offs[mat];
+        mat_tri_offs[mat] += mp->totloop - 2;
+      }
+      else {
+        tri_first_index[i] = -1;
+      }
+    }
  }

-  mesh_render_data_mat_offset_apply_offset(mr, cache);
+  cache->poly_sorted.tri_first_index = tri_first_index;
+  cache->poly_sorted.mat_tri_len = mat_tri_len;
+  cache->poly_sorted.visible_tri_len = visible_tri_len;
 }

-typedef struct MatOffsetUserData {
-  MeshRenderData *mr;
-  /** This struct is extended during allocation to hold mat_tri_len for each material. */
-  int mat_tri_len[0];
-} MatOffsetUserData;
-
-static void mesh_render_data_mat_offset_reduce(const void *__restrict UNUSED(userdata),
-                                               void *__restrict chunk_join,
-                                               void *__restrict chunk)
+static void mesh_render_data_mat_tri_len_bm_range_fn(void *__restrict userdata,
+                                                     const int iter,
+                                                     const TaskParallelTLS *__restrict tls)
 {
-  MatOffsetUserData *dst = chunk_join;
-  MatOffsetUserData *src = chunk;
-  int *dst_mat_len = dst->mat_tri_len;
-  int *src_mat_len = src->mat_tri_len;
-  for (int i = 0; i < dst->mr->mat_len; i++) {
-    dst_mat_len[i] += src_mat_len[i];
-  }
-}
-
-static void mesh_render_data_mat_offset_build_threaded(MeshRenderData *mr,
-                                                       MeshBufferExtractionCache *cache,
-                                                       int face_len,
-                                                       TaskParallelRangeFunc range_func)
-{
-  /* Extending the #MatOffsetUserData with an int per material slot. */
-  size_t userdata_size = sizeof(MatOffsetUserData) +
-                         (mr->mat_len) * sizeof(*cache->mat_offsets.tri);
-  MatOffsetUserData *userdata = MEM_callocN(userdata_size, __func__);
-  userdata->mr = mr;
-  TaskParallelSettings settings;
-  BLI_parallel_range_settings_defaults(&settings);
-  settings.userdata_chunk = userdata;
-  settings.userdata_chunk_size = userdata_size;
-  settings.min_iter_per_thread = MIN_RANGE_LEN;
-  settings.func_reduce = mesh_render_data_mat_offset_reduce;
-  BLI_task_parallel_range(0, face_len, NULL, range_func, &settings);
-
-  memcpy(cache->mat_offsets.tri,
-         &userdata->mat_tri_len,
-         (mr->mat_len) * sizeof(*cache->mat_offsets.tri));
-  MEM_freeN(userdata);
-}
-
-static void mesh_render_data_mat_offset_bm_range(void *__restrict UNUSED(userdata),
-                                                 const int iter,
-                                                 const TaskParallelTLS *__restrict tls)
-{
-  MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk;
-  MeshRenderData *mr = mat_offset_userdata->mr;
-  int *mat_tri_len = mat_offset_userdata->mat_tri_len;
+  MeshRenderData *mr = userdata;
+  int *mat_tri_len = tls->userdata_chunk;

  BMesh *bm = mr->bm;
  BMFace *efa = BM_face_at_index(bm, iter);
@ -298,21 +287,12 @@ static void mesh_render_data_mat_offset_bm_range(void *__restrict UNUSED(userdat
  }
 }

-static void mesh_render_data_mat_offset_build_bm(MeshRenderData *mr,
-                                                 MeshBufferExtractionCache *cache)
+static void mesh_render_data_mat_tri_len_mesh_range_fn(void *__restrict userdata,
+                                                       const int iter,
+                                                       const TaskParallelTLS *__restrict tls)
 {
-  BMesh *bm = mr->bm;
-  mesh_render_data_mat_offset_build_threaded(
-      mr, cache, bm->totface, mesh_render_data_mat_offset_bm_range);
-}
-
-static void mesh_render_data_mat_offset_mesh_range(void *__restrict UNUSED(userdata),
-                                                   const int iter,
-                                                   const TaskParallelTLS *__restrict tls)
-{
-  MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk;
-  const MeshRenderData *mr = mat_offset_userdata->mr;
-  int *mat_tri_len = mat_offset_userdata->mat_tri_len;
+  MeshRenderData *mr = userdata;
+  int *mat_tri_len = tls->userdata_chunk;

  const MPoly *mp = &mr->mpoly[iter];
  if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
@ -321,25 +301,49 @@ static void mesh_render_data_mat_offset_mesh_range(void *__restrict UNUSED(userd
  }
 }

-static void mesh_render_data_mat_offset_build_mesh(MeshRenderData *mr,
-                                                   MeshBufferExtractionCache *cache)
+static void mesh_render_data_mat_tri_len_reduce_fn(const void *__restrict userdata,
+                                                   void *__restrict chunk_join,
+                                                   void *__restrict chunk)
 {
-  mesh_render_data_mat_offset_build_threaded(
-      mr, cache, mr->poly_len, mesh_render_data_mat_offset_mesh_range);
+  const MeshRenderData *mr = userdata;
+  int *dst_mat_len = chunk_join;
+  int *src_mat_len = chunk;
+  for (int i = 0; i < mr->mat_len; i++) {
+    dst_mat_len[i] += src_mat_len[i];
+  }
 }

-static void mesh_render_data_mat_offset_apply_offset(MeshRenderData *mr,
-                                                     MeshBufferExtractionCache *cache)
+static int *mesh_render_data_mat_tri_len_build_threaded(MeshRenderData *mr,
+                                                        int face_len,
+                                                        TaskParallelRangeFunc range_func)
 {
-  int *mat_tri_len = cache->mat_offsets.tri;
-  int ofs = mat_tri_len[0];
-  mat_tri_len[0] = 0;
-  for (int i = 1; i < mr->mat_len; i++) {
-    int tmp = mat_tri_len[i];
-    mat_tri_len[i] = ofs;
-    ofs += tmp;
+  /* One `int` triangle counter per material slot, used as the parallel-range userdata chunk. */
+  size_t mat_tri_len_size = sizeof(int) * mr->mat_len;
+  int *mat_tri_len = MEM_callocN(mat_tri_len_size, __func__);
+
+  TaskParallelSettings settings;
+  BLI_parallel_range_settings_defaults(&settings);
+  settings.userdata_chunk = mat_tri_len;
+  settings.userdata_chunk_size = mat_tri_len_size;
+  settings.min_iter_per_thread = MIN_RANGE_LEN;
+  settings.func_reduce = mesh_render_data_mat_tri_len_reduce_fn;
+  BLI_task_parallel_range(0, face_len, mr, range_func, &settings);
+
+  return mat_tri_len;
+}
+
+/* Count how many triangles for each material. */
+static int *mesh_render_data_mat_tri_len_build(MeshRenderData *mr)
+{
+  if (mr->extract_type == MR_EXTRACT_BMESH) {
+    BMesh *bm = mr->bm;
+    return mesh_render_data_mat_tri_len_build_threaded(
+        mr, bm->totface, mesh_render_data_mat_tri_len_bm_range_fn);
+  }
+  else {
+    return mesh_render_data_mat_tri_len_build_threaded(
+        mr, mr->poly_len, mesh_render_data_mat_tri_len_mesh_range_fn);
  }
-  cache->mat_offsets.visible_tri_len = ofs;
 }

 /** \} */
--- a/source/blender/draw/intern/draw_cache_impl_mesh.c
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.c
@ -856,7 +856,9 @@ static void mesh_buffer_extraction_cache_clear(MeshBufferExtractionCache *extrac
  extraction_cache->loose_geom.edge_len = 0;
  extraction_cache->loose_geom.vert_len = 0;

-  MEM_SAFE_FREE(extraction_cache->mat_offsets.tri);
+  MEM_SAFE_FREE(extraction_cache->poly_sorted.tri_first_index);
+  MEM_SAFE_FREE(extraction_cache->poly_sorted.mat_tri_len);
+  extraction_cache->poly_sorted.visible_tri_len = 0;
 }

 static void mesh_batch_cache_clear(Mesh *me)
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
@ -27,59 +27,70 @@

 namespace blender::draw {

+static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from)
+{
+  GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to);
+  GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder *>(_userdata_from);
+  GPU_indexbuf_join(elb_to, elb_from);
+}
+
 /* ---------------------------------------------------------------------- */
 /** \name Extract Triangles Indices (multi material)
 * \{ */

-struct MeshExtract_Tri_Data {
-  GPUIndexBufBuilder elb;
-  const int *tri_mat_start;
-  int *tri_mat_end;
-};
-
 static void extract_tris_init(const MeshRenderData *mr,
                              struct MeshBatchCache *UNUSED(cache),
                              void *UNUSED(ibo),
                              void *tls_data)
 {
-  MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(tls_data);
-  data->tri_mat_start = mr->mat_offsets.tri;
-  data->tri_mat_end = static_cast<int *>(MEM_dupallocN(data->tri_mat_start));
-  GPU_indexbuf_init(&data->elb, GPU_PRIM_TRIS, mr->mat_offsets.visible_tri_len, mr->loop_len);
+  GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(tls_data);
+  GPU_indexbuf_init(elb, GPU_PRIM_TRIS, mr->poly_sorted.visible_tri_len, mr->loop_len);
 }

-static void extract_tris_iter_looptri_bm(const MeshRenderData *mr,
-                                         BMLoop **elt,
-                                         const int UNUSED(elt_index),
-                                         void *_data)
+static void extract_tris_iter_poly_bm(const MeshRenderData *mr,
+                                      const BMFace *f,
+                                      const int f_index,
+                                      void *_data)
 {
-  MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
-  const int mat_last = mr->mat_len - 1;
+  int tri_first_index = mr->poly_sorted.tri_first_index[f_index];
+  if (tri_first_index == -1) {
+    return;
+  }

-  if (!BM_elem_flag_test(elt[0]->f, BM_ELEM_HIDDEN)) {
-    int *mat_tri_ofs = data->tri_mat_end;
-    const int mat = min_ii(elt[0]->f->mat_nr, mat_last);
-    GPU_indexbuf_set_tri_verts(&data->elb,
-                               mat_tri_ofs[mat]++,
+  GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
+  int tri_first_index_real = poly_to_tri_count(f_index, BM_elem_index_get(f->l_first));
+
+  struct BMLoop *(*looptris)[3] = mr->edit_bmesh->looptris;
+  int tri_len = f->len - 2;
+  for (int offs = 0; offs < tri_len; offs++) {
+    BMLoop **elt = looptris[tri_first_index_real + offs];
+    int tri_index = tri_first_index + offs;
+    GPU_indexbuf_set_tri_verts(elb,
+                               tri_index,
                               BM_elem_index_get(elt[0]),
                               BM_elem_index_get(elt[1]),
                               BM_elem_index_get(elt[2]));
  }
 }

-static void extract_tris_iter_looptri_mesh(const MeshRenderData *mr,
-                                           const MLoopTri *mlt,
-                                           const int UNUSED(elt_index),
-                                           void *_data)
+static void extract_tris_iter_poly_mesh(const MeshRenderData *mr,
+                                        const MPoly *mp,
+                                        const int mp_index,
+                                        void *_data)
 {
-  MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
-  const int mat_last = mr->mat_len - 1;
-  const MPoly *mp = &mr->mpoly[mlt->poly];
-  if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
-    int *mat_tri_ofs = data->tri_mat_end;
-    const int mat = min_ii(mp->mat_nr, mat_last);
-    GPU_indexbuf_set_tri_verts(
-        &data->elb, mat_tri_ofs[mat]++, mlt->tri[0], mlt->tri[1], mlt->tri[2]);
+  int tri_first_index = mr->poly_sorted.tri_first_index[mp_index];
+  if (tri_first_index == -1) {
+    return;
+  }
+
+  GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
+  int tri_first_index_real = poly_to_tri_count(mp_index, mp->loopstart);
+
+  int tri_len = mp->totloop - 2;
+  for (int offs = 0; offs < tri_len; offs++) {
+    const MLoopTri *mlt = &mr->mlooptri[tri_first_index_real + offs];
+    int tri_index = tri_first_index + offs;
+    GPU_indexbuf_set_tri_verts(elb, tri_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]);
  }
 }

@ -89,40 +100,41 @@ static void extract_tris_finish(const MeshRenderData *mr,
                                void *_data)
 {
  GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
-  MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
-  GPU_indexbuf_build_in_place(&data->elb, ibo);
+  GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
+  GPU_indexbuf_build_in_place(elb, ibo);

  /* Create ibo sub-ranges. Always do this to avoid error when the standard surface batch
   * is created before the surfaces-per-material. */
  if (mr->use_final_mesh && cache->final.tris_per_mat) {
    MeshBufferCache *mbc_final = &cache->final;
+    int mat_start = 0;
    for (int i = 0; i < mr->mat_len; i++) {
      /* These IBOs have not been queried yet but we create them just in case they are needed
       * later since they are not tracked by mesh_buffer_cache_create_requested(). */
      if (mbc_final->tris_per_mat[i] == nullptr) {
        mbc_final->tris_per_mat[i] = GPU_indexbuf_calloc();
      }
+      const int mat_tri_len = mr->poly_sorted.mat_tri_len[i];
      /* Multiply by 3 because these are triangle indices. */
-      const int mat_start = data->tri_mat_start[i];
-      const int mat_end = data->tri_mat_end[i];
      const int start = mat_start * 3;
-      const int len = (mat_end - mat_start) * 3;
+      const int len = mat_tri_len * 3;
      GPU_indexbuf_create_subrange_in_place(mbc_final->tris_per_mat[i], ibo, start, len);
+      mat_start += mat_tri_len;
    }
  }
-  MEM_freeN(data->tri_mat_end);
 }

 constexpr MeshExtract create_extractor_tris()
 {
  MeshExtract extractor = {nullptr};
  extractor.init = extract_tris_init;
-  extractor.iter_looptri_bm = extract_tris_iter_looptri_bm;
-  extractor.iter_looptri_mesh = extract_tris_iter_looptri_mesh;
+  extractor.iter_poly_bm = extract_tris_iter_poly_bm;
+  extractor.iter_poly_mesh = extract_tris_iter_poly_mesh;
+  extractor.task_reduce = extract_tris_mat_task_reduce;
  extractor.finish = extract_tris_finish;
-  extractor.data_type = MR_DATA_MAT_OFFSETS;
-  extractor.data_size = sizeof(MeshExtract_Tri_Data);
-  extractor.use_threading = false;
+  extractor.data_type = MR_DATA_LOOPTRI | MR_DATA_POLYS_SORTED;
+  extractor.data_size = sizeof(GPUIndexBufBuilder);
+  extractor.use_threading = true;
  extractor.mesh_buffer_offset = offsetof(MeshBufferCache, ibo.tris);
  return extractor;
 }
@ -174,13 +186,6 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr,
  }
 }

-static void extract_tris_single_mat_task_reduce(void *_userdata_to, void *_userdata_from)
-{
-  GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to);
-  GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder *>(_userdata_from);
-  GPU_indexbuf_join(elb_to, elb_from);
-}
-
 static void extract_tris_single_mat_finish(const MeshRenderData *mr,
                                           struct MeshBatchCache *cache,
                                           void *buf,
@ -213,7 +218,7 @@ constexpr MeshExtract create_extractor_tris_single_mat()
  extractor.init = extract_tris_single_mat_init;
  extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm;
  extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh;
-  extractor.task_reduce = extract_tris_single_mat_task_reduce;
+  extractor.task_reduce = extract_tris_mat_task_reduce;
  extractor.finish = extract_tris_single_mat_finish;
  extractor.data_type = MR_DATA_NONE;
  extractor.data_size = sizeof(GPUIndexBufBuilder);