Draw Cache: extract tris in parallel ranges

Multi-threaded extraction of `ibo.tris` is currently only done if the
mesh has a single material.

Now we cache a map giving the index of each polygon's first triangle
after sorting by material, which allows multi-threaded extraction of
tris for meshes with multiple materials.

As building the cache is a heavy operation and was already being
performed in multiple threads for triangle offsets, no significant
improvements are expected.

The benefit will be much greater when we can skip updating the cache
while transforming a geometry.

**Profiling:**
||master:|PATCH:
|---|---|---|
|large_mesh_editing_materials:|Average: 13.855380 FPS|Average: 15.525684 FPS
||rdata 9ms iter 36ms (frame 71ms)|rdata 9ms iter 29ms (frame 64ms)
|subdiv_mesh_final_only_materials:|Average: 28.113742 FPS|Average: 28.633599 FPS
||rdata 0ms iter 1ms (frame 36ms)|rdata 0ms iter 1ms (frame 35ms)

1.1x overall speedup

Differential Revision: https://developer.blender.org/D11445
This commit is contained in:
Germano Cavalcante 2021-07-20 11:43:38 -03:00 committed by Germano Cavalcante
parent 785d87ee42
commit 178086d581
Notes: blender-bot 2023-02-14 08:33:26 +01:00
Referenced by issue #88550, Mesh Optimization Project Progress
6 changed files with 188 additions and 175 deletions

View File

@ -84,9 +84,9 @@ typedef enum eMRDataType {
MR_DATA_LOOSE_GEOM = 1 << 4,
/** Force loop normals calculation. */
MR_DATA_TAN_LOOP_NOR = 1 << 5,
MR_DATA_MAT_OFFSETS = 1 << 6,
MR_DATA_POLYS_SORTED = 1 << 6,
} eMRDataType;
ENUM_OPERATORS(eMRDataType, MR_DATA_MAT_OFFSETS)
ENUM_OPERATORS(eMRDataType, MR_DATA_POLYS_SORTED)
#ifdef __cplusplus
extern "C" {
@ -170,10 +170,10 @@ typedef struct MeshBufferExtractionCache {
} loose_geom;
struct {
int *tri;
int *tri_first_index;
int *mat_tri_len;
int visible_tri_len;
} mat_offsets;
} poly_sorted;
} MeshBufferExtractionCache;
#define FOREACH_MESH_BUFFER_CACHE(batch_cache, mbc) \

View File

@ -532,7 +532,7 @@ static void mesh_extract_render_data_node_exec(void *__restrict task_data)
mesh_render_data_update_normals(mr, data_flag);
mesh_render_data_update_looptris(mr, iter_type, data_flag);
mesh_render_data_update_loose_geom(mr, update_task_data->cache, iter_type, data_flag);
mesh_render_data_update_mat_offsets(mr, update_task_data->cache, data_flag);
mesh_render_data_update_polys_sorted(mr, update_task_data->cache, data_flag);
}
static struct TaskNode *mesh_extract_render_data_node_create(struct TaskGraph *task_graph,

View File

@ -101,10 +101,12 @@ typedef struct MeshRenderData {
float (*loop_normals)[3];
float (*poly_normals)[3];
int *lverts, *ledges;
struct {
int *tri;
int *tri_first_index;
int *mat_tri_len;
int visible_tri_len;
} mat_offsets;
} poly_sorted;
} MeshRenderData;
BLI_INLINE BMFace *bm_original_face_get(const MeshRenderData *mr, int idx)
@ -254,9 +256,9 @@ void mesh_render_data_update_loose_geom(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
const eMRIterType iter_type,
const eMRDataType data_flag);
void mesh_render_data_update_mat_offsets(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
const eMRDataType data_flag);
void mesh_render_data_update_polys_sorted(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
const eMRDataType data_flag);
void mesh_render_data_update_looptris(MeshRenderData *mr,
const eMRIterType iter_type,
const eMRDataType data_flag);

View File

@ -25,6 +25,7 @@
#include "MEM_guardedalloc.h"
#include "BLI_alloca.h"
#include "BLI_bitmap.h"
#include "BLI_math.h"
#include "BLI_task.h"
@ -179,116 +180,104 @@ void mesh_render_data_update_loose_geom(MeshRenderData *mr,
/** \} */
/* ---------------------------------------------------------------------- */
/** \name Material Offsets
/** \name Polygons sorted per material
*
* Material offsets contains the offset of a material after sorting tris based on their material.
* Contains polygon indices sorted based on their material.
*
* \{ */
static void mesh_render_data_mat_offset_load(MeshRenderData *mr,
const MeshBufferExtractionCache *cache);
static void mesh_render_data_mat_offset_ensure(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static void mesh_render_data_mat_offset_build(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static void mesh_render_data_mat_offset_build_bm(MeshRenderData *mr,
static void mesh_render_data_polys_sorted_load(MeshRenderData *mr,
const MeshBufferExtractionCache *cache);
static void mesh_render_data_polys_sorted_ensure(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static void mesh_render_data_mat_offset_build_mesh(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static void mesh_render_data_mat_offset_apply_offset(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static void mesh_render_data_polys_sorted_build(MeshRenderData *mr,
MeshBufferExtractionCache *cache);
static int *mesh_render_data_mat_tri_len_build(MeshRenderData *mr);
void mesh_render_data_update_mat_offsets(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
const eMRDataType data_flag)
void mesh_render_data_update_polys_sorted(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
const eMRDataType data_flag)
{
if (data_flag & MR_DATA_MAT_OFFSETS) {
mesh_render_data_mat_offset_ensure(mr, cache);
mesh_render_data_mat_offset_load(mr, cache);
if (data_flag & MR_DATA_POLYS_SORTED) {
mesh_render_data_polys_sorted_ensure(mr, cache);
mesh_render_data_polys_sorted_load(mr, cache);
}
}
static void mesh_render_data_mat_offset_load(MeshRenderData *mr,
const MeshBufferExtractionCache *cache)
static void mesh_render_data_polys_sorted_load(MeshRenderData *mr,
const MeshBufferExtractionCache *cache)
{
mr->mat_offsets.tri = cache->mat_offsets.tri;
mr->mat_offsets.visible_tri_len = cache->mat_offsets.visible_tri_len;
mr->poly_sorted.tri_first_index = cache->poly_sorted.tri_first_index;
mr->poly_sorted.mat_tri_len = cache->poly_sorted.mat_tri_len;
mr->poly_sorted.visible_tri_len = cache->poly_sorted.visible_tri_len;
}
static void mesh_render_data_mat_offset_ensure(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
static void mesh_render_data_polys_sorted_ensure(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
{
if (cache->mat_offsets.tri) {
if (cache->poly_sorted.tri_first_index) {
return;
}
mesh_render_data_mat_offset_build(mr, cache);
mesh_render_data_polys_sorted_build(mr, cache);
}
static void mesh_render_data_mat_offset_build(MeshRenderData *mr, MeshBufferExtractionCache *cache)
static void mesh_render_data_polys_sorted_build(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
{
size_t mat_tri_idx_size = sizeof(int) * mr->mat_len;
cache->mat_offsets.tri = MEM_callocN(mat_tri_idx_size, __func__);
int *tri_first_index = MEM_mallocN(sizeof(*tri_first_index) * mr->poly_len, __func__);
int *mat_tri_len = mesh_render_data_mat_tri_len_build(mr);
/* Count how many triangles for each material. */
/* Apply offset. */
int visible_tri_len = 0;
int *mat_tri_offs = BLI_array_alloca(mat_tri_offs, mr->mat_len);
{
for (int i = 0; i < mr->mat_len; i++) {
mat_tri_offs[i] = visible_tri_len;
visible_tri_len += mat_tri_len[i];
}
}
/* Sort per material. */
int mat_last = mr->mat_len - 1;
if (mr->extract_type == MR_EXTRACT_BMESH) {
mesh_render_data_mat_offset_build_bm(mr, cache);
BMIter iter;
BMFace *f;
int i;
BM_ITER_MESH_INDEX (f, &iter, mr->bm, BM_FACES_OF_MESH, i) {
if (!BM_elem_flag_test(f, BM_ELEM_HIDDEN)) {
const int mat = min_ii(f->mat_nr, mat_last);
tri_first_index[i] = mat_tri_offs[mat];
mat_tri_offs[mat] += f->len - 2;
}
else {
tri_first_index[i] = -1;
}
}
}
else {
mesh_render_data_mat_offset_build_mesh(mr, cache);
const MPoly *mp = &mr->mpoly[0];
for (int i = 0; i < mr->poly_len; i++, mp++) {
if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
const int mat = min_ii(mp->mat_nr, mat_last);
tri_first_index[i] = mat_tri_offs[mat];
mat_tri_offs[mat] += mp->totloop - 2;
}
else {
tri_first_index[i] = -1;
}
}
}
mesh_render_data_mat_offset_apply_offset(mr, cache);
cache->poly_sorted.tri_first_index = tri_first_index;
cache->poly_sorted.mat_tri_len = mat_tri_len;
cache->poly_sorted.visible_tri_len = visible_tri_len;
}
typedef struct MatOffsetUserData {
MeshRenderData *mr;
/** This struct is extended during allocation to hold mat_tri_len for each material. */
int mat_tri_len[0];
} MatOffsetUserData;
static void mesh_render_data_mat_offset_reduce(const void *__restrict UNUSED(userdata),
void *__restrict chunk_join,
void *__restrict chunk)
static void mesh_render_data_mat_tri_len_bm_range_fn(void *__restrict userdata,
const int iter,
const TaskParallelTLS *__restrict tls)
{
MatOffsetUserData *dst = chunk_join;
MatOffsetUserData *src = chunk;
int *dst_mat_len = dst->mat_tri_len;
int *src_mat_len = src->mat_tri_len;
for (int i = 0; i < dst->mr->mat_len; i++) {
dst_mat_len[i] += src_mat_len[i];
}
}
static void mesh_render_data_mat_offset_build_threaded(MeshRenderData *mr,
MeshBufferExtractionCache *cache,
int face_len,
TaskParallelRangeFunc range_func)
{
/* Extending the #MatOffsetUserData with an int per material slot. */
size_t userdata_size = sizeof(MatOffsetUserData) +
(mr->mat_len) * sizeof(*cache->mat_offsets.tri);
MatOffsetUserData *userdata = MEM_callocN(userdata_size, __func__);
userdata->mr = mr;
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.userdata_chunk = userdata;
settings.userdata_chunk_size = userdata_size;
settings.min_iter_per_thread = MIN_RANGE_LEN;
settings.func_reduce = mesh_render_data_mat_offset_reduce;
BLI_task_parallel_range(0, face_len, NULL, range_func, &settings);
memcpy(cache->mat_offsets.tri,
&userdata->mat_tri_len,
(mr->mat_len) * sizeof(*cache->mat_offsets.tri));
MEM_freeN(userdata);
}
static void mesh_render_data_mat_offset_bm_range(void *__restrict UNUSED(userdata),
const int iter,
const TaskParallelTLS *__restrict tls)
{
MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk;
MeshRenderData *mr = mat_offset_userdata->mr;
int *mat_tri_len = mat_offset_userdata->mat_tri_len;
MeshRenderData *mr = userdata;
int *mat_tri_len = tls->userdata_chunk;
BMesh *bm = mr->bm;
BMFace *efa = BM_face_at_index(bm, iter);
@ -298,21 +287,12 @@ static void mesh_render_data_mat_offset_bm_range(void *__restrict UNUSED(userdat
}
}
static void mesh_render_data_mat_offset_build_bm(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
static void mesh_render_data_mat_tri_len_mesh_range_fn(void *__restrict userdata,
const int iter,
const TaskParallelTLS *__restrict tls)
{
BMesh *bm = mr->bm;
mesh_render_data_mat_offset_build_threaded(
mr, cache, bm->totface, mesh_render_data_mat_offset_bm_range);
}
static void mesh_render_data_mat_offset_mesh_range(void *__restrict UNUSED(userdata),
const int iter,
const TaskParallelTLS *__restrict tls)
{
MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk;
const MeshRenderData *mr = mat_offset_userdata->mr;
int *mat_tri_len = mat_offset_userdata->mat_tri_len;
MeshRenderData *mr = userdata;
int *mat_tri_len = tls->userdata_chunk;
const MPoly *mp = &mr->mpoly[iter];
if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
@ -321,25 +301,49 @@ static void mesh_render_data_mat_offset_mesh_range(void *__restrict UNUSED(userd
}
}
static void mesh_render_data_mat_offset_build_mesh(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
static void mesh_render_data_mat_tri_len_reduce_fn(const void *__restrict userdata,
void *__restrict chunk_join,
void *__restrict chunk)
{
mesh_render_data_mat_offset_build_threaded(
mr, cache, mr->poly_len, mesh_render_data_mat_offset_mesh_range);
const MeshRenderData *mr = userdata;
int *dst_mat_len = chunk_join;
int *src_mat_len = chunk;
for (int i = 0; i < mr->mat_len; i++) {
dst_mat_len[i] += src_mat_len[i];
}
}
static void mesh_render_data_mat_offset_apply_offset(MeshRenderData *mr,
MeshBufferExtractionCache *cache)
static int *mesh_render_data_mat_tri_len_build_threaded(MeshRenderData *mr,
int face_len,
TaskParallelRangeFunc range_func)
{
int *mat_tri_len = cache->mat_offsets.tri;
int ofs = mat_tri_len[0];
mat_tri_len[0] = 0;
for (int i = 1; i < mr->mat_len; i++) {
int tmp = mat_tri_len[i];
mat_tri_len[i] = ofs;
ofs += tmp;
/* Allocate one `int` triangle counter per material slot. */
size_t mat_tri_len_size = sizeof(int) * mr->mat_len;
int *mat_tri_len = MEM_callocN(mat_tri_len_size, __func__);
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.userdata_chunk = mat_tri_len;
settings.userdata_chunk_size = mat_tri_len_size;
settings.min_iter_per_thread = MIN_RANGE_LEN;
settings.func_reduce = mesh_render_data_mat_tri_len_reduce_fn;
BLI_task_parallel_range(0, face_len, mr, range_func, &settings);
return mat_tri_len;
}
/* Count how many triangles for each material. */
static int *mesh_render_data_mat_tri_len_build(MeshRenderData *mr)
{
if (mr->extract_type == MR_EXTRACT_BMESH) {
BMesh *bm = mr->bm;
return mesh_render_data_mat_tri_len_build_threaded(
mr, bm->totface, mesh_render_data_mat_tri_len_bm_range_fn);
}
else {
return mesh_render_data_mat_tri_len_build_threaded(
mr, mr->poly_len, mesh_render_data_mat_tri_len_mesh_range_fn);
}
cache->mat_offsets.visible_tri_len = ofs;
}
/** \} */

View File

@ -856,7 +856,9 @@ static void mesh_buffer_extraction_cache_clear(MeshBufferExtractionCache *extrac
extraction_cache->loose_geom.edge_len = 0;
extraction_cache->loose_geom.vert_len = 0;
MEM_SAFE_FREE(extraction_cache->mat_offsets.tri);
MEM_SAFE_FREE(extraction_cache->poly_sorted.tri_first_index);
MEM_SAFE_FREE(extraction_cache->poly_sorted.mat_tri_len);
extraction_cache->poly_sorted.visible_tri_len = 0;
}
static void mesh_batch_cache_clear(Mesh *me)

View File

@ -27,59 +27,70 @@
namespace blender::draw {
/**
 * Task-reduce callback used by the triangle index extractors: joins the index buffer
 * builder passed as `_userdata_from` into the one passed as `_userdata_to`.
 */
static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_from)
{
  GPU_indexbuf_join(static_cast<GPUIndexBufBuilder *>(_userdata_to),
                    static_cast<GPUIndexBufBuilder *>(_userdata_from));
}
/* ---------------------------------------------------------------------- */
/** \name Extract Triangles Indices (multi material)
* \{ */
/** Per-task state of the multi-material triangle index extraction. */
struct MeshExtract_Tri_Data {
/** Builder the triangle indices are written into. */
GPUIndexBufBuilder elb;
/** Per-material start offsets, in triangles; points at `mr->mat_offsets.tri` (not owned). */
const int *tri_mat_start;
/** Heap copy of `tri_mat_start`; a material's entry is incremented for every triangle
 * written for it, and the array is freed in `extract_tris_finish`. */
int *tri_mat_end;
};
static void extract_tris_init(const MeshRenderData *mr,
struct MeshBatchCache *UNUSED(cache),
void *UNUSED(ibo),
void *tls_data)
{
MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(tls_data);
data->tri_mat_start = mr->mat_offsets.tri;
data->tri_mat_end = static_cast<int *>(MEM_dupallocN(data->tri_mat_start));
GPU_indexbuf_init(&data->elb, GPU_PRIM_TRIS, mr->mat_offsets.visible_tri_len, mr->loop_len);
GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(tls_data);
GPU_indexbuf_init(elb, GPU_PRIM_TRIS, mr->poly_sorted.visible_tri_len, mr->loop_len);
}
static void extract_tris_iter_looptri_bm(const MeshRenderData *mr,
BMLoop **elt,
const int UNUSED(elt_index),
void *_data)
static void extract_tris_iter_poly_bm(const MeshRenderData *mr,
const BMFace *f,
const int f_index,
void *_data)
{
MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
const int mat_last = mr->mat_len - 1;
int tri_first_index = mr->poly_sorted.tri_first_index[f_index];
if (tri_first_index == -1) {
return;
}
if (!BM_elem_flag_test(elt[0]->f, BM_ELEM_HIDDEN)) {
int *mat_tri_ofs = data->tri_mat_end;
const int mat = min_ii(elt[0]->f->mat_nr, mat_last);
GPU_indexbuf_set_tri_verts(&data->elb,
mat_tri_ofs[mat]++,
GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
int tri_first_index_real = poly_to_tri_count(f_index, BM_elem_index_get(f->l_first));
struct BMLoop *(*looptris)[3] = mr->edit_bmesh->looptris;
int tri_len = f->len - 2;
for (int offs = 0; offs < tri_len; offs++) {
BMLoop **elt = looptris[tri_first_index_real + offs];
int tri_index = tri_first_index + offs;
GPU_indexbuf_set_tri_verts(elb,
tri_index,
BM_elem_index_get(elt[0]),
BM_elem_index_get(elt[1]),
BM_elem_index_get(elt[2]));
}
}
static void extract_tris_iter_looptri_mesh(const MeshRenderData *mr,
const MLoopTri *mlt,
const int UNUSED(elt_index),
void *_data)
static void extract_tris_iter_poly_mesh(const MeshRenderData *mr,
const MPoly *mp,
const int mp_index,
void *_data)
{
MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
const int mat_last = mr->mat_len - 1;
const MPoly *mp = &mr->mpoly[mlt->poly];
if (!(mr->use_hide && (mp->flag & ME_HIDE))) {
int *mat_tri_ofs = data->tri_mat_end;
const int mat = min_ii(mp->mat_nr, mat_last);
GPU_indexbuf_set_tri_verts(
&data->elb, mat_tri_ofs[mat]++, mlt->tri[0], mlt->tri[1], mlt->tri[2]);
int tri_first_index = mr->poly_sorted.tri_first_index[mp_index];
if (tri_first_index == -1) {
return;
}
GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
int tri_first_index_real = poly_to_tri_count(mp_index, mp->loopstart);
int tri_len = mp->totloop - 2;
for (int offs = 0; offs < tri_len; offs++) {
const MLoopTri *mlt = &mr->mlooptri[tri_first_index_real + offs];
int tri_index = tri_first_index + offs;
GPU_indexbuf_set_tri_verts(elb, tri_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]);
}
}
@ -89,40 +100,41 @@ static void extract_tris_finish(const MeshRenderData *mr,
void *_data)
{
GPUIndexBuf *ibo = static_cast<GPUIndexBuf *>(buf);
MeshExtract_Tri_Data *data = static_cast<MeshExtract_Tri_Data *>(_data);
GPU_indexbuf_build_in_place(&data->elb, ibo);
GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data);
GPU_indexbuf_build_in_place(elb, ibo);
/* Create ibo sub-ranges. Always do this to avoid error when the standard surface batch
* is created before the surfaces-per-material. */
if (mr->use_final_mesh && cache->final.tris_per_mat) {
MeshBufferCache *mbc_final = &cache->final;
int mat_start = 0;
for (int i = 0; i < mr->mat_len; i++) {
/* These IBOs have not been queried yet but we create them just in case they are needed
* later since they are not tracked by mesh_buffer_cache_create_requested(). */
if (mbc_final->tris_per_mat[i] == nullptr) {
mbc_final->tris_per_mat[i] = GPU_indexbuf_calloc();
}
const int mat_tri_len = mr->poly_sorted.mat_tri_len[i];
/* Multiply by 3 because these are triangle indices. */
const int mat_start = data->tri_mat_start[i];
const int mat_end = data->tri_mat_end[i];
const int start = mat_start * 3;
const int len = (mat_end - mat_start) * 3;
const int len = mat_tri_len * 3;
GPU_indexbuf_create_subrange_in_place(mbc_final->tris_per_mat[i], ibo, start, len);
mat_start += mat_tri_len;
}
}
MEM_freeN(data->tri_mat_end);
}
constexpr MeshExtract create_extractor_tris()
{
MeshExtract extractor = {nullptr};
extractor.init = extract_tris_init;
extractor.iter_looptri_bm = extract_tris_iter_looptri_bm;
extractor.iter_looptri_mesh = extract_tris_iter_looptri_mesh;
extractor.iter_poly_bm = extract_tris_iter_poly_bm;
extractor.iter_poly_mesh = extract_tris_iter_poly_mesh;
extractor.task_reduce = extract_tris_mat_task_reduce;
extractor.finish = extract_tris_finish;
extractor.data_type = MR_DATA_MAT_OFFSETS;
extractor.data_size = sizeof(MeshExtract_Tri_Data);
extractor.use_threading = false;
extractor.data_type = MR_DATA_LOOPTRI | MR_DATA_POLYS_SORTED;
extractor.data_size = sizeof(GPUIndexBufBuilder);
extractor.use_threading = true;
extractor.mesh_buffer_offset = offsetof(MeshBufferCache, ibo.tris);
return extractor;
}
@ -174,13 +186,6 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr,
}
}
/**
 * Task-reduce callback of the single-material triangle extractor: joins the index buffer
 * builder passed as `_userdata_from` into the one passed as `_userdata_to`.
 */
static void extract_tris_single_mat_task_reduce(void *_userdata_to, void *_userdata_from)
{
  GPU_indexbuf_join(static_cast<GPUIndexBufBuilder *>(_userdata_to),
                    static_cast<GPUIndexBufBuilder *>(_userdata_from));
}
static void extract_tris_single_mat_finish(const MeshRenderData *mr,
struct MeshBatchCache *cache,
void *buf,
@ -213,7 +218,7 @@ constexpr MeshExtract create_extractor_tris_single_mat()
extractor.init = extract_tris_single_mat_init;
extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm;
extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh;
extractor.task_reduce = extract_tris_single_mat_task_reduce;
extractor.task_reduce = extract_tris_mat_task_reduce;
extractor.finish = extract_tris_single_mat_finish;
extractor.data_type = MR_DATA_NONE;
extractor.data_size = sizeof(GPUIndexBufBuilder);