Nuke OMP usage in multires.c.

New code is over three times quicker than the old one here (e.g. Suzanne at
subdiv level 4, 250k tris: the threaded part now takes 1.4ms instead of 4.5ms
with OMP).
Bastien Montagne  2018-01-11 19:39:24 +01:00
commit 90cd856ac3, parent 713852affb
3 changed files with 130 additions and 88 deletions
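For context, the conversion below follows the usual pattern for replacing an OpenMP parallel for with Blender's task scheduler: pack the loop's shared state into a userdata struct, move the loop body into a callback taking the iteration index and a ParallelRangeTLS pointer, and hand the range to BLI_task_parallel_range(). A minimal sketch of that pattern, assuming only the BLI_task API as it appears in the diff below (the ExampleData/example_* names are hypothetical and not part of this commit):

#include "BLI_task.h"
#include "BLI_utildefines.h"  /* UNUSED() */

typedef struct ExampleData {
    float *values;  /* shared array, one element per range index */
    float factor;   /* shared, read-only */
} ExampleData;

/* Body of the former "#pragma omp parallel for" loop: the iteration index
 * arrives as 'i', shared state comes in through 'userdata'. */
static void example_cb(void *__restrict userdata,
                       const int i,
                       const ParallelRangeTLS *__restrict UNUSED(tls))
{
    ExampleData *data = userdata;
    data->values[i] *= data->factor;
}

static void example_run(float *values, int totvalue, float factor)
{
    ExampleData data = {
        .values = values,
        .factor = factor,
    };

    ParallelRangeSettings settings;
    BLI_parallel_range_settings_defaults(&settings);
    /* Counterpart of the old OMP 'if (work >= CCG_OMP_LIMIT)' clause:
     * small ranges stay single-threaded. */
    settings.min_iter_per_thread = 16;

    BLI_task_parallel_range(0, totvalue, &data, example_cb, &settings);
}

In the commit itself the userdata struct is MultiresDispRunData, the callback is multires_disp_run_cb, and min_iter_per_thread is set to CCG_TASK_LIMIT, which is presumably why that define moves from the .c file into the header in the first two hunks.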

source/blender/blenkernel/intern/CCGSubSurf.h

@@ -73,6 +73,9 @@ typedef enum {
#define CCG_OMP_LIMIT 1000000
/* TODO(sergey): This actually depends on subsurf level as well. */
#define CCG_TASK_LIMIT 16
/***/
CCGSubSurf* ccgSubSurf_new (CCGMeshIFC *ifc, int subdivisionLevels, CCGAllocatorIFC *allocatorIFC, CCGAllocatorHDL allocator);

source/blender/blenkernel/intern/CCGSubSurf.c

@@ -34,9 +34,6 @@
#define FACE_calcIFNo(f, lvl, S, x, y, no) _face_calcIFNo(f, lvl, S, x, y, no, subdivLevels, vertDataSize)
/* TODO(sergey): This actually depends on subsurf level as well. */
#define CCG_TASK_LIMIT 16
/* TODO(sergey): Deduplicate the following functions/ */
static void *_edge_getCoVert(CCGEdge *e, CCGVert *v, int lvl, int x, int dataSize)
{

source/blender/blenkernel/intern/multires.c

@@ -1003,6 +1003,115 @@ static void grid_tangent_matrix(float mat[3][3], const CCGKey *key,
copy_v3_v3(mat[2], CCG_grid_elem_no(key, grid, x, y));
}
typedef struct MultiresDispRunData {
DispOp op;
CCGElem **gridData, **subGridData;
CCGKey *key;
MPoly *mpoly;
MDisps *mdisps;
GridPaintMask *grid_paint_mask;
int *gridOffset;
int gridSize, dGridSize, dSkip;
} MultiresDispRunData;
static void multires_disp_run_cb(
void *__restrict userdata,
const int pidx,
const ParallelRangeTLS *__restrict UNUSED(tls))
{
MultiresDispRunData *tdata = userdata;
DispOp op = tdata->op;
CCGElem **gridData = tdata->gridData;
CCGElem **subGridData = tdata->subGridData;
CCGKey *key = tdata->key;
MPoly *mpoly = tdata->mpoly;
MDisps *mdisps = tdata->mdisps;
GridPaintMask *grid_paint_mask = tdata->grid_paint_mask;
int *gridOffset = tdata->gridOffset;
int gridSize = tdata->gridSize;
int dGridSize = tdata->dGridSize;
int dSkip = tdata->dSkip;
const int numVerts = mpoly[pidx].totloop;
int S, x, y, gIndex = gridOffset[pidx];
for (S = 0; S < numVerts; ++S, ++gIndex) {
GridPaintMask *gpm = grid_paint_mask ? &grid_paint_mask[gIndex] : NULL;
MDisps *mdisp = &mdisps[mpoly[pidx].loopstart + S];
CCGElem *grid = gridData[gIndex];
CCGElem *subgrid = subGridData[gIndex];
float (*dispgrid)[3] = NULL;
dispgrid = mdisp->disps;
/* if needed, reallocate multires paint mask */
if (gpm && gpm->level < key->level) {
gpm->level = key->level;
if (gpm->data) {
MEM_freeN(gpm->data);
}
gpm->data = MEM_callocN(sizeof(float) * key->grid_area, "gpm.data");
}
for (y = 0; y < gridSize; y++) {
for (x = 0; x < gridSize; x++) {
float *co = CCG_grid_elem_co(key, grid, x, y);
float *sco = CCG_grid_elem_co(key, subgrid, x, y);
float *data = dispgrid[dGridSize * y * dSkip + x * dSkip];
float mat[3][3], disp[3], d[3], mask;
/* construct tangent space matrix */
grid_tangent_matrix(mat, key, x, y, subgrid);
switch (op) {
case APPLY_DISPLACEMENTS:
/* Convert displacement to object space
* and add to grid points */
mul_v3_m3v3(disp, mat, data);
add_v3_v3v3(co, sco, disp);
break;
case CALC_DISPLACEMENTS:
/* Calculate displacement between new and old
* grid points and convert to tangent space */
sub_v3_v3v3(disp, co, sco);
invert_m3(mat);
mul_v3_m3v3(data, mat, disp);
break;
case ADD_DISPLACEMENTS:
/* Convert subdivided displacements to tangent
* space and add to the original displacements */
invert_m3(mat);
mul_v3_m3v3(d, mat, co);
add_v3_v3(data, d);
break;
}
if (gpm) {
switch (op) {
case APPLY_DISPLACEMENTS:
/* Copy mask from gpm to DM */
*CCG_grid_elem_mask(key, grid, x, y) =
paint_grid_paint_mask(gpm, key->level, x, y);
break;
case CALC_DISPLACEMENTS:
/* Copy mask from DM to gpm */
mask = *CCG_grid_elem_mask(key, grid, x, y);
gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1);
break;
case ADD_DISPLACEMENTS:
/* Add mask displacement to gpm */
gpm->data[y * gridSize + x] +=
*CCG_grid_elem_mask(key, grid, x, y);
break;
}
}
}
}
}
}
/* XXX WARNING: subsurf elements from dm and oldGridData *must* be of the same format (size),
* because this code uses CCGKey's info from dm to access oldGridData's normals
* (through the call to grid_tangent_matrix())! */
@@ -1015,7 +1124,7 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
MDisps *mdisps = CustomData_get_layer(&me->ldata, CD_MDISPS);
GridPaintMask *grid_paint_mask = NULL;
int *gridOffset;
int i, k, /*numGrids, */ gridSize, dGridSize, dSkip;
int i, gridSize, dGridSize, dSkip;
int totloop, totpoly;
/* this happens in the dm made by bmesh_mdisps_space_set */
@@ -1051,8 +1160,6 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
if (key.has_mask)
grid_paint_mask = CustomData_get_layer(&me->ldata, CD_GRID_PAINT_MASK);
k = 0; /*current loop/mdisp index within the mloop array*/
/* when adding new faces in edit mode, need to allocate disps */
for (i = 0; i < totloop; ++i) {
if (mdisps[i].disps == NULL) {
@@ -1061,90 +1168,25 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm
}
}
BLI_begin_threaded_malloc();
ParallelRangeSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.min_iter_per_thread = CCG_TASK_LIMIT;
#pragma omp parallel for private(i) if (totloop * gridSize * gridSize >= CCG_OMP_LIMIT)
MultiresDispRunData data = {
.op = op,
.gridData = gridData,
.subGridData = subGridData,
.key = &key,
.mpoly = mpoly,
.mdisps = mdisps,
.grid_paint_mask = grid_paint_mask,
.gridOffset = gridOffset,
.gridSize = gridSize,
.dGridSize = dGridSize,
.dSkip = dSkip
};
for (i = 0; i < totpoly; ++i) {
const int numVerts = mpoly[i].totloop;
int S, x, y, gIndex = gridOffset[i];
for (S = 0; S < numVerts; ++S, ++gIndex, ++k) {
GridPaintMask *gpm = grid_paint_mask ? &grid_paint_mask[gIndex] : NULL;
MDisps *mdisp = &mdisps[mpoly[i].loopstart + S];
CCGElem *grid = gridData[gIndex];
CCGElem *subgrid = subGridData[gIndex];
float (*dispgrid)[3] = NULL;
dispgrid = mdisp->disps;
/* if needed, reallocate multires paint mask */
if (gpm && gpm->level < key.level) {
gpm->level = key.level;
if (gpm->data) {
MEM_freeN(gpm->data);
}
gpm->data = MEM_callocN(sizeof(float) * key.grid_area, "gpm.data");
}
for (y = 0; y < gridSize; y++) {
for (x = 0; x < gridSize; x++) {
float *co = CCG_grid_elem_co(&key, grid, x, y);
float *sco = CCG_grid_elem_co(&key, subgrid, x, y);
float *data = dispgrid[dGridSize * y * dSkip + x * dSkip];
float mat[3][3], disp[3], d[3], mask;
/* construct tangent space matrix */
grid_tangent_matrix(mat, &key, x, y, subgrid);
switch (op) {
case APPLY_DISPLACEMENTS:
/* Convert displacement to object space
* and add to grid points */
mul_v3_m3v3(disp, mat, data);
add_v3_v3v3(co, sco, disp);
break;
case CALC_DISPLACEMENTS:
/* Calculate displacement between new and old
* grid points and convert to tangent space */
sub_v3_v3v3(disp, co, sco);
invert_m3(mat);
mul_v3_m3v3(data, mat, disp);
break;
case ADD_DISPLACEMENTS:
/* Convert subdivided displacements to tangent
* space and add to the original displacements */
invert_m3(mat);
mul_v3_m3v3(d, mat, co);
add_v3_v3(data, d);
break;
}
if (gpm) {
switch (op) {
case APPLY_DISPLACEMENTS:
/* Copy mask from gpm to DM */
*CCG_grid_elem_mask(&key, grid, x, y) =
paint_grid_paint_mask(gpm, key.level, x, y);
break;
case CALC_DISPLACEMENTS:
/* Copy mask from DM to gpm */
mask = *CCG_grid_elem_mask(&key, grid, x, y);
gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1);
break;
case ADD_DISPLACEMENTS:
/* Add mask displacement to gpm */
gpm->data[y * gridSize + x] +=
*CCG_grid_elem_mask(&key, grid, x, y);
break;
}
}
}
}
}
}
BLI_end_threaded_malloc();
BLI_task_parallel_range(0, totpoly, &data, multires_disp_run_cb, &settings);
if (op == APPLY_DISPLACEMENTS) {
ccgSubSurf_stitchFaces(ccgdm->ss, 0, NULL, 0);