Sculpt: Avoid retrieving vertices attribute when flushing positions

Currently the positions are retrieved again for every vertex. This is
slow, and will get slower when positions are stored as a named
attribute. Saves around 0.5ms per stroke update when a modifier
is active in my test with a 1 million face mesh.
This commit is contained in:
Hans Goudey 2022-11-16 14:54:20 -06:00
parent 87ace7207d
commit 1677ddb7ee
1 changed files with 66 additions and 85 deletions

View File

@ -18,6 +18,8 @@
#include "BLI_gsqueue.h"
#include "BLI_math.h"
#include "BLI_task.h"
#include "BLI_task.hh"
#include "BLI_timeit.hh"
#include "BLI_utildefines.h"
#include "DNA_brush_types.h"
@ -69,6 +71,8 @@
#include "bmesh.h"
using blender::MutableSpan;
/* -------------------------------------------------------------------- */
/** \name Sculpt PBVH Abstraction API
*
@ -3622,48 +3626,46 @@ static void do_brush_action(Sculpt *sd,
}
/* Flush displacement from deformed PBVH vertex to original mesh. */
static void sculpt_flush_pbvhvert_deform(Object *ob, PBVHVertexIter *vd)
static void sculpt_flush_pbvhvert_deform(const SculptSession &ss,
const PBVHVertexIter &vd,
MutableSpan<MVert> verts)
{
SculptSession *ss = ob->sculpt;
Mesh *me = static_cast<Mesh *>(ob->data);
float disp[3], newco[3];
int index = vd->vert_indices[vd->i];
int index = vd.vert_indices[vd.i];
sub_v3_v3v3(disp, vd->co, ss->deform_cos[index]);
mul_m3_v3(ss->deform_imats[index], disp);
add_v3_v3v3(newco, disp, ss->orig_cos[index]);
sub_v3_v3v3(disp, vd.co, ss.deform_cos[index]);
mul_m3_v3(ss.deform_imats[index], disp);
add_v3_v3v3(newco, disp, ss.orig_cos[index]);
copy_v3_v3(ss->deform_cos[index], vd->co);
copy_v3_v3(ss->orig_cos[index], newco);
copy_v3_v3(ss.deform_cos[index], vd.co);
copy_v3_v3(ss.orig_cos[index], newco);
MVert *verts = BKE_mesh_verts_for_write(me);
if (!ss->shapekey_active) {
if (!ss.shapekey_active) {
copy_v3_v3(verts[index].co, newco);
}
}
static void sculpt_combine_proxies_task_cb(void *__restrict userdata,
const int n,
const TaskParallelTLS *__restrict /*tls*/)
static void sculpt_combine_proxies_node(Object &object,
Sculpt &sd,
const bool use_orco,
PBVHNode &node)
{
SculptThreadedTaskData *data = static_cast<SculptThreadedTaskData *>(userdata);
SculptSession *ss = data->ob->sculpt;
Sculpt *sd = data->sd;
Object *ob = data->ob;
const bool use_orco = data->use_proxies_orco;
SculptSession *ss = object.sculpt;
PBVHVertexIter vd;
PBVHProxyNode *proxies;
int proxy_count;
float(*orco)[3] = nullptr;
if (use_orco && !ss->bm) {
orco = SCULPT_undo_push_node(data->ob, data->nodes[n], SCULPT_UNDO_COORDS)->co;
orco = SCULPT_undo_push_node(&object, &node, SCULPT_UNDO_COORDS)->co;
}
BKE_pbvh_node_get_proxies(data->nodes[n], &proxies, &proxy_count);
int proxy_count;
PBVHProxyNode *proxies;
BKE_pbvh_node_get_proxies(&node, &proxies, &proxy_count);
BKE_pbvh_vertex_iter_begin (ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE) {
Mesh &mesh = *static_cast<Mesh *>(object.data);
MutableSpan<MVert> verts = mesh.verts_for_write();
PBVHVertexIter vd;
BKE_pbvh_vertex_iter_begin (ss->pbvh, &node, vd, PBVH_ITER_UNIQUE) {
float val[3];
if (use_orco) {
@ -3682,23 +3684,22 @@ static void sculpt_combine_proxies_task_cb(void *__restrict userdata,
add_v3_v3(val, proxies[p].co[vd.i]);
}
SCULPT_clip(sd, ss, vd.co, val);
SCULPT_clip(&sd, ss, vd.co, val);
if (ss->deform_modifiers_active) {
sculpt_flush_pbvhvert_deform(ob, &vd);
sculpt_flush_pbvhvert_deform(*ss, vd, verts);
}
}
BKE_pbvh_vertex_iter_end;
BKE_pbvh_node_free_proxies(data->nodes[n]);
BKE_pbvh_node_free_proxies(&node);
}
static void sculpt_combine_proxies(Sculpt *sd, Object *ob)
{
using namespace blender;
SculptSession *ss = ob->sculpt;
Brush *brush = BKE_paint_brush(&sd->paint);
PBVHNode **nodes;
int totnode;
if (!ss->cache->supports_gravity && sculpt_tool_is_proxy_used(brush->sculpt_tool)) {
/* First line is tools that don't support proxies. */
@ -3714,37 +3715,33 @@ static void sculpt_combine_proxies(Sculpt *sd, Object *ob)
SCULPT_TOOL_BOUNDARY,
SCULPT_TOOL_POSE);
int totnode;
PBVHNode **nodes;
BKE_pbvh_gather_proxies(ss->pbvh, &nodes, &totnode);
SculptThreadedTaskData data{};
data.sd = sd;
data.ob = ob;
data.brush = brush;
data.nodes = nodes;
data.use_proxies_orco = use_orco;
threading::parallel_for(IndexRange(totnode), 1, [&](IndexRange range) {
for (const int i : range) {
sculpt_combine_proxies_node(*ob, *sd, use_orco, *nodes[i]);
}
});
TaskParallelSettings settings;
BKE_pbvh_parallel_range_settings(&settings, true, totnode);
BLI_task_parallel_range(0, totnode, &data, sculpt_combine_proxies_task_cb, &settings);
MEM_SAFE_FREE(nodes);
}
void SCULPT_combine_transform_proxies(Sculpt *sd, Object *ob)
{
using namespace blender;
SculptSession *ss = ob->sculpt;
PBVHNode **nodes;
int totnode;
PBVHNode **nodes;
BKE_pbvh_gather_proxies(ss->pbvh, &nodes, &totnode);
SculptThreadedTaskData data{};
data.sd = sd;
data.ob = ob;
data.nodes = nodes;
data.use_proxies_orco = false;
TaskParallelSettings settings;
BKE_pbvh_parallel_range_settings(&settings, true, totnode);
BLI_task_parallel_range(0, totnode, &data, sculpt_combine_proxies_task_cb, &settings);
threading::parallel_for(IndexRange(totnode), 1, [&](IndexRange range) {
for (const int i : range) {
sculpt_combine_proxies_node(*ob, *sd, false, *nodes[i]);
}
});
MEM_SAFE_FREE(nodes);
}
@ -3777,34 +3774,10 @@ static void sculpt_update_keyblock(Object *ob)
}
}
static void SCULPT_flush_stroke_deform_task_cb(void *__restrict userdata,
const int n,
const TaskParallelTLS *__restrict /*tls*/)
{
SculptThreadedTaskData *data = static_cast<SculptThreadedTaskData *>(userdata);
SculptSession *ss = data->ob->sculpt;
Object *ob = data->ob;
float(*vertCos)[3] = data->vertCos;
PBVHVertexIter vd;
BKE_pbvh_vertex_iter_begin (ss->pbvh, data->nodes[n], vd, PBVH_ITER_UNIQUE) {
sculpt_flush_pbvhvert_deform(ob, &vd);
if (!vertCos) {
continue;
}
int index = vd.vert_indices[vd.i];
copy_v3_v3(vertCos[index], ss->orig_cos[index]);
}
BKE_pbvh_vertex_iter_end;
}
void SCULPT_flush_stroke_deform(Sculpt *sd, Object *ob, bool is_proxy_used)
void SCULPT_flush_stroke_deform(Sculpt * /*sd*/, Object *ob, bool is_proxy_used)
{
using namespace blender;
SculptSession *ss = ob->sculpt;
Brush *brush = BKE_paint_brush(&sd->paint);
if (is_proxy_used && ss->deform_modifiers_active) {
/* This brushes aren't using proxies, so sculpt_combine_proxies() wouldn't propagate needed
@ -3826,16 +3799,24 @@ void SCULPT_flush_stroke_deform(Sculpt *sd, Object *ob, bool is_proxy_used)
BKE_pbvh_search_gather(ss->pbvh, nullptr, nullptr, &nodes, &totnode);
SculptThreadedTaskData data{};
data.sd = sd;
data.ob = ob;
data.brush = brush;
data.nodes = nodes;
data.vertCos = vertCos;
MutableSpan<MVert> verts = me->verts_for_write();
TaskParallelSettings settings;
BKE_pbvh_parallel_range_settings(&settings, true, totnode);
BLI_task_parallel_range(0, totnode, &data, SCULPT_flush_stroke_deform_task_cb, &settings);
threading::parallel_for(IndexRange(totnode), 1, [&](IndexRange range) {
for (const int i : range) {
PBVHVertexIter vd;
BKE_pbvh_vertex_iter_begin (ss->pbvh, nodes[i], vd, PBVH_ITER_UNIQUE) {
sculpt_flush_pbvhvert_deform(*ss, vd, verts);
if (!vertCos) {
continue;
}
int index = vd.vert_indices[vd.i];
copy_v3_v3(vertCos[index], ss->orig_cos[index]);
}
BKE_pbvh_vertex_iter_end;
}
});
if (vertCos) {
SCULPT_vertcos_to_key(ob, ss->shapekey_active, vertCos);