Geometry Nodes: optimize Set Position node

This implements four optimizations in the Set Position node:
* Check whether the position input is the current position and ignore
  it if it is. This results in a speedup when only the Offset input is used.
* Use multi-threading when copying the computed values to the
  position attribute. All geometry types benefit from this.
* Use devirtualization for the offset and position inputs. This optimizes
  the common case that they are either single values or computed
  on the fly into a span.
* Write to `Mesh->mvert` directly instead of creating a temporary span.
  This makes setting mesh vertex positions even more efficient.

In my simple benchmark I'm using a White Noise node to offset the
position of 1,000,000 vertices. The speed is `20 ms -> 4.5 ms` in the
multi-threaded case and `32 ms -> 22 ms` in the single-threaded case.
This commit is contained in:
Jacques Lucke 2021-11-26 15:33:21 +01:00
parent eb7827e797
commit 602ecbdf9a
2 changed files with 140 additions and 12 deletions

View File

@ -183,6 +183,15 @@ template<typename T> class VArrayImpl {
* own anything can overwrite this with false. */
return true;
}
/**
 * Return true when the other virtual array should be considered to be the same, e.g. because it
 * shares the same underlying memory.
 *
 * This base implementation never reports two arrays as the same; it always returns false.
 * Implementations that can identify what they reference override it.
 */
virtual bool is_same(const VArrayImpl<T> &UNUSED(other)) const
{
return false;
}
};
/* Similar to #VArrayImpl, but adds methods that allow modifying the referenced elements. */
@ -260,6 +269,18 @@ template<typename T> class VArrayImpl_For_Span : public VMutableArrayImpl<T> {
{
return Span<T>(data_, this->size_);
}
/**
 * A span-backed array is the same as \a other when both have equal size, \a other is a span as
 * well, and both spans start at the same address.
 */
bool is_same(const VArrayImpl<T> &other) const final
{
  /* The size is compared first so that #is_span is only queried for equally sized arrays. */
  const bool is_comparable = (other.size() == this->size_) && other.is_span();
  if (!is_comparable) {
    return false;
  }
  return other.get_internal_span().data() == data_;
}
};
/**
@ -388,6 +409,12 @@ class VArrayImpl_For_DerivedSpan final : public VMutableArrayImpl<ElemT> {
{
}
/* Befriend every specialization of this class template, so that one specialization can read the
 * non-public members of another one (#is_same compares the `data_` pointer of the other
 * array, which may have different template arguments). */
template<typename OtherStructT,
typename OtherElemT,
OtherElemT (*OtherGetFunc)(const OtherStructT &),
void (*OtherSetFunc)(OtherStructT &, OtherElemT)>
friend class VArrayImpl_For_DerivedSpan;
private:
ElemT get(const int64_t index) const override
{
@ -416,6 +443,23 @@ class VArrayImpl_For_DerivedSpan final : public VMutableArrayImpl<ElemT> {
{
return false;
}
bool is_same(const VArrayImpl<ElemT> &other) const override
{
if (other.size() != this->size_) {
return false;
}
if (const VArrayImpl_For_DerivedSpan<StructT, ElemT, GetFunc> *other_typed =
dynamic_cast<const VArrayImpl_For_DerivedSpan<StructT, ElemT, GetFunc> *>(&other)) {
return other_typed->data_ == data_;
}
if (const VArrayImpl_For_DerivedSpan<StructT, ElemT, GetFunc, SetFunc> *other_typed =
dynamic_cast<const VArrayImpl_For_DerivedSpan<StructT, ElemT, GetFunc, SetFunc> *>(
&other)) {
return other_typed->data_ == data_;
}
return false;
}
};
namespace detail {
@ -670,6 +714,25 @@ template<typename T> class VArrayCommon {
return impl_->get_internal_single();
}
/**
 * Return true when the other virtual array references the same underlying memory.
 * An empty virtual array is never the same as any other one.
 */
bool is_same(const VArrayCommon<T> &other) const
{
  if (!*this || !other) {
    return false;
  }
  /* Ask both implementations, because only one of them may know how to compare itself to the
   * other. The second query only happens when the first one says "no". */
  return impl_->is_same(*other.impl_) || other.impl_->is_same(*impl_);
}
/** Copy the entire virtual array into a span. */
void materialize(MutableSpan<T> r_span) const
{

View File

@ -16,6 +16,11 @@
#include "DEG_depsgraph_query.h"
#include "BLI_task.hh"
#include "DNA_mesh_types.h"
#include "DNA_meshdata_types.h"
#include "node_geometry_util.hh"
namespace blender::nodes::node_geo_set_position_cc {
@ -29,6 +34,77 @@ static void node_declare(NodeDeclarationBuilder &b)
b.add_output<decl::Geometry>(N_("Geometry"));
}
/**
 * Write `in_positions[i] + in_offsets[i]` into the "position" attribute of \a component for every
 * index in \a selection.
 *
 * - When \a in_positions is the position attribute itself (see #VArray::is_same), only the offset
 *   is added in place.
 * - For mesh components the vertex coordinates in `mesh->mvert` are written directly; all other
 *   component types are written through the span of the output attribute.
 * - The selected indices are processed with #threading::parallel_for and the inputs are
 *   devirtualized before the inner loops run.
 */
static void set_computed_position_and_offset(GeometryComponent &component,
                                             const VArray<float3> &in_positions,
                                             const VArray<float3> &in_offsets,
                                             const AttributeDomain domain,
                                             const IndexMask selection)
{
  OutputAttribute_Typed<float3> positions = component.attribute_try_get_for_output<float3>(
      "position", domain, {0, 0, 0});

  /* Number of selected elements handled per parallel task. */
  const int grain_size = 10000;

  if (component.type() == GEO_COMPONENT_TYPE_MESH) {
    Mesh *mesh = static_cast<MeshComponent &>(component).get_for_write();
    MutableSpan<MVert> mverts{mesh->mvert, mesh->totvert};

    const bool only_offset_changes = in_positions.is_same(positions.varray());
    if (only_offset_changes) {
      /* The position input is the current position, so adding the offset is enough. */
      devirtualize_varray(in_offsets, [&](const auto offsets) {
        threading::parallel_for(
            selection.index_range(), grain_size, [&](const IndexRange sub_range) {
              for (const int vert_i : selection.slice(sub_range)) {
                const float3 delta = offsets[vert_i];
                add_v3_v3(mverts[vert_i].co, delta);
              }
            });
      });
    }
    else {
      devirtualize_varray2(
          in_positions, in_offsets, [&](const auto base_positions, const auto offsets) {
            threading::parallel_for(
                selection.index_range(), grain_size, [&](const IndexRange sub_range) {
                  for (const int vert_i : selection.slice(sub_range)) {
                    const float3 result = base_positions[vert_i] + offsets[vert_i];
                    copy_v3_v3(mverts[vert_i].co, result);
                  }
                });
          });
    }
  }
  else {
    /* The span is requested before the comparison below, exactly like for the mesh case where
     * the write access is requested first. */
    MutableSpan<float3> out_positions_span = positions.as_span();

    const bool only_offset_changes = in_positions.is_same(positions.varray());
    if (only_offset_changes) {
      devirtualize_varray(in_offsets, [&](const auto offsets) {
        threading::parallel_for(
            selection.index_range(), grain_size, [&](const IndexRange sub_range) {
              for (const int point_i : selection.slice(sub_range)) {
                out_positions_span[point_i] += offsets[point_i];
              }
            });
      });
    }
    else {
      devirtualize_varray2(
          in_positions, in_offsets, [&](const auto base_positions, const auto offsets) {
            threading::parallel_for(
                selection.index_range(), grain_size, [&](const IndexRange sub_range) {
                  for (const int point_i : selection.slice(sub_range)) {
                    out_positions_span[point_i] = base_positions[point_i] + offsets[point_i];
                  }
                });
          });
    }
  }

  positions.save();
}
static void set_position_in_component(GeometryComponent &component,
const Field<bool> &selection_field,
const Field<float3> &position_field,
@ -53,20 +129,9 @@ static void set_position_in_component(GeometryComponent &component,
position_evaluator.add(offset_field);
position_evaluator.evaluate();
/* TODO: We could have different code paths depending on whether the offset input is a single
* value or not */
const VArray<float3> &positions_input = position_evaluator.get_evaluated<float3>(0);
const VArray<float3> &offsets_input = position_evaluator.get_evaluated<float3>(1);
OutputAttribute_Typed<float3> positions = component.attribute_try_get_for_output<float3>(
"position", domain, {0, 0, 0});
MutableSpan<float3> position_mutable = positions.as_span();
for (int i : selection) {
position_mutable[i] = positions_input[i] + offsets_input[i];
}
positions.save();
set_computed_position_and_offset(component, positions_input, offsets_input, domain, selection);
}
static void node_geo_exec(GeoNodeExecParams params)