Geometry Nodes: Improve point instance node performance

This commit uses two changes to improve the performance of the point
instance node.

**Prevent Reallocations**
At 64 bytes, the transform matrix for every instance is rather large,
so reallocating the vector as it grows can become a performance bottle-
neck. This commit reserves memory for the instances that will be added
to prevent unnecessary reallocations as the instance vector grows.

In a test with 4 million instances of 3 objects in a collection, the
node was about 40% faster, from 370ms to 270ms for the node.

**Parallelization**
Currently the instances are added by appending to a vector. By changing
this slightly to fill indices instead, we can parallelize the operation
so that multiple threads can fill data at the same time. Tested on a
Ryzen 3700x, this reduced the runtime from the above 270ms to 44ms
average, bringing the total speedup to ~8x.

Note that displaying the instances in the viewport is still much slower
than the calculations in the node; this change doesn't affect that.
This commit is contained in:
Hans Goudey 2021-05-08 23:57:36 -05:00
parent b7afb8ea70
commit 518c5ce4cd
Notes: blender-bot 2023-02-14 07:17:43 +01:00
Referenced by issue #88984, Crash on VSE add of Movie strip
3 changed files with 44 additions and 12 deletions

View File

@ -597,6 +597,7 @@ class InstancesComponent : public GeometryComponent {
void clear();
void reserve(int min_capacity);
void resize(int capacity);
int add_reference(InstanceReference reference);
void add_instance(int instance_handle, const blender::float4x4 &transform, const int id = -1);
@ -604,6 +605,7 @@ class InstancesComponent : public GeometryComponent {
blender::Span<InstanceReference> references() const;
blender::Span<int> instance_reference_handles() const;
blender::MutableSpan<int> instance_reference_handles();
blender::MutableSpan<blender::float4x4> instance_transforms();
blender::Span<blender::float4x4> instance_transforms() const;
blender::MutableSpan<int> instance_ids();

View File

@ -56,6 +56,19 @@ void InstancesComponent::reserve(int min_capacity)
instance_ids_.reserve(min_capacity);
}
/**
 * Change the number of stored instances to \a capacity by resizing the ID, transform,
 * and reference handle vectors together, so that all three end up with the same length.
 *
 * \note Use this with care: it is only appropriate when it is guaranteed that the caller
 * fills every newly added entry afterwards.
 */
void InstancesComponent::resize(int capacity)
{
  instance_ids_.resize(capacity);
  instance_transforms_.resize(capacity);
  instance_reference_handles_.resize(capacity);
}
void InstancesComponent::clear()
{
instance_reference_handles_.clear();
@ -81,6 +94,11 @@ blender::Span<int> InstancesComponent::instance_reference_handles() const
return instance_reference_handles_;
}
/**
 * Non-const accessor for the per-instance reference handles: returns
 * `instance_reference_handles_` as a `MutableSpan<int>` so that callers can write
 * handles by index (e.g. after the component has been resized).
 */
blender::MutableSpan<int> InstancesComponent::instance_reference_handles()
{
return instance_reference_handles_;
}
blender::MutableSpan<blender::float4x4> InstancesComponent::instance_transforms()
{
return instance_transforms_;

View File

@ -17,6 +17,7 @@
#include "DNA_collection_types.h"
#include "BLI_hash.h"
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -159,26 +160,37 @@ static void add_instances_from_component(InstancesComponent &instances,
"scale", domain, {1, 1, 1});
GVArray_Typed<int> id_attribute = src_geometry.attribute_get_for_read<int>("id", domain, -1);
/* The initial size of the component might be non-zero if there are two component types. */
const int start_len = instances.instances_amount();
instances.resize(start_len + domain_size);
MutableSpan<int> handles = instances.instance_reference_handles().slice(start_len, domain_size);
MutableSpan<float4x4> transforms = instances.instance_transforms().slice(start_len, domain_size);
MutableSpan<int> instance_ids = instances.instance_ids().slice(start_len, domain_size);
/* Skip all of the randomness handling if there is only a single possible instance
* (anything except for collection mode with "Whole Collection" turned off). */
if (possible_handles.size() == 1) {
const int handle = possible_handles.first();
for (const int i : IndexRange(domain_size)) {
instances.add_instance(handle,
float4x4::from_loc_eul_scale(positions[i], rotations[i], scales[i]),
id_attribute[i]);
}
parallel_for(IndexRange(domain_size), 1024, [&](IndexRange range) {
for (const int i : range) {
handles[i] = handle;
transforms[i] = float4x4::from_loc_eul_scale(positions[i], rotations[i], scales[i]);
instance_ids[i] = id_attribute[i];
}
});
}
else {
const int seed = params.get_input<int>("Seed");
Array<uint32_t> ids = get_geometry_element_ids_as_uints(src_geometry, ATTR_DOMAIN_POINT);
for (const int i : IndexRange(domain_size)) {
const int index = BLI_hash_int_2d(ids[i], seed) % possible_handles.size();
const int handle = possible_handles[index];
instances.add_instance(handle,
float4x4::from_loc_eul_scale(positions[i], rotations[i], scales[i]),
id_attribute[i]);
}
parallel_for(IndexRange(domain_size), 1024, [&](IndexRange range) {
for (const int i : range) {
const int index = BLI_hash_int_2d(ids[i], seed) % possible_handles.size();
const int handle = possible_handles[index];
handles[i] = handle;
transforms[i] = float4x4::from_loc_eul_scale(positions[i], rotations[i], scales[i]);
instance_ids[i] = id_attribute[i];
}
});
}
}