Geometry Nodes: multi threaded field evaluation

This adds a new `ParallelMultiFunction` which wraps another multi-function
and evaluates it with multiple threads. This speeds up field evaluation
quite a bit (the effect is most noticeable when both the number of
evaluations and the field itself are large).
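
A minimal usage sketch (the helper function below is hypothetical and only
illustrative; the actual wiring into field evaluation is in the field.cc hunk
further down):

#include "FN_multi_function_parallel.hh"

namespace blender::fn {

/* Hypothetical helper: evaluate an existing multi-function over a mask, letting the
 * ParallelMultiFunction wrapper split large masks into chunks of `grain_size`. Small
 * masks (or functions with vector parameters) fall back to a single-threaded call. */
static void call_with_threading(const MultiFunction &fn,
                                const IndexMask mask,
                                MFParams params,
                                MFContext context)
{
  const int64_t grain_size = 10000;
  ParallelMultiFunction parallel_fn{fn, grain_size};
  /* The wrapper copies the wrapped function's signature, so params built for `fn` work here. */
  parallel_fn.call(mask, params, context);
}

}  // namespace blender::fn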

There are still other single-threaded performance bottlenecks in field
evaluation that will need to be solved separately. The most notable one
is copying the computed data into the position attribute in the
Set Position node.

Differential Revision: https://developer.blender.org/D12457
Jacques Lucke 2021-09-15 11:02:39 +02:00
parent fb27a9bb98
commit e6ca054590
10 changed files with 335 additions and 2 deletions

View File

@@ -39,6 +39,7 @@
#include "BLI_index_range.hh"
#include "BLI_span.hh"
#include "BLI_vector.hh"
namespace blender {
@@ -221,6 +222,8 @@ class IndexMask {
{
return indices_.is_empty();
}
IndexMask slice_and_offset(IndexRange slice, Vector<int64_t> &r_new_indices) const;
};
} // namespace blender

View File

@@ -86,6 +86,7 @@ set(SRC
intern/hash_md5.c
intern/hash_mm2a.c
intern/hash_mm3.c
intern/index_mask.cc
intern/jitter_2d.c
intern/kdtree_1d.c
intern/kdtree_2d.c

View File

@@ -0,0 +1,57 @@
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "BLI_index_mask.hh"
namespace blender {
/**
* Create a sub-mask that is also shifted to the beginning. The shifting to the beginning allows
* code to work with smaller indices, which is more memory efficient.
*
* \return New index mask with the size of #slice. It is either empty or starts with 0. It might
* reference indices that have been appended to #r_new_indices.
*
* Example:
* this: [2, 3, 5, 7, 8, 9, 10]
* slice: ^--------^
* output: [0, 2, 4, 5]
*
* All the indices in the sub-mask are shifted by 3 towards zero, so that the first index in the
* output is zero.
*/
IndexMask IndexMask::slice_and_offset(const IndexRange slice, Vector<int64_t> &r_new_indices) const
{
const int slice_size = slice.size();
if (slice_size == 0) {
return {};
}
IndexMask sliced_mask{indices_.slice(slice)};
if (sliced_mask.is_range()) {
return IndexMask(slice_size);
}
const int64_t offset = sliced_mask.indices().first();
if (offset == 0) {
return sliced_mask;
}
r_new_indices.resize(slice_size);
for (const int i : IndexRange(slice_size)) {
r_new_indices[i] = sliced_mask[i] - offset;
}
return IndexMask(r_new_indices.as_span());
}
} // namespace blender

View File

@@ -40,4 +40,28 @@ TEST(index_mask, RangeConstructor)
EXPECT_EQ(indices[2], 5);
}
TEST(index_mask, SliceAndOffset)
{
Vector<int64_t> indices;
{
IndexMask mask{IndexRange(10)};
IndexMask new_mask = mask.slice_and_offset(IndexRange(3, 5), indices);
EXPECT_TRUE(new_mask.is_range());
EXPECT_EQ(new_mask.size(), 5);
EXPECT_EQ(new_mask[0], 0);
EXPECT_EQ(new_mask[1], 1);
}
{
Vector<int64_t> original_indices = {2, 3, 5, 7, 8, 9, 10};
IndexMask mask{original_indices.as_span()};
IndexMask new_mask = mask.slice_and_offset(IndexRange(1, 4), indices);
EXPECT_FALSE(new_mask.is_range());
EXPECT_EQ(new_mask.size(), 4);
EXPECT_EQ(new_mask[0], 0);
EXPECT_EQ(new_mask[1], 2);
EXPECT_EQ(new_mask[2], 4);
EXPECT_EQ(new_mask[3], 5);
}
}
} // namespace blender::tests

View File

@@ -34,6 +34,7 @@ set(SRC
intern/generic_virtual_vector_array.cc
intern/multi_function.cc
intern/multi_function_builder.cc
intern/multi_function_parallel.cc
intern/multi_function_procedure.cc
intern/multi_function_procedure_builder.cc
intern/multi_function_procedure_executor.cc
@@ -54,6 +55,7 @@ set(SRC
FN_multi_function_data_type.hh
FN_multi_function_param_type.hh
FN_multi_function_params.hh
FN_multi_function_parallel.hh
FN_multi_function_procedure.hh
FN_multi_function_procedure_builder.hh
FN_multi_function_procedure_executor.hh
@@ -64,6 +66,22 @@ set(LIB
bf_blenlib
)
if(WITH_TBB)
add_definitions(-DWITH_TBB)
if(WIN32)
# TBB includes Windows.h which will define min/max macros
# that will collide with the stl versions.
add_definitions(-DNOMINMAX)
endif()
list(APPEND INC_SYS
${TBB_INCLUDE_DIRS}
)
list(APPEND LIB
${TBB_LIBRARIES}
)
endif()
blender_add_lib(bf_functions "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(WITH_GTESTS)

View File

@@ -911,4 +911,50 @@ template<typename T> class GVMutableArray_Typed {
}
};
class GVArray_For_SlicedGVArray : public GVArray {
protected:
const GVArray &varray_;
int64_t offset_;
public:
GVArray_For_SlicedGVArray(const GVArray &varray, const IndexRange slice)
: GVArray(varray.type(), slice.size()), varray_(varray), offset_(slice.start())
{
BLI_assert(slice.one_after_last() <= varray.size());
}
/* TODO: Add #materialize method. */
void get_impl(const int64_t index, void *r_value) const override;
void get_to_uninitialized_impl(const int64_t index, void *r_value) const override;
};
/**
* Utility class to create the "best" sliced virtual array.
*/
class GVArray_Slice {
private:
const GVArray *varray_;
/* Of these optional virtual arrays, at most one is constructed at any time. */
std::optional<GVArray_For_GSpan> varray_span_;
std::optional<GVArray_For_SlicedGVArray> varray_any_;
public:
GVArray_Slice(const GVArray &varray, const IndexRange slice);
const GVArray &operator*()
{
return *varray_;
}
const GVArray *operator->()
{
return varray_;
}
operator const GVArray &()
{
return *varray_;
}
};
} // namespace blender::fn

View File

@@ -0,0 +1,39 @@
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#pragma once
/** \file
* \ingroup fn
*/
#include "FN_multi_function.hh"
namespace blender::fn {
class ParallelMultiFunction : public MultiFunction {
private:
const MultiFunction &fn_;
const int64_t grain_size_;
bool threading_supported_;
public:
ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size);
void call(IndexMask mask, MFParams params, MFContext context) const override;
};
} // namespace blender::fn

View File

@@ -21,6 +21,7 @@
#include "BLI_vector_set.hh"
#include "FN_field.hh"
#include "FN_multi_function_parallel.hh"
namespace blender::fn {
@@ -360,7 +361,13 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
build_multi_function_procedure_for_fields(
procedure, scope, field_tree_info, varying_fields_to_evaluate);
MFProcedureExecutor procedure_executor{"Procedure", procedure};
MFParamsBuilder mf_params{procedure_executor, &mask};
/* Add multi threading capabilities to the field evaluation. */
const int grain_size = 10000;
fn::ParallelMultiFunction parallel_procedure_executor{procedure_executor, grain_size};
/* Utility variable to make it easy to switch the executor. */
const MultiFunction &executor_fn = parallel_procedure_executor;
MFParamsBuilder mf_params{executor_fn, &mask};
MFContextBuilder mf_context;
/* Provide inputs to the procedure executor. */
@@ -401,7 +408,7 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
mf_params.add_uninitialized_single_output(span);
}
procedure_executor.call(mask, mf_params, mf_context);
executor_fn.call(mask, mf_params, mf_context);
}
/* Evaluate constant fields if necessary. */

View File

@@ -387,4 +387,47 @@ void GVMutableArray_GSpan::disable_not_applied_warning()
show_not_saved_warning_ = false;
}
/* --------------------------------------------------------------------
* GVArray_For_SlicedGVArray.
*/
void GVArray_For_SlicedGVArray::get_impl(const int64_t index, void *r_value) const
{
varray_.get(index + offset_, r_value);
}
void GVArray_For_SlicedGVArray::get_to_uninitialized_impl(const int64_t index, void *r_value) const
{
varray_.get_to_uninitialized(index + offset_, r_value);
}
/* --------------------------------------------------------------------
* GVArray_Slice.
*/
GVArray_Slice::GVArray_Slice(const GVArray &varray, const IndexRange slice)
{
if (varray.is_span()) {
/* Create a new virtual array for the sliced span. */
const GSpan span = varray.get_internal_span();
const GSpan sliced_span = span.slice(slice.start(), slice.size());
varray_span_.emplace(sliced_span);
varray_ = &*varray_span_;
}
else if (varray.is_single()) {
/* Can just use the existing virtual array, because it's the same value for the indices in the
* slice anyway. */
varray_ = &varray;
}
else {
/* Generic version when none of the above methods work.
* We don't necessarily want to materialize the input varray because there might be
* large distances between the required indices. Then we would materialize many elements that
* are not accessed later on.
*/
varray_any_.emplace(varray, slice);
varray_ = &*varray_any_;
}
}
} // namespace blender::fn
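
A hypothetical usage sketch of the slicing wrapper (not part of the commit), assuming a
virtual array of ints with at least 150 elements:

#include "FN_generic_virtual_array.hh"

namespace blender::fn {

/* Hypothetical example: read the first element of the slice [100, 150) of a virtual array.
 * Local index 0 of the sliced view corresponds to index 100 of the original array for the
 * span and generic cases; single-value arrays are passed through unchanged. */
static int read_first_of_slice(const GVArray &varray)
{
  /* Picks the cheapest wrapper: a sliced GSpan, the original array, or an offsetting view. */
  GVArray_Slice sliced{varray, IndexRange(100, 50)};
  int value;
  sliced->get(0, &value); /* Assumes varray.type() is CPPType::get<int>(). */
  return value;
}

}  // namespace blender::fn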

View File

@@ -0,0 +1,95 @@
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "FN_multi_function_parallel.hh"
#include "BLI_task.hh"
namespace blender::fn {
ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
: fn_(fn), grain_size_(grain_size)
{
this->set_signature(&fn.signature());
threading_supported_ = true;
for (const int param_index : fn.param_indices()) {
const MFParamType param_type = fn.param_type(param_index);
if (param_type.data_type().category() == MFDataType::Vector) {
/* Vector parameters do not support threading yet. */
threading_supported_ = false;
break;
}
}
}
void ParallelMultiFunction::call(IndexMask full_mask, MFParams params, MFContext context) const
{
if (full_mask.size() <= grain_size_ || !threading_supported_) {
fn_.call(full_mask, params, context);
return;
}
threading::parallel_for(full_mask.index_range(), grain_size_, [&](const IndexRange mask_slice) {
Vector<int64_t> sub_mask_indices;
const IndexMask sub_mask = full_mask.slice_and_offset(mask_slice, sub_mask_indices);
if (sub_mask.is_empty()) {
return;
}
const int64_t input_slice_start = full_mask[mask_slice.first()];
const int64_t input_slice_size = full_mask[mask_slice.last()] - input_slice_start + 1;
const IndexRange input_slice_range{input_slice_start, input_slice_size};
MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()};
ResourceScope &scope = sub_params.resource_scope();
/* All parameters are sliced so that the wrapped multi-function does not have to take care of
* the index offset. */
for (const int param_index : fn_.param_indices()) {
const MFParamType param_type = fn_.param_type(param_index);
switch (param_type.category()) {
case MFParamType::SingleInput: {
const GVArray &varray = params.readonly_single_input(param_index);
const GVArray &sliced_varray = scope.construct<GVArray_Slice>(varray, input_slice_range);
sub_params.add_readonly_single_input(sliced_varray);
break;
}
case MFParamType::SingleMutable: {
const GMutableSpan span = params.single_mutable(param_index);
const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
sub_params.add_single_mutable(sliced_span);
break;
}
case MFParamType::SingleOutput: {
const GMutableSpan span = params.uninitialized_single_output(param_index);
const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
sub_params.add_uninitialized_single_output(sliced_span);
break;
}
case MFParamType::VectorInput:
case MFParamType::VectorMutable:
case MFParamType::VectorOutput: {
BLI_assert_unreachable();
break;
}
}
}
fn_.call(sub_mask, sub_params, context);
});
}
} // namespace blender::fn