Functions: improve devirtualization in multi-function builder

This refactors how devirtualization is done in general and how
multi-functions use it.

* The old `Devirtualizer` class has been removed in favor of a simpler
  solution that is also more general, in the sense that it is not coupled
  to `IndexMask` and `VArray`. Instead, there is a function whose inputs
  control how the different parameter types are devirtualized. The new
  implementation is currently less general with regard to the number of
  parameters it supports. This can be changed in the future, but does not
  seem necessary now and would make the code less obvious.
* Devirtualizers for different types are now defined in their respective
  headers.
* The multi-function builder now works directly with the `GVArray` stored in
  `MFParams`, instead of first converting it to a `VArray<T>`. This removes
  some constant overhead and makes the multi-function slightly faster, though
  the difference is only noticeable when very few elements are processed.

No functional changes or performance regressions are expected.
Jacques Lucke 2023-01-07 12:55:48 +01:00
parent 1942d55c07
commit 1bbf1ed03c
14 changed files with 315 additions and 366 deletions

@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_array.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_set.hh"
#include "BLI_task.hh"

@@ -26,302 +26,139 @@
* times and binary sizes, depending on the number of parameters that are devirtualized separately.
* So there is always a trade-off between run-time performance and compile-time/binary-size.
*
* This file provides a utility to devirtualize array parameters to a function using a high level
* API. This makes it easy to experiment with different extremes of the mentioned trade-off and
* allows finding a good compromise for each function.
* This file provides a utility to devirtualize function parameters using a high level API. This
* makes it easy to experiment with different extremes of the mentioned trade-off and allows
* finding a good compromise for each function.
*/
#include "BLI_parameter_pack_utils.hh"
#include "BLI_virtual_array.hh"
namespace blender::devirtualize_parameters {
/**
* Bit flag that specifies how an individual parameter is or can be devirtualized.
*/
enum class DeviMode {
/* This is used as zero-value to compare to, to avoid casting to int. */
None = 0,
/* Don't use devirtualization for that parameter, just pass it along. */
Keep = (1 << 0),
/* Devirtualize #VArray as #Span. */
Span = (1 << 1),
/* Devirtualize #VArray as #SingleAsSpan. */
Single = (1 << 2),
/* Devirtualize #IndexMask as #IndexRange. */
Range = (1 << 3),
};
ENUM_OPERATORS(DeviMode, DeviMode::Range);
/** Utility to encode multiple #DeviMode in a type. */
template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;
/**
* Main class that performs the devirtualization.
*/
template<typename Fn, typename... SourceTypes> class Devirtualizer {
private:
/** Utility to get the tag of the I-th source type. */
template<size_t I>
using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);
/** Function to devirtualize. */
Fn fn_;
/**
* Source values that will be devirtualized. Note that these are stored as pointers to avoid
* unnecessary copies. The caller is responsible for keeping the memory alive.
*/
std::tuple<const SourceTypes *...> sources_;
/** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
bool executed_ = false;
public:
Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
{
}
/**
* Return true when the function passed to the constructor has been called already.
*/
bool executed() const
{
return executed_;
}
/**
* At compile time, generates multiple variants of the function, each optimized for a different
* combination of devirtualized parameters. For every parameter, a bit flag is passed that
* determines how it will be devirtualized. At run-time, if possible, one of the generated
* functions is picked and executed.
*
* To check whether the function was called successfully, call #executed() afterwards.
*
* \note This generates an exponential amount of code in the final binary, depending on how many
* to-be-virtualized parameters there are.
*/
template<DeviMode... AllowedModes>
void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
{
BLI_assert(!executed_);
static_assert(sizeof...(AllowedModes) == SourceTypesNum);
this->try_execute_devirtualized_impl(DeviModeSequence<>(),
DeviModeSequence<AllowedModes...>());
}
/**
* Execute the function and pass in the original parameters without doing any devirtualization.
*/
void execute_without_devirtualization()
{
BLI_assert(!executed_);
this->try_execute_devirtualized_impl_call(
make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
std::make_index_sequence<SourceTypesNum>());
}
private:
/**
* A recursive method that generates all the combinations of devirtualized parameters that the
* caller requested. A recursive function is necessary to achieve generating an exponential
* number of function calls (which has to be used with care, but is expected here).
*
* At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
* extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
* parameters are handled.
*
* \return True when the function has been executed.
*/
template<DeviMode... Mode, DeviMode... AllowedModes>
bool try_execute_devirtualized_impl(
/* Initially empty, but then extended by one element in each recursive step. */
DeviModeSequence<Mode...> /* modes */,
/* Bit flag for every parameter. */
DeviModeSequence<AllowedModes...> /* allowed_modes */)
{
static_assert(SourceTypesNum == sizeof...(AllowedModes));
if constexpr (SourceTypesNum == sizeof...(Mode)) {
/* End of recursion, now call the function with the determined #DeviModes. */
this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
std::make_index_sequence<SourceTypesNum>());
return true;
}
else {
/* Index of the parameter that is checked in the current recursive step. */
constexpr size_t I = sizeof...(Mode);
/* Non-devirtualized parameter type. */
using SourceType = type_at_index<I>;
/* A bit flag indicating what devirtualizations are allowed in this step. */
[[maybe_unused]] constexpr DeviMode allowed_modes =
DeviModeSequence<AllowedModes...>::template at_index<I>();
/* Handle #VArray types. */
if constexpr (is_VArray_v<SourceType>) {
/* The actual virtual array, used for dynamic dispatch at run-time. */
const SourceType &varray = *std::get<I>(sources_);
/* Check if the virtual array is a single value. */
if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
if (varray.is_single()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if the virtual array is a span. */
if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
if (varray.is_span()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if it is ok if the virtual array is not devirtualized. */
if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Handle #IndexMask. */
else if constexpr (std::is_same_v<IndexMask, SourceType>) {
/* Check if the mask is actually a contiguous range. */
if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
/* The actual mask used for dynamic dispatch at run-time. */
const IndexMask &mask = *std::get<I>(sources_);
if (mask.is_range()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if mask is also allowed to stay a span. */
if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Handle unknown types. */
else {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
return false;
}
/**
* Actually call the function with devirtualized parameters.
*/
template<DeviMode... Mode, size_t... I>
void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
std::index_sequence<I...> /* indices */)
{
BLI_assert(!executed_);
fn_(this->get_devirtualized_parameter<I, Mode>()...);
executed_ = true;
}
/**
* Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
* return types based on the template parameters.
*
* \note It is expected that the caller already knows that the parameter can be devirtualized
* with the given mode.
*/
template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
{
using SourceType = type_at_index<I>;
static_assert(Mode != DeviMode::None);
if constexpr (Mode == DeviMode::Keep) {
/* Don't change the original parameter at all. */
return *std::get<I>(sources_);
}
if constexpr (is_VArray_v<SourceType>) {
const SourceType &varray = *std::get<I>(sources_);
if constexpr (Mode == DeviMode::Single) {
/* Devirtualize virtual array as single value. */
return SingleAsSpan(varray);
}
else if constexpr (Mode == DeviMode::Span) {
/* Devirtualize virtual array as span. */
return varray.get_internal_span();
}
}
else if constexpr (std::is_same_v<IndexMask, SourceType>) {
const IndexMask &mask = *std::get<I>(sources_);
if constexpr (ELEM(Mode, DeviMode::Span)) {
/* Don't devirtualize mask, it's still a span. */
return mask;
}
else if constexpr (Mode == DeviMode::Range) {
/* Devirtualize the mask as range. */
return mask.as_range();
}
}
}
};
} // namespace blender::devirtualize_parameters
namespace blender {
/**
* Generate multiple versions of the given function optimized for different virtual arrays.
* One has to be careful with nesting multiple devirtualizations, because that results in an
* exponential number of function instantiations (increasing compile time and binary size).
* Calls the given function with devirtualized parameters if possible. Note that using many
* non-trivial devirtualizers results in exponential code growth.
*
* Generally, this function should only be used when the virtual method call overhead to get an
* element from a virtual array is significant.
* \return True if the function has been called.
*
* Every devirtualizer is expected to have a `devirtualize(auto fn) -> bool` method.
* This method is expected to do one of two things:
* - Call `fn` with the devirtualized argument and return what `fn` returns.
* - Don't call `fn` (because the devirtualization failed) and return false.
*
* Examples for devirtualizers: #BasicDevirtualizer, #IndexMaskDevirtualizer, #VArrayDevirtualizer.
*/
template<typename T, typename Func>
inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
template<typename Fn, typename... Devirtualizers>
inline bool call_with_devirtualized_parameters(const std::tuple<Devirtualizers...> &devis,
const Fn &fn)
{
using namespace devirtualize_parameters;
if (enable) {
Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
if (devirtualizer.executed()) {
return;
}
/* In theory the code below could be generalized to avoid code duplication. However, the maximum
* number of parameters is expected to be relatively low. Explicitly implementing the different
* cases makes it more obvious to see what is going on and also makes inlining everything easier
* for the compiler. */
constexpr size_t DeviNum = sizeof...(Devirtualizers);
if constexpr (DeviNum == 0) {
fn();
return true;
}
func(varray);
if constexpr (DeviNum == 1) {
return std::get<0>(devis).devirtualize([&](auto param0) {
fn(param0);
return true;
});
}
if constexpr (DeviNum == 2) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
fn(param0, param1);
return true;
});
});
}
if constexpr (DeviNum == 3) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
fn(param0, param1, param2);
return true;
});
});
});
}
if constexpr (DeviNum == 4) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
fn(param0, param1, param2, param3);
return true;
});
});
});
});
}
if constexpr (DeviNum == 5) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
fn(param0, param1, param2, param3, param4);
return true;
});
});
});
});
});
}
if constexpr (DeviNum == 6) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
return std::get<5>(devis).devirtualize([&](auto &&param5) {
fn(param0, param1, param2, param3, param4, param5);
return true;
});
});
});
});
});
});
}
if constexpr (DeviNum == 7) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
return std::get<5>(devis).devirtualize([&](auto &&param5) {
return std::get<6>(devis).devirtualize([&](auto &&param6) {
fn(param0, param1, param2, param3, param4, param5, param6);
return true;
});
});
});
});
});
});
});
}
return false;
}
/**
* Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
* This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
* cases.
* A devirtualizer to be used with #call_with_devirtualized_parameters.
*
* This one is very simple: it does not perform any actual devirtualization. It can be used for
* parameters that should be passed to the function unchanged.
*/
template<typename T1, typename T2, typename Func>
inline void devirtualize_varray2(const VArray<T1> &varray1,
const VArray<T2> &varray2,
const Func &func,
bool enable = true)
{
using namespace devirtualize_parameters;
if (enable) {
Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
if (devirtualizer.executed()) {
return;
}
template<typename T> struct BasicDevirtualizer {
const T value;
template<typename Fn> bool devirtualize(const Fn &fn) const
{
return fn(this->value);
}
func(varray1, varray2);
}
};
} // namespace blender
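
For illustration, a minimal usage sketch of the new entry point, combining #BasicDevirtualizer from this file with #VArrayDevirtualizer from BLI_virtual_array.hh (the helper add_constant and its parameters are made up for this sketch and are not part of the commit). With one virtual-array input devirtualized to two forms, the lambda body is instantiated twice; each additional fully devirtualized virtual-array input roughly doubles that count, which is the exponential growth mentioned in the header comment.

#include "BLI_devirtualize_parameters.hh"
#include "BLI_virtual_array.hh"

namespace blender {

/* Hypothetical helper: add a constant to every element of `src`, writing into `dst`. */
static void add_constant(const VArray<int> &src, const int to_add, MutableSpan<int> dst)
{
  const bool executed = call_with_devirtualized_parameters(
      std::make_tuple(VArrayDevirtualizer<int, true, true>{src},
                      BasicDevirtualizer<int>{to_add}),
      [&](const auto &src, const int to_add) {
        /* Instantiated once with SingleAsSpan<int> and once with Span<int>. */
        for (const int64_t i : dst.index_range()) {
          dst[i] = src[i] + to_add;
        }
      });
  if (!executed) {
    /* Fallback: element access goes through the virtual array. */
    for (const int64_t i : dst.index_range()) {
      dst[i] = src[i] + to_add;
    }
  }
}

}  // namespace blender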

@@ -798,6 +798,28 @@ inline bool GVArrayCommon::is_empty() const
/** \} */
/** To be used with #call_with_devirtualized_parameters. */
template<typename T, bool UseSingle, bool UseSpan> struct GVArrayDevirtualizer {
const GVArrayImpl &varray_impl;
template<typename Fn> bool devirtualize(const Fn &fn) const
{
const CommonVArrayInfo info = this->varray_impl.common_info();
const int64_t size = this->varray_impl.size();
if constexpr (UseSingle) {
if (info.type == CommonVArrayInfo::Type::Single) {
return fn(SingleAsSpan<T>(*static_cast<const T *>(info.data), size));
}
}
if constexpr (UseSpan) {
if (info.type == CommonVArrayInfo::Type::Span) {
return fn(Span<T>(static_cast<const T *>(info.data), size));
}
}
return false;
}
};
/* -------------------------------------------------------------------- */
/** \name Inline methods for #GVArray.
* \{ */

@@ -284,4 +284,22 @@ class IndexMask {
Vector<int64_t> *r_skip_amounts = nullptr) const;
};
/** To be used with #call_with_devirtualized_parameters. */
template<bool UseRange, bool UseSpan> struct IndexMaskDevirtualizer {
const IndexMask &mask;
template<typename Fn> bool devirtualize(const Fn &fn) const
{
if constexpr (UseRange) {
if (this->mask.is_range()) {
return fn(this->mask.as_range());
}
}
if constexpr (UseSpan) {
return fn(this->mask.indices());
}
return false;
}
};
} // namespace blender
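
On its own, this devirtualizer can also be used directly. A small hypothetical sketch (sum_indices is not part of the commit): with both UseRange and UseSpan enabled, the lambda below is compiled once for #IndexRange and once for Span<int64_t>, so contiguous masks get the cheaper range-based loop, and the span path always succeeds as a fallback.

static int64_t sum_indices(const IndexMask &mask)
{
  int64_t sum = 0;
  IndexMaskDevirtualizer<true, true>{mask}.devirtualize([&](const auto &indices) {
    /* `indices` is an IndexRange if the mask is contiguous, otherwise a Span<int64_t>. */
    for (const int64_t i : indices) {
      sum += i;
    }
    return true;
  });
  return sum;
}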

@@ -27,6 +27,7 @@
#include "BLI_any.hh"
#include "BLI_array.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_index_mask.hh"
#include "BLI_span.hh"
@@ -1312,4 +1313,68 @@ template<typename T> class SingleAsSpan {
}
};
/** To be used with #call_with_devirtualized_parameters. */
template<typename T, bool UseSingle, bool UseSpan> struct VArrayDevirtualizer {
const VArray<T> &varray;
template<typename Fn> bool devirtualize(const Fn &fn) const
{
const CommonVArrayInfo info = this->varray.common_info();
const int64_t size = this->varray.size();
if constexpr (UseSingle) {
if (info.type == CommonVArrayInfo::Type::Single) {
return fn(SingleAsSpan<T>(*static_cast<const T *>(info.data), size));
}
}
if constexpr (UseSpan) {
if (info.type == CommonVArrayInfo::Type::Span) {
return fn(Span<T>(static_cast<const T *>(info.data), size));
}
}
return false;
}
};
/**
* Generate multiple versions of the given function optimized for different virtual arrays.
* One has to be careful with nesting multiple devirtualizations, because that results in an
* exponential number of function instantiations (increasing compile time and binary size).
*
* Generally, this function should only be used when the virtual method call overhead to get an
* element from a virtual array is significant.
*/
template<typename T, typename Func>
inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
{
if (enable) {
if (call_with_devirtualized_parameters(
std::make_tuple(VArrayDevirtualizer<T, true, true>{varray}), func)) {
return;
}
}
func(varray);
}
/**
* Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
* This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
* cases.
*/
template<typename T1, typename T2, typename Func>
inline void devirtualize_varray2(const VArray<T1> &varray1,
const VArray<T2> &varray2,
const Func &func,
bool enable = true)
{
if (enable) {
if (call_with_devirtualized_parameters(
std::make_tuple(VArrayDevirtualizer<T1, true, true>{varray1},
VArrayDevirtualizer<T2, true, true>{varray2}),
func)) {
return;
}
}
func(varray1, varray2);
}
} // namespace blender
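
A minimal usage sketch of the kept helper (sum_values and its input are hypothetical): the lambda is instantiated for SingleAsSpan<float>, for Span<float>, and for the generic VArray<float> fallback, so the inner loop avoids virtual calls whenever the underlying storage allows it.

static float sum_values(const VArray<float> &values)
{
  const int64_t size = values.size();
  float sum = 0.0f;
  devirtualize_varray(values, [&](const auto &values) {
    /* `values` is a SingleAsSpan<float>, a Span<float>, or the VArray<float> itself. */
    for (const int64_t i : IndexRange(size)) {
      sum += values[i];
    }
  });
  return sum;
}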

@@ -11,7 +11,6 @@
#include "MEM_guardedalloc.h"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_listbase.h"
#include "BLI_math_base.h"
#include "BLI_math_vector.hh"

@@ -7,7 +7,6 @@
#include <atomic>
#include "BLI_array_utils.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_index_mask_ops.hh"
#include "BLI_utildefines.h"
#include "BLI_vector_set.hh"

@@ -10,14 +10,10 @@
#include <functional>
#include "BLI_devirtualize_parameters.hh"
#include "FN_multi_function.hh"
namespace blender::fn {
namespace devi = devirtualize_parameters;
/**
* These presets determine what code is generated for a #CustomMF. Different presets make different
* trade-offs between run-time performance and compile-time/binary size.
@@ -63,14 +59,24 @@ struct AllSpanOrSingle {
static constexpr bool use_devirtualization = true;
static constexpr FallbackMode fallback_mode = FallbackMode::Materialized;
template<typename Fn, typename... ParamTypes>
void try_devirtualize(devi::Devirtualizer<Fn, ParamTypes...> &devirtualizer)
template<typename... ParamTags, typename... LoadedParams, size_t... I>
auto create_devirtualizers(TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
const IndexMask &mask,
const std::tuple<LoadedParams...> &loaded_params)
{
using devi::DeviMode;
devirtualizer.try_execute_devirtualized(
make_value_sequence<DeviMode,
DeviMode::Span | DeviMode::Single | DeviMode::Range,
sizeof...(ParamTypes)>());
return std::make_tuple(IndexMaskDevirtualizer<true, true>{mask}, [&]() {
typedef ParamTags ParamTag;
typedef typename ParamTag::base_type T;
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
return GVArrayDevirtualizer<T, true, true>{varray_impl};
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
T *ptr = std::get<I>(loaded_params);
return BasicDevirtualizer<T *>{ptr};
}
}()...);
}
};
@@ -83,17 +89,26 @@ template<size_t... Indices> struct SomeSpanOrSingle {
static constexpr bool use_devirtualization = true;
static constexpr FallbackMode fallback_mode = FallbackMode::Materialized;
template<typename Fn, typename... ParamTypes>
void try_devirtualize(devi::Devirtualizer<Fn, ParamTypes...> &devirtualizer)
template<typename... ParamTags, typename... LoadedParams, size_t... I>
auto create_devirtualizers(TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
const IndexMask &mask,
const std::tuple<LoadedParams...> &loaded_params)
{
using devi::DeviMode;
devirtualizer.try_execute_devirtualized(
make_two_value_sequence<DeviMode,
DeviMode::Span | DeviMode::Single | DeviMode::Range,
DeviMode::Single,
sizeof...(ParamTypes),
0,
(Indices + 1)...>());
return std::make_tuple(IndexMaskDevirtualizer<true, true>{mask}, [&]() {
typedef ParamTags ParamTag;
typedef typename ParamTag::base_type T;
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
constexpr bool UseSpan = ValueSequence<size_t, Indices...>::template contains<I>();
const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
return GVArrayDevirtualizer<T, true, UseSpan>{varray_impl};
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
T *ptr = std::get<I>(loaded_params);
return BasicDevirtualizer<T *>{ptr};
}
}()...);
}
};
@@ -107,8 +122,8 @@ namespace detail {
* instead of a `VArray<int>`).
*/
template<typename MaskT, typename... Args, typename... ParamTags, size_t... I, typename ElementFn>
void execute_array(TypeSequence<ParamTags...> /* param_tags */,
std::index_sequence<I...> /* indices */,
void execute_array(TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
ElementFn element_fn,
MaskT mask,
/* Use restrict to tell the compiler that pointer inputs do not alias each
@@ -125,7 +140,7 @@ void execute_array(TypeSequence<ParamTags...> /* param_tags */,
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
/* For outputs, pass a pointer to the function. This is done instead of passing a
* reference, because the pointer points to uninitialized memory. */
return &args[i];
return args + i;
}
}()...);
}
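
To illustrate the pack expansion above: for a single Span<int> input and a float * output (a sketch, not code from this commit; in_span and out_ptr stand in for the devirtualized parameters), the call effectively expands to a plain loop over the mask, with inputs indexed directly and outputs passed as pointers into uninitialized memory.

for (const int64_t i : mask) {
  element_fn(in_span[i], out_ptr + i);
}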
@@ -151,7 +166,7 @@ template<typename ParamTag> struct ArgInfo {
* Similar to #execute_array but accepts two mask inputs, one for inputs and one for outputs.
*/
template<typename... ParamTags, typename ElementFn, typename... Chunks>
void execute_materialized_impl(TypeSequence<ParamTags...> /* param_tags */,
void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
const ElementFn element_fn,
const IndexRange in_mask,
const IndexMask out_mask,
@@ -168,7 +183,7 @@ void execute_materialized_impl(TypeSequence<ParamTags...> /* param_tags */,
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
/* For outputs, a pointer is passed, because the memory is uninitialized. */
return &chunks[out_i];
return chunks + out_i;
}
}()...);
}
@@ -179,12 +194,12 @@ void execute_materialized_impl(TypeSequence<ParamTags...> /* param_tags */,
* separately, processing happens in chunks. This allows retrieving from input virtual arrays in
* chunks, which reduces virtual function call overhead.
*/
template<typename... ParamTags, size_t... I, typename ElementFn, typename... Args>
template<typename... ParamTags, size_t... I, typename ElementFn, typename... LoadedParams>
void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
std::index_sequence<I...> /* indices */,
const ElementFn element_fn,
const IndexMask mask,
Args &&...args)
const std::tuple<LoadedParams...> &loaded_params)
{
/* In theory, all elements could be processed in one chunk. However, that has the disadvantage
@@ -212,19 +227,21 @@ void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
typedef typename ParamTag::base_type T;
[[maybe_unused]] ArgInfo<ParamTags> &arg_info = std::get<I>(args_info);
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
VArray<T> &varray = *args;
if (varray.is_single()) {
const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
const CommonVArrayInfo common_info = varray_impl.common_info();
if (common_info.type == CommonVArrayInfo::Type::Single) {
/* If an input #VArray is a single value, we have to fill the buffer with that value
* only once. The same unchanged buffer can then be reused in every chunk. */
MutableSpan<T> in_chunk{std::get<I>(buffers_owner).ptr(), buffer_size};
const T in_single = varray.get_internal_single();
const T &in_single = *static_cast<const T *>(common_info.data);
uninitialized_fill_n(in_chunk.data(), in_chunk.size(), in_single);
std::get<I>(buffers) = in_chunk;
arg_info.mode = ArgMode::Single;
}
else if (varray.is_span()) {
else if (common_info.type == CommonVArrayInfo::Type::Span) {
/* Remember the span so that it doesn't have to be retrieved in every iteration. */
arg_info.internal_span = varray.get_internal_span();
const T *ptr = static_cast<const T *>(common_info.data);
arg_info.internal_span = Span<T>(ptr, varray_impl.size());
}
}
}(),
@@ -254,7 +271,6 @@ void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
return Span<T>(std::get<I>(buffers));
}
else {
const VArray<T> &varray = *args;
if (sliced_mask_is_range) {
if (!arg_info.internal_span.is_empty()) {
/* In this case we can just use an existing span instead of "compressing" it into
@@ -264,10 +280,11 @@ void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
return arg_info.internal_span.slice(sliced_mask_range);
}
}
const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
/* As a fallback, do a virtual function call to retrieve all elements in the current
* chunk. The elements are stored in a temporary buffer reused for every chunk. */
MutableSpan<T> in_chunk{std::get<I>(buffers_owner).ptr(), chunk_size};
varray.materialize_compressed_to_uninitialized(sliced_mask, in_chunk);
varray_impl.materialize_compressed_to_uninitialized(sliced_mask, in_chunk.data());
/* Remember that this parameter has been materialized, so that the values are
* destructed properly when the chunk is done. */
arg_info.mode = ArgMode::Materialized;
@@ -276,7 +293,7 @@ void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
/* For outputs, just pass a pointer. This is important so that `__restrict` works. */
return args->data();
return std::get<I>(loaded_params);
}
}()...);
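
As a rough, illustrative calculation of why this chunking helps (the chunk size below is made up; the real constant is defined in the implementation): with chunks of 64 elements, materializing 10 000 masked elements takes about 10 000 / 64 ≈ 157 virtual materialize calls, instead of roughly 10 000 virtual element lookups when reading the virtual array one element at a time.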
@@ -344,40 +361,34 @@ template<typename... ParamTags> class CustomMF : public MultiFunction {
ExecPreset exec_preset,
IndexMask mask,
MFParams params,
std::index_sequence<I...> /* indices */)
std::index_sequence<I...> /*indices*/)
{
std::tuple<typename ParamTags::array_type...> retrieved_params;
(
/* Get all parameters from #params and store them in #retrieved_params. */
[&]() {
/* Use `typedef` instead of `using` to work around a compiler bug. */
typedef typename TagsSequence::template at_index<I> ParamTag;
typedef typename ParamTag::base_type T;
/* Contains `const GVArrayImpl *` for inputs and `T *` for outputs. */
const auto loaded_params = std::make_tuple([&]() {
/* Use `typedef` instead of `using` to work around a compiler bug. */
typedef typename TagsSequence::template at_index<I> ParamTag;
typedef typename ParamTag::base_type T;
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
std::get<I>(retrieved_params) = params.readonly_single_input<T>(I);
}
if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
std::get<I>(retrieved_params) = params.uninitialized_single_output<T>(I);
}
}(),
...);
auto array_executor = [&](auto &&...args) {
detail::execute_array(TagsSequence(),
std::make_index_sequence<TagsSequence::size()>(),
element_fn,
std::forward<decltype(args)>(args)...);
};
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
return params.readonly_single_input(I).get_implementation();
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
return static_cast<T *>(params.uninitialized_single_output(I).data());
}
}()...);
/* First try devirtualized execution, since this is the most efficient. */
bool executed_devirtualized = false;
if constexpr (ExecPreset::use_devirtualization) {
devi::Devirtualizer<decltype(array_executor), IndexMask, typename ParamTags::array_type...>
devirtualizer{
array_executor, &mask, [&] { return &std::get<I>(retrieved_params); }()...};
exec_preset.try_devirtualize(devirtualizer);
executed_devirtualized = devirtualizer.executed();
const auto devirtualizers = exec_preset.create_devirtualizers(
TagsSequence(), std::index_sequence<I...>(), mask, loaded_params);
executed_devirtualized = call_with_devirtualized_parameters(
devirtualizers, [&](auto &&...args) {
detail::execute_array(TagsSequence(),
std::index_sequence<I...>(),
element_fn,
std::forward<decltype(args)>(args)...);
});
}
/* If devirtualized execution was disabled or not possible, use a fallback method which is
@@ -385,16 +396,23 @@ template<typename... ParamTags> class CustomMF : public MultiFunction {
if (!executed_devirtualized) {
if constexpr (ExecPreset::fallback_mode == CustomMF_presets::FallbackMode::Materialized) {
materialize_detail::execute_materialized(
TypeSequence<ParamTags...>(), std::index_sequence<I...>(), element_fn, mask, [&] {
return &std::get<I>(retrieved_params);
}()...);
TagsSequence(), std::index_sequence<I...>(), element_fn, mask, loaded_params);
}
else {
detail::execute_array(TagsSequence(),
std::make_index_sequence<TagsSequence::size()>(),
element_fn,
mask,
std::get<I>(retrieved_params)...);
detail::execute_array(
TagsSequence(), std::index_sequence<I...>(), element_fn, mask, [&]() {
/* Use `typedef` instead of `using` to work around a compiler bug. */
typedef typename TagsSequence::template at_index<I> ParamTag;
typedef typename ParamTag::base_type T;
if constexpr (ParamTag::category == MFParamCategory::SingleInput) {
const GVArrayImpl &varray_impl = *std::get<I>(loaded_params);
return GVArray(&varray_impl).typed<T>();
}
else if constexpr (ParamTag::category == MFParamCategory::SingleOutput) {
T *ptr = std::get<I>(loaded_params);
return ptr;
}
}()...);
}
}
}

@@ -34,9 +34,6 @@ enum class MFParamCategory {
template<MFParamCategory Category, typename T> struct MFParamTag {
static constexpr MFParamCategory category = Category;
using base_type = T;
/* TODO: Doesn't support all categories yet, this can be generalized when necessary. */
using array_type =
std::conditional_t<Category == MFParamCategory::SingleInput, VArray<T>, MutableSpan<T>>;
};
class MFParamType {

@@ -5,7 +5,6 @@
#include "BKE_curves_utils.hh"
#include "BKE_geometry_set.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_math_geom.h"
#include "BLI_math_rotation_legacy.hh"
#include "BLI_task.hh"

@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_array_utils.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_index_mask.hh"
#include "BLI_user_counter.hh"

@@ -2,7 +2,6 @@
#include "BLI_array.hh"
#include "BLI_array_utils.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_set.hh"
#include "BLI_task.hh"

@@ -9,7 +9,6 @@
#include "DNA_object_types.h"
#include "DNA_pointcloud_types.h"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_noise.hh"
#include "BLI_task.hh"

@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_devirtualize_parameters.hh"
#include "BLI_generic_array.hh"
#include "BLI_length_parameterize.hh"