Functions: enable more gcc optimizations for multi-functions

This mainly helps GCC catch up with Clang in terms of field evaluation
performance in some cases. In some cases this patch can speedup
field evaluation 2-3x (e.g. when there are many float math nodes).
See D16942 for a more detailed benchmark.
This commit is contained in:
Jacques Lucke 2023-01-07 20:23:20 +01:00
parent a2ea32a600
commit c4d4db39dc
1 changed files with 13 additions and 7 deletions

View File

@ -124,13 +124,19 @@ namespace detail {
* instead of a `VArray<int>`).
*/
template<typename MaskT, typename... Args, typename... ParamTags, size_t... I, typename ElementFn>
void execute_array(TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
ElementFn element_fn,
MaskT mask,
/* Use restrict to tell the compiler that pointer inputs do not alias each
* other. This is important for some compiler optimizations. */
Args &&__restrict... args)
/* Perform additional optimizations on this loop because it is a very hot loop. For example, the
* math node in geometry nodes is processed here. */
#if (defined(__GNUC__) && !defined(__clang__))
[[gnu::optimize("-funroll-loops")]] [[gnu::optimize("O3")]]
#endif
void execute_array(
TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
ElementFn element_fn,
MaskT mask,
/* Use restrict to tell the compiler that pointer inputs do not alias each
* other. This is important for some compiler optimizations. */
Args &&__restrict... args)
{
for (const int64_t i : mask) {
element_fn([&]() -> decltype(auto) {