Functions: move loops into function builders

This simplifies debugging and can help improve performance, because
plain loops in the function builders are easier for the compiler to optimize.

More optimization might still be possible by using `__restrict` in
a few places.
This commit is contained in:
Jacques Lucke 2022-03-29 10:11:49 +02:00
parent c55afdf30b
commit 3c6c15d676
1 changed file with 33 additions and 13 deletions

View File

@ -49,8 +49,11 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
/* Devirtualization results in a 2-3x speedup for some simple functions. */
devirtualize_varray(in1, [&](const auto &in1) {
mask.foreach_index(
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); });
mask.to_best_mask_type([&](const auto &mask) {
for (const int64_t i : mask) {
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i]));
}
});
});
};
}
@ -102,8 +105,11 @@ class CustomMF_SI_SI_SO : public MultiFunction {
MutableSpan<Out1> out1) {
/* Devirtualization results in a 2-3x speedup for some simple functions. */
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
mask.foreach_index(
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); });
mask.to_best_mask_type([&](const auto &mask) {
for (const int64_t i : mask) {
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i]));
}
});
});
};
}
@ -160,9 +166,11 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
const VArray<In2> &in2,
const VArray<In3> &in3,
MutableSpan<Out1> out1) {
mask.foreach_index([&](int i) {
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
* consideration. */
for (const int64_t i : mask) {
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i], in3[i]));
});
}
};
}
@ -223,9 +231,11 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction {
const VArray<In3> &in3,
const VArray<In4> &in4,
MutableSpan<Out1> out1) {
mask.foreach_index([&](int i) {
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
* consideration. */
for (const int64_t i : mask) {
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i], in3[i], in4[i]));
});
}
};
}
@ -268,7 +278,11 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction {
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
{
return [=](IndexMask mask, MutableSpan<Mut1> mut1) {
mask.foreach_index([&](int i) { element_fn(mut1[i]); });
mask.to_best_mask_type([&](const auto &mask) {
for (const int64_t i : mask) {
element_fn(mut1[i]);
}
});
};
}
@ -304,9 +318,11 @@ template<typename From, typename To> class CustomMF_Convert : public MultiFuncti
const VArray<From> &inputs = params.readonly_single_input<From>(0);
MutableSpan<To> outputs = params.uninitialized_single_output<To>(1);
for (int64_t i : mask) {
new (static_cast<void *>(&outputs[i])) To(inputs[i]);
}
mask.to_best_mask_type([&](const auto &mask) {
for (int64_t i : mask) {
new (static_cast<void *>(&outputs[i])) To(inputs[i]);
}
});
}
};
@ -366,7 +382,11 @@ template<typename T> class CustomMF_Constant : public MultiFunction {
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
{
MutableSpan<T> output = params.uninitialized_single_output<T>(0);
mask.foreach_index([&](int i) { new (&output[i]) T(value_); });
mask.to_best_mask_type([&](const auto &mask) {
for (const int64_t i : mask) {
new (&output[i]) T(value_);
}
});
}
uint64_t hash() const override