Functions: move loops into function builders
This simplifies debugging and can help improve performance by making the loops easier for the compiler to optimize. Further optimization might still be possible by using `__restrict` in a few places.
This commit is contained in:
parent
c55afdf30b
commit
3c6c15d676
|
@ -49,8 +49,11 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
|
|||
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
|
||||
/* Devirtualization results in a 2-3x speedup for some simple functions. */
|
||||
devirtualize_varray(in1, [&](const auto &in1) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); });
|
||||
mask.to_best_mask_type([&](const auto &mask) {
|
||||
for (const int64_t i : mask) {
|
||||
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i]));
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
}
|
||||
|
@ -102,8 +105,11 @@ class CustomMF_SI_SI_SO : public MultiFunction {
|
|||
MutableSpan<Out1> out1) {
|
||||
/* Devirtualization results in a 2-3x speedup for some simple functions. */
|
||||
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); });
|
||||
mask.to_best_mask_type([&](const auto &mask) {
|
||||
for (const int64_t i : mask) {
|
||||
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i]));
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
}
|
||||
|
@ -160,9 +166,11 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
|
|||
const VArray<In2> &in2,
|
||||
const VArray<In3> &in3,
|
||||
MutableSpan<Out1> out1) {
|
||||
mask.foreach_index([&](int i) {
|
||||
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
|
||||
* consideration. */
|
||||
for (const int64_t i : mask) {
|
||||
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i], in3[i]));
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -223,9 +231,11 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction {
|
|||
const VArray<In3> &in3,
|
||||
const VArray<In4> &in4,
|
||||
MutableSpan<Out1> out1) {
|
||||
mask.foreach_index([&](int i) {
|
||||
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
|
||||
* consideration. */
|
||||
for (const int64_t i : mask) {
|
||||
new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i], in3[i], in4[i]));
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -268,7 +278,11 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction {
|
|||
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
|
||||
{
|
||||
return [=](IndexMask mask, MutableSpan<Mut1> mut1) {
|
||||
mask.foreach_index([&](int i) { element_fn(mut1[i]); });
|
||||
mask.to_best_mask_type([&](const auto &mask) {
|
||||
for (const int64_t i : mask) {
|
||||
element_fn(mut1[i]);
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -304,9 +318,11 @@ template<typename From, typename To> class CustomMF_Convert : public MultiFuncti
|
|||
const VArray<From> &inputs = params.readonly_single_input<From>(0);
|
||||
MutableSpan<To> outputs = params.uninitialized_single_output<To>(1);
|
||||
|
||||
for (int64_t i : mask) {
|
||||
new (static_cast<void *>(&outputs[i])) To(inputs[i]);
|
||||
}
|
||||
mask.to_best_mask_type([&](const auto &mask) {
|
||||
for (int64_t i : mask) {
|
||||
new (static_cast<void *>(&outputs[i])) To(inputs[i]);
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -366,7 +382,11 @@ template<typename T> class CustomMF_Constant : public MultiFunction {
|
|||
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
|
||||
{
|
||||
MutableSpan<T> output = params.uninitialized_single_output<T>(0);
|
||||
mask.foreach_index([&](int i) { new (&output[i]) T(value_); });
|
||||
mask.to_best_mask_type([&](const auto &mask) {
|
||||
for (const int64_t i : mask) {
|
||||
new (&output[i]) T(value_);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
uint64_t hash() const override
|
||||
|
|
Loading…
Reference in New Issue