Geometry Nodes: Parallelize attribute nodes

This commit significantly speeds up many of the attribute nodes when
multiple threads are available, in linear situations where parallelism
cannot be achieved elsewhere.

See the differential for a table of timing comparisons tested on a
Ryzen 3700x. For an attribute with 4 million elements, the nodes were
about 3 to 9 times faster.

The changes are not exhaustive; other nodes could still be parallelized
in the future. Also, it would be possible to further optimize the grain
size in `parallel_for`, but I'd rather make sure it isn't too small.
I tested some different values, but also relied on intuition:
increasing grain size for less complex operations and vice versa.

Differential Revision: https://developer.blender.org/D11139
This commit is contained in:
Hans Goudey 2021-05-03 08:00:09 -05:00
parent 2b46606af1
commit 1d7ee50fef
8 changed files with 285 additions and 206 deletions

View File

@ -15,6 +15,7 @@
*/
#include "BLI_math_rotation.h"
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -55,42 +56,44 @@ static void align_rotations_auto_pivot(const VArray<float3> &vectors,
const float3 local_main_axis,
const MutableSpan<float3> rotations)
{
for (const int i : IndexRange(vectors.size())) {
const float3 vector = vectors[i];
if (is_zero_v3(vector)) {
continue;
}
float old_rotation[3][3];
eul_to_mat3(old_rotation, rotations[i]);
float3 old_axis;
mul_v3_m3v3(old_axis, old_rotation, local_main_axis);
const float3 new_axis = vector.normalized();
float3 rotation_axis = float3::cross_high_precision(old_axis, new_axis);
if (is_zero_v3(rotation_axis)) {
/* The vectors are linearly dependent, so we fall back to another axis. */
rotation_axis = float3::cross_high_precision(old_axis, float3(1, 0, 0));
if (is_zero_v3(rotation_axis)) {
/* This is now guaranteed to not be zero. */
rotation_axis = float3::cross_high_precision(old_axis, float3(0, 1, 0));
parallel_for(IndexRange(vectors.size()), 128, [&](IndexRange range) {
for (const int i : range) {
const float3 vector = vectors[i];
if (is_zero_v3(vector)) {
continue;
}
float old_rotation[3][3];
eul_to_mat3(old_rotation, rotations[i]);
float3 old_axis;
mul_v3_m3v3(old_axis, old_rotation, local_main_axis);
const float3 new_axis = vector.normalized();
float3 rotation_axis = float3::cross_high_precision(old_axis, new_axis);
if (is_zero_v3(rotation_axis)) {
/* The vectors are linearly dependent, so we fall back to another axis. */
rotation_axis = float3::cross_high_precision(old_axis, float3(1, 0, 0));
if (is_zero_v3(rotation_axis)) {
/* This is now guaranteed to not be zero. */
rotation_axis = float3::cross_high_precision(old_axis, float3(0, 1, 0));
}
}
const float full_angle = angle_normalized_v3v3(old_axis, new_axis);
const float angle = factors[i] * full_angle;
float rotation[3][3];
axis_angle_to_mat3(rotation, rotation_axis, angle);
float new_rotation_matrix[3][3];
mul_m3_m3m3(new_rotation_matrix, rotation, old_rotation);
float3 new_rotation;
mat3_to_eul(new_rotation, new_rotation_matrix);
rotations[i] = new_rotation;
}
const float full_angle = angle_normalized_v3v3(old_axis, new_axis);
const float angle = factors[i] * full_angle;
float rotation[3][3];
axis_angle_to_mat3(rotation, rotation_axis, angle);
float new_rotation_matrix[3][3];
mul_m3_m3m3(new_rotation_matrix, rotation, old_rotation);
float3 new_rotation;
mat3_to_eul(new_rotation, new_rotation_matrix);
rotations[i] = new_rotation;
}
});
}
static void align_rotations_fixed_pivot(const VArray<float3> &vectors,
@ -104,37 +107,39 @@ static void align_rotations_fixed_pivot(const VArray<float3> &vectors,
return;
}
for (const int i : IndexRange(vectors.size())) {
const float3 vector = vectors[i];
if (is_zero_v3(vector)) {
continue;
parallel_for(IndexRange(vectors.size()), 128, [&](IndexRange range) {
for (const int i : range) {
const float3 vector = vectors[i];
if (is_zero_v3(vector)) {
continue;
}
float old_rotation[3][3];
eul_to_mat3(old_rotation, rotations[i]);
float3 old_axis;
mul_v3_m3v3(old_axis, old_rotation, local_main_axis);
float3 pivot_axis;
mul_v3_m3v3(pivot_axis, old_rotation, local_pivot_axis);
float full_angle = angle_signed_on_axis_v3v3_v3(vector, old_axis, pivot_axis);
if (full_angle > M_PI) {
/* Make sure the point is rotated as little as possible. */
full_angle -= 2.0f * M_PI;
}
const float angle = factors[i] * full_angle;
float rotation[3][3];
axis_angle_to_mat3(rotation, pivot_axis, angle);
float new_rotation_matrix[3][3];
mul_m3_m3m3(new_rotation_matrix, rotation, old_rotation);
float3 new_rotation;
mat3_to_eul(new_rotation, new_rotation_matrix);
rotations[i] = new_rotation;
}
float old_rotation[3][3];
eul_to_mat3(old_rotation, rotations[i]);
float3 old_axis;
mul_v3_m3v3(old_axis, old_rotation, local_main_axis);
float3 pivot_axis;
mul_v3_m3v3(pivot_axis, old_rotation, local_pivot_axis);
float full_angle = angle_signed_on_axis_v3v3_v3(vector, old_axis, pivot_axis);
if (full_angle > M_PI) {
/* Make sure the point is rotated as little as possible. */
full_angle -= 2.0f * M_PI;
}
const float angle = factors[i] * full_angle;
float rotation[3][3];
axis_angle_to_mat3(rotation, pivot_axis, angle);
float new_rotation_matrix[3][3];
mul_m3_m3m3(new_rotation_matrix, rotation, old_rotation);
float3 new_rotation;
mat3_to_eul(new_rotation, new_rotation_matrix);
rotations[i] = new_rotation;
}
});
}
static void align_rotations_on_component(GeometryComponent &component,

View File

@ -14,6 +14,8 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "BLI_task.hh"
#include "BKE_colorband.h"
#include "UI_interface.h"
@ -85,9 +87,11 @@ static void execute_on_component(const GeoNodeExecParams &params, GeometryCompon
MutableSpan<Color4f> results = attribute_result.as_span();
ColorBand *color_ramp = &node_storage->color_ramp;
for (const int i : IndexRange(attribute_in.size())) {
BKE_colorband_evaluate(color_ramp, attribute_in[i], results[i]);
}
parallel_for(IndexRange(attribute_in.size()), 512, [&](IndexRange range) {
for (const int i : range) {
BKE_colorband_evaluate(color_ramp, attribute_in[i], results[i]);
}
});
attribute_result.save();
}

View File

@ -15,6 +15,7 @@
*/
#include "BLI_math_base_safe.h"
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -208,28 +209,36 @@ static void map_range_float(const VArray<float> &attribute_input,
switch (interpolation_type) {
case NODE_MAP_RANGE_LINEAR: {
for (int i : span.index_range()) {
results[i] = map_linear(span[i], min_from, max_from, min_to, max_to);
}
parallel_for(span.index_range(), 2048, [&](IndexRange range) {
for (const int i : range) {
results[i] = map_linear(span[i], min_from, max_from, min_to, max_to);
}
});
break;
}
case NODE_MAP_RANGE_STEPPED: {
const float steps = params.get_input<float>("Steps");
for (int i : span.index_range()) {
results[i] = map_stepped(span[i], min_from, max_from, min_to, max_to, steps);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i] = map_stepped(span[i], min_from, max_from, min_to, max_to, steps);
}
});
break;
}
case NODE_MAP_RANGE_SMOOTHSTEP: {
for (int i : span.index_range()) {
results[i] = map_smoothstep(span[i], min_from, max_from, min_to, max_to);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i] = map_smoothstep(span[i], min_from, max_from, min_to, max_to);
}
});
break;
}
case NODE_MAP_RANGE_SMOOTHERSTEP: {
for (int i : span.index_range()) {
results[i] = map_smootherstep(span[i], min_from, max_from, min_to, max_to);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i] = map_smootherstep(span[i], min_from, max_from, min_to, max_to);
}
});
break;
}
}
@ -240,9 +249,11 @@ static void map_range_float(const VArray<float> &attribute_input,
const float clamp_min = min_to < max_to ? min_to : max_to;
const float clamp_max = min_to < max_to ? max_to : min_to;
for (int i : results.index_range()) {
results[i] = std::clamp(results[i], clamp_min, clamp_max);
}
parallel_for(results.index_range(), 2048, [&](IndexRange range) {
for (const int i : range) {
results[i] = std::clamp(results[i], clamp_min, clamp_max);
}
});
}
}
@ -262,36 +273,47 @@ static void map_range_float3(const VArray<float3> &attribute_input,
switch (interpolation_type) {
case NODE_MAP_RANGE_LINEAR: {
for (int i : span.index_range()) {
results[i].x = map_linear(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_linear(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_linear(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i].x = map_linear(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_linear(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_linear(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
});
break;
}
case NODE_MAP_RANGE_STEPPED: {
const float3 steps = params.get_input<float3>("Steps_001");
for (int i : span.index_range()) {
results[i].x = map_stepped(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x, steps.x);
results[i].y = map_stepped(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y, steps.y);
results[i].z = map_stepped(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z, steps.z);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i].x = map_stepped(
span[i].x, min_from.x, max_from.x, min_to.x, max_to.x, steps.x);
results[i].y = map_stepped(
span[i].y, min_from.y, max_from.y, min_to.y, max_to.y, steps.y);
results[i].z = map_stepped(
span[i].z, min_from.z, max_from.z, min_to.z, max_to.z, steps.z);
}
});
break;
}
case NODE_MAP_RANGE_SMOOTHSTEP: {
for (int i : span.index_range()) {
results[i].x = map_smoothstep(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_smoothstep(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_smoothstep(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i].x = map_smoothstep(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_smoothstep(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_smoothstep(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
});
break;
}
case NODE_MAP_RANGE_SMOOTHERSTEP: {
for (int i : span.index_range()) {
results[i].x = map_smootherstep(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_smootherstep(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_smootherstep(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
parallel_for(span.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
results[i].x = map_smootherstep(span[i].x, min_from.x, max_from.x, min_to.x, max_to.x);
results[i].y = map_smootherstep(span[i].y, min_from.y, max_from.y, min_to.y, max_to.y);
results[i].z = map_smootherstep(span[i].z, min_from.z, max_from.z, min_to.z, max_to.z);
}
});
break;
}
}

View File

@ -14,6 +14,8 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -157,9 +159,11 @@ static void do_math_operation(const VArray<float> &span_a,
{
bool success = try_dispatch_float_math_fl_fl_fl_to_fl(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(span_result.size())) {
span_result[i] = math_function(span_a[i], span_b[i], span_c[i]);
}
parallel_for(IndexRange(span_result.size()), 512, [&](IndexRange range) {
for (const int i : range) {
span_result[i] = math_function(span_a[i], span_b[i], span_c[i]);
}
});
});
BLI_assert(success);
UNUSED_VARS_NDEBUG(success);
@ -172,9 +176,11 @@ static void do_math_operation(const VArray<float> &span_a,
{
bool success = try_dispatch_float_math_fl_fl_to_fl(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(span_result.size())) {
span_result[i] = math_function(span_a[i], span_b[i]);
}
parallel_for(IndexRange(span_result.size()), 1024, [&](IndexRange range) {
for (const int i : range) {
span_result[i] = math_function(span_a[i], span_b[i]);
}
});
});
BLI_assert(success);
UNUSED_VARS_NDEBUG(success);
@ -186,9 +192,11 @@ static void do_math_operation(const VArray<float> &span_input,
{
bool success = try_dispatch_float_math_fl_to_fl(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(span_result.size())) {
span_result[i] = math_function(span_input[i]);
}
parallel_for(IndexRange(span_result.size()), 1024, [&](IndexRange range) {
for (const int i : range) {
span_result[i] = math_function(span_input[i]);
}
});
});
BLI_assert(success);
UNUSED_VARS_NDEBUG(success);

View File

@ -14,6 +14,8 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "BLI_task.hh"
#include "BKE_material.h"
#include "DNA_material_types.h"
@ -64,14 +66,16 @@ static void do_mix_operation_float(const int blend_mode,
VMutableArray<float> &results)
{
const int size = results.size();
for (const int i : IndexRange(size)) {
const float factor = factors[i];
float3 a{inputs_a[i]};
const float3 b{inputs_b[i]};
ramp_blend(blend_mode, a, factor, b);
const float result = a.x;
results.set(i, result);
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float factor = factors[i];
float3 a{inputs_a[i]};
const float3 b{inputs_b[i]};
ramp_blend(blend_mode, a, factor, b);
const float result = a.x;
results.set(i, result);
}
});
}
static void do_mix_operation_float3(const int blend_mode,
@ -81,13 +85,15 @@ static void do_mix_operation_float3(const int blend_mode,
VMutableArray<float3> &results)
{
const int size = results.size();
for (const int i : IndexRange(size)) {
const float factor = factors[i];
float3 a = inputs_a[i];
const float3 b = inputs_b[i];
ramp_blend(blend_mode, a, factor, b);
results.set(i, a);
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float factor = factors[i];
float3 a = inputs_a[i];
const float3 b = inputs_b[i];
ramp_blend(blend_mode, a, factor, b);
results.set(i, a);
}
});
}
static void do_mix_operation_color4f(const int blend_mode,
@ -97,13 +103,15 @@ static void do_mix_operation_color4f(const int blend_mode,
VMutableArray<Color4f> &results)
{
const int size = results.size();
for (const int i : IndexRange(size)) {
const float factor = factors[i];
Color4f a = inputs_a[i];
const Color4f b = inputs_b[i];
ramp_blend(blend_mode, a, factor, b);
results.set(i, a);
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float factor = factors[i];
Color4f a = inputs_a[i];
const Color4f b = inputs_b[i];
ramp_blend(blend_mode, a, factor, b);
results.set(i, a);
}
});
}
static void do_mix_operation(const CustomDataType result_type,

View File

@ -16,6 +16,7 @@
#include "BLI_hash.h"
#include "BLI_rand.hh"
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -125,28 +126,36 @@ static void randomize_attribute(MutableSpan<T> span,
/* The operations could be templated too, but it doesn't make the code much shorter. */
switch (operation) {
case GEO_NODE_ATTRIBUTE_RANDOMIZE_REPLACE_CREATE:
for (const int i : span.index_range()) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = random_value;
}
parallel_for(span.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = random_value;
}
});
break;
case GEO_NODE_ATTRIBUTE_RANDOMIZE_ADD:
for (const int i : span.index_range()) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] + random_value;
}
parallel_for(span.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] + random_value;
}
});
break;
case GEO_NODE_ATTRIBUTE_RANDOMIZE_SUBTRACT:
for (const int i : span.index_range()) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] - random_value;
}
parallel_for(span.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] - random_value;
}
});
break;
case GEO_NODE_ATTRIBUTE_RANDOMIZE_MULTIPLY:
for (const int i : span.index_range()) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] * random_value;
}
parallel_for(span.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
const T random_value = random_value_in_range<T>(ids[i], seed, min, max);
span[i] = span[i] * random_value;
}
});
break;
default:
BLI_assert(false);
@ -161,10 +170,12 @@ static void randomize_attribute_bool(MutableSpan<bool> span,
{
BLI_assert(operation == GEO_NODE_ATTRIBUTE_RANDOMIZE_REPLACE_CREATE);
UNUSED_VARS_NDEBUG(operation);
for (const int i : span.index_range()) {
const bool random_value = BLI_hash_int_2d_to_float(ids[i], seed) > 0.5f;
span[i] = random_value;
}
parallel_for(span.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
const bool random_value = BLI_hash_int_2d_to_float(ids[i], seed) > 0.5f;
span[i] = random_value;
}
});
}
Array<uint32_t> get_geometry_element_ids_as_uints(const GeometryComponent &component,
@ -179,9 +190,11 @@ Array<uint32_t> get_geometry_element_ids_as_uints(const GeometryComponent &compo
BLI_assert(hashes.size() == hash_attribute->size());
const CPPType &cpp_type = hash_attribute->type();
GVArray_GSpan items{*hash_attribute};
for (const int i : hashes.index_range()) {
hashes[i] = cpp_type.hash(items[i]);
}
parallel_for(hashes.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
hashes[i] = cpp_type.hash(items[i]);
}
});
}
else {
/* If there is no "id" attribute for per-point variation, just create it here. */

View File

@ -15,6 +15,7 @@
*/
#include "BLI_compiler_attrs.h"
#include "BLI_task.hh"
#include "DNA_texture_types.h"
@ -95,14 +96,17 @@ static void execute_on_component(GeometryComponent &component, const GeoNodeExec
mapping_name, result_domain, {0, 0, 0});
MutableSpan<Color4f> colors = attribute_out.as_span();
for (const int i : IndexRange(mapping_attribute.size())) {
TexResult texture_result = {0};
const float3 position = mapping_attribute[i];
/* For legacy reasons we have to map [0, 1] to [-1, 1] to support uv mappings. */
const float3 remapped_position = position * 2.0f - float3(1.0f);
BKE_texture_get_value(nullptr, texture, remapped_position, &texture_result, false);
colors[i] = {texture_result.tr, texture_result.tg, texture_result.tb, texture_result.ta};
}
parallel_for(IndexRange(mapping_attribute.size()), 128, [&](IndexRange range) {
for (const int i : range) {
TexResult texture_result = {0};
const float3 position = mapping_attribute[i];
/* For legacy reasons we have to map [0, 1] to [-1, 1] to support uv mappings. */
const float3 remapped_position = position * 2.0f - float3(1.0f);
BKE_texture_get_value(nullptr, texture, remapped_position, &texture_result, false);
colors[i] = {texture_result.tr, texture_result.tg, texture_result.tb, texture_result.ta};
}
});
attribute_out.save();
}

View File

@ -15,6 +15,7 @@
*/
#include "BLI_math_base_safe.h"
#include "BLI_task.hh"
#include "UI_interface.h"
#include "UI_resources.h"
@ -181,12 +182,14 @@ static void do_math_operation_fl3_fl3_to_fl3(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_fl3_to_fl3(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float3 out = math_function(a, b);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float3 out = math_function(a, b);
span_result[i] = out;
}
});
});
span_result.save();
@ -211,13 +214,15 @@ static void do_math_operation_fl3_fl3_fl3_to_fl3(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_fl3_fl3_to_fl3(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float3 c = span_c[i];
const float3 out = math_function(a, b, c);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float3 c = span_c[i];
const float3 out = math_function(a, b, c);
span_result[i] = out;
}
});
});
span_result.save();
@ -242,13 +247,15 @@ static void do_math_operation_fl3_fl3_fl_to_fl3(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_fl3_fl_to_fl3(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float c = span_c[i];
const float3 out = math_function(a, b, c);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float c = span_c[i];
const float3 out = math_function(a, b, c);
span_result[i] = out;
}
});
});
span_result.save();
@ -271,12 +278,14 @@ static void do_math_operation_fl3_fl3_to_fl(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_fl3_to_fl(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float out = math_function(a, b);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 a = span_a[i];
const float3 b = span_b[i];
const float out = math_function(a, b);
span_result[i] = out;
}
});
});
span_result.save();
@ -299,12 +308,14 @@ static void do_math_operation_fl3_fl_to_fl3(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_fl_to_fl3(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 a = span_a[i];
const float b = span_b[i];
const float3 out = math_function(a, b);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 a = span_a[i];
const float b = span_b[i];
const float3 out = math_function(a, b);
span_result[i] = out;
}
});
});
span_result.save();
@ -325,11 +336,13 @@ static void do_math_operation_fl3_to_fl3(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_to_fl3(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 in = span_a[i];
const float3 out = math_function(in);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 in = span_a[i];
const float3 out = math_function(in);
span_result[i] = out;
}
});
});
span_result.save();
@ -350,11 +363,13 @@ static void do_math_operation_fl3_to_fl(const VArray<float3> &input_a,
bool success = try_dispatch_float_math_fl3_to_fl(
operation, [&](auto math_function, const FloatMathOperationInfo &UNUSED(info)) {
for (const int i : IndexRange(size)) {
const float3 in = span_a[i];
const float out = math_function(in);
span_result[i] = out;
}
parallel_for(IndexRange(size), 512, [&](IndexRange range) {
for (const int i : range) {
const float3 in = span_a[i];
const float out = math_function(in);
span_result[i] = out;
}
});
});
span_result.save();