BLI: Avoid invoking tbb for small parallel_reduce calls

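Apply to `parallel_reduce` a change similar to the one e130903060 made
for `parallel_for`: when the range is smaller than the grain size, call
the function directly on the whole range instead of going through TBB.
I measured a performance improvement in viewport FPS of at least 10%
with 1 million small instances (one bottleneck was computing many small
bounding boxes).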
This commit is contained in:
Hans Goudey 2022-05-09 18:21:37 +02:00
parent 892562b7bf
commit c2737913db
1 changed file with 10 additions and 8 deletions

View File

@@ -77,17 +77,19 @@ Value parallel_reduce(IndexRange range,
const Reduction &reduction)
{
#ifdef WITH_TBB
return tbb::parallel_reduce(
tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
identity,
[&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
return function(IndexRange(subrange.begin(), subrange.size()), ident);
},
reduction);
if (range.size() >= grain_size) {
return tbb::parallel_reduce(
tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
identity,
[&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
return function(IndexRange(subrange.begin(), subrange.size()), ident);
},
reduction);
}
#else
UNUSED_VARS(grain_size, reduction);
return function(range, identity);
#endif
return function(range, identity);
}
/**