BLI: Avoid invoking tbb for small parallel_reduce calls
Apply a change similar to e130903060 to `parallel_reduce`, so that it
behaves just like `parallel_for`. I measured a performance
improvement in viewport FPS of at least 10% with 1 million small
instances (one bottleneck was computing many small bounding boxes).
This commit is contained in:
parent
892562b7bf
commit
c2737913db
|
@@ -77,17 +77,19 @@ Value parallel_reduce(IndexRange range,
|
|||
const Reduction &reduction)
|
||||
{
|
||||
#ifdef WITH_TBB
|
||||
return tbb::parallel_reduce(
|
||||
tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
|
||||
identity,
|
||||
[&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
|
||||
return function(IndexRange(subrange.begin(), subrange.size()), ident);
|
||||
},
|
||||
reduction);
|
||||
if (range.size() >= grain_size) {
|
||||
return tbb::parallel_reduce(
|
||||
tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
|
||||
identity,
|
||||
[&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
|
||||
return function(IndexRange(subrange.begin(), subrange.size()), ident);
|
||||
},
|
||||
reduction);
|
||||
}
|
||||
#else
|
||||
UNUSED_VARS(grain_size, reduction);
|
||||
return function(range, identity);
|
||||
#endif
|
||||
return function(range, identity);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue