BLI: inline fast path of IndexRange::as_span

The call to IndexRange::as_span frequently showed up in profiling, even though
the common case does almost no work.

This change also switches the shared state to atomics for correctness and
fills the index array in parallel for better performance.
Jacques Lucke 2022-04-07 19:28:41 +02:00
parent 67c42e7f03
commit a5beca7ba0
3 changed files with 40 additions and 20 deletions
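
For context: IndexRange::as_span() exposes the indices of a range as a Span<int64_t> that points into a shared, lazily grown table of ascending integers, so no per-call allocation or copy is needed. A minimal usage sketch (illustrative only, not part of this commit):

#include "BLI_index_range.hh"
#include "BLI_span.hh"

void example()
{
  using namespace blender;

  const IndexRange range(10, 4); /* The indices 10, 11, 12, 13. */
  const Span<int64_t> indices = range.as_span();

  /* `indices` now views {10, 11, 12, 13}: indices[i] == range.start() + i.
   * The span points into a shared static table that simply contains 0, 1, 2, ... */
  for ([[maybe_unused]] const int64_t i : indices) {
    /* Use the indices like a normal container. */
  }
}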

source/blender/blenlib/BLI_index_range.hh

@@ -39,6 +39,7 @@
  */
 #include <algorithm>
+#include <atomic>
 #include <cmath>
 #include <iostream>
@@ -288,6 +289,12 @@ class IndexRange {
     stream << "[" << range.start() << ", " << range.one_after_last() << ")";
     return stream;
   }
+
+ private:
+  static std::atomic<int64_t> s_current_array_size;
+  static std::atomic<int64_t *> s_current_array;
+
+  Span<int64_t> as_span_internal() const;
 };

 } // namespace blender
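
The two class statics added here publish the shared table: s_current_array points at memory whose element i is simply i, and s_current_array_size is the length that has been made visible to readers. Because the table holds ascending integers, the indices of [start, start + size) already sit contiguously at offset start, so handing out a span is plain pointer arithmetic. A conceptual sketch (illustrative only):

#include <cassert>
#include <cstdint>

int main()
{
  static int64_t table[32];
  for (int64_t i = 0; i < 32; i++) {
    table[i] = i; /* table[i] == i by construction. */
  }

  const int64_t start = 10;
  const int64_t size = 5;
  const int64_t *span = table + start; /* Views {10, 11, 12, 13, 14}. */
  for (int64_t i = 0; i < size; i++) {
    assert(span[i] == start + i);
  }
  return 0;
}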

source/blender/blenlib/BLI_span.hh

@@ -722,4 +722,16 @@ template<typename T> class MutableSpan {
   }
 };
+
+/** This is defined here, because in `BLI_index_range.hh` `Span` is not yet defined. */
+inline Span<int64_t> IndexRange::as_span() const
+{
+  const int64_t min_required_size = start_ + size_;
+  const int64_t current_array_size = s_current_array_size.load(std::memory_order_acquire);
+  const int64_t *current_array = s_current_array.load(std::memory_order_acquire);
+  if (min_required_size <= current_array_size) {
+    return Span<int64_t>(current_array + start_, size_);
+  }
+  return this->as_span_internal();
+}

 } /* namespace blender */
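
The inlined fast path can skip the mutex because of how the slow path publishes updates: the array pointer is stored first and the size last, both with memory_order_release, while the fast path loads them with memory_order_acquire. A reader that observes a size covering its range is therefore guaranteed to also observe the matching pointer and the filled table contents. A self-contained sketch of this publication pattern (assumed names, not Blender code):

#include <atomic>
#include <cstdint>

static int64_t buffer[1024];
static std::atomic<int64_t> published_size = 0;

void publish(const int64_t n)
{
  for (int64_t i = 0; i < n; i++) {
    buffer[i] = i;
  }
  /* Release: the writes to `buffer` above become visible to acquire readers. */
  published_size.store(n, std::memory_order_release);
}

int64_t read_checked(const int64_t index)
{
  /* Acquire: if the loaded size covers `index`, the matching writes are visible. */
  if (index < published_size.load(std::memory_order_acquire)) {
    return buffer[index];
  }
  return -1; /* A real caller would fall back to a locked slow path. */
}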

source/blender/blenlib/intern/index_range.cc

@@ -1,46 +1,47 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 #include <atomic>
 #include <mutex>
 #include "BLI_array.hh"
 #include "BLI_index_range.hh"
 #include "BLI_span.hh"
 #include "BLI_task.hh"
 #include "BLI_vector.hh"
 namespace blender {
 static RawVector<RawArray<int64_t, 0>> arrays;
-static int64_t current_array_size = 0;
-static int64_t *current_array = nullptr;
 static std::mutex current_array_mutex;
+std::atomic<int64_t> IndexRange::s_current_array_size = 0;
+std::atomic<int64_t *> IndexRange::s_current_array = nullptr;
-Span<int64_t> IndexRange::as_span() const
+Span<int64_t> IndexRange::as_span_internal() const
 {
   int64_t min_required_size = start_ + size_;
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
-  }
   std::lock_guard<std::mutex> lock(current_array_mutex);
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
+  /* Double checked lock. */
+  if (min_required_size <= s_current_array_size) {
+    return Span<int64_t>(s_current_array + start_, size_);
   }
-  int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
-  RawArray<int64_t, 0> new_array(new_size);
-  for (int64_t i = 0; i < new_size; i++) {
-    new_array[i] = i;
-  }
-  arrays.append(std::move(new_array));
+  /* Isolate, because a mutex is locked. */
+  threading::isolate_task([&]() {
+    int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
+    RawArray<int64_t, 0> new_array(new_size);
+    threading::parallel_for(IndexRange(new_size), 4096, [&](const IndexRange range) {
+      for (const int64_t i : range) {
+        new_array[i] = i;
+      }
+    });
+    arrays.append(std::move(new_array));
-  current_array = arrays.last().data();
-  std::atomic_thread_fence(std::memory_order_seq_cst);
-  current_array_size = new_size;
+    s_current_array.store(arrays.last().data(), std::memory_order_release);
+    s_current_array_size.store(new_size, std::memory_order_release);
+  });
-  return Span<int64_t>(current_array + start_, size_);
+  return Span<int64_t>(s_current_array + start_, size_);
 }
 } // namespace blender
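
Two details of the slow path are worth noting. The parallel fill is wrapped in threading::isolate_task because a mutex is held: without isolation, the calling thread could pick up an unrelated scheduler task that also calls as_span and deadlock on the same mutex. And old tables are kept alive in `arrays` rather than freed, so spans handed out before a resize stay valid. A sketch of that append-only growth strategy (illustrative types, not the Blender containers):

#include <cstdint>
#include <memory>
#include <vector>

static std::vector<std::unique_ptr<int64_t[]>> tables; /* Old tables are never freed. */

const int64_t *grow_to(const int64_t new_size)
{
  auto table = std::make_unique<int64_t[]>(new_size);
  for (int64_t i = 0; i < new_size; i++) {
    table[i] = i;
  }
  /* Previous entries are left untouched, so pointers into them remain valid. */
  tables.push_back(std::move(table));
  return tables.back().get();
}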