BLI: inline fast path of IndexRange::as_span

The call to IndexRange::as_span frequently showed up in profiling, even though
the common case does almost no work.

This change also switches the shared state to atomics for correctness and
fills the index array in parallel for better performance.
Jacques Lucke 2022-04-07 19:28:41 +02:00
parent 67c42e7f03
commit a5beca7ba0
3 changed files with 40 additions and 20 deletions
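
For context: IndexRange::as_span() exposes the indices of a range as a Span<int64_t> that points into a shared, lazily grown table of ascending integers, so no per-call allocation or copy is needed. A minimal usage sketch (illustrative only, not part of this commit):

#include "BLI_index_range.hh"
#include "BLI_span.hh"

void example()
{
  using namespace blender;

  const IndexRange range(10, 4); /* The indices 10, 11, 12, 13. */
  const Span<int64_t> indices = range.as_span();

  /* `indices` now views {10, 11, 12, 13}: indices[i] == range.start() + i.
   * The span points into a shared static table that simply contains 0, 1, 2, ... */
  for ([[maybe_unused]] const int64_t i : indices) {
    /* Use the indices like a normal container. */
  }
}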

source/blender/blenlib/BLI_index_range.hh

@@ -39,6 +39,7 @@
  */
 #include <algorithm>
+#include <atomic>
 #include <cmath>
 #include <iostream>
@@ -288,6 +289,12 @@ class IndexRange {
     stream << "[" << range.start() << ", " << range.one_after_last() << ")";
     return stream;
   }
+
+ private:
+  static std::atomic<int64_t> s_current_array_size;
+  static std::atomic<int64_t *> s_current_array;
+
+  Span<int64_t> as_span_internal() const;
 };

 } // namespace blender
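
The two class statics added here publish the shared table: s_current_array points at memory whose element i is simply i, and s_current_array_size is the length that has been made visible to readers. Because the table holds ascending integers, the indices of [start, start + size) already sit contiguously at offset start, so handing out a span is plain pointer arithmetic. A conceptual sketch (illustrative only):

#include <cassert>
#include <cstdint>

int main()
{
  static int64_t table[32];
  for (int64_t i = 0; i < 32; i++) {
    table[i] = i; /* table[i] == i by construction. */
  }

  const int64_t start = 10;
  const int64_t size = 5;
  const int64_t *span = table + start; /* Views {10, 11, 12, 13, 14}. */
  for (int64_t i = 0; i < size; i++) {
    assert(span[i] == start + i);
  }
  return 0;
}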

source/blender/blenlib/BLI_span.hh

@@ -722,4 +722,16 @@ template<typename T> class MutableSpan {
   }
 };
+
+/** This is defined here, because in `BLI_index_range.hh` `Span` is not yet defined. */
+inline Span<int64_t> IndexRange::as_span() const
+{
+  const int64_t min_required_size = start_ + size_;
+  const int64_t current_array_size = s_current_array_size.load(std::memory_order_acquire);
+  const int64_t *current_array = s_current_array.load(std::memory_order_acquire);
+  if (min_required_size <= current_array_size) {
+    return Span<int64_t>(current_array + start_, size_);
+  }
+  return this->as_span_internal();
+}

 } /* namespace blender */
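
The inlined fast path can skip the mutex because of how the slow path publishes updates: the array pointer is stored first and the size last, both with memory_order_release, while the fast path loads them with memory_order_acquire. A reader that observes a size covering its range is therefore guaranteed to also observe the matching pointer and the filled table contents. A self-contained sketch of this publication pattern (assumed names, not Blender code):

#include <atomic>
#include <cstdint>

static int64_t buffer[1024];
static std::atomic<int64_t> published_size = 0;

void publish(const int64_t n)
{
  for (int64_t i = 0; i < n; i++) {
    buffer[i] = i;
  }
  /* Release: the writes to `buffer` above become visible to acquire readers. */
  published_size.store(n, std::memory_order_release);
}

int64_t read_checked(const int64_t index)
{
  /* Acquire: if the loaded size covers `index`, the matching writes are visible. */
  if (index < published_size.load(std::memory_order_acquire)) {
    return buffer[index];
  }
  return -1; /* A real caller would fall back to a locked slow path. */
}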

source/blender/blenlib/intern/index_range.cc

@@ -1,46 +1,47 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 #include <atomic>
 #include <mutex>
 #include "BLI_array.hh"
 #include "BLI_index_range.hh"
 #include "BLI_span.hh"
 #include "BLI_task.hh"
 #include "BLI_vector.hh"
 namespace blender {
 static RawVector<RawArray<int64_t, 0>> arrays;
-static int64_t current_array_size = 0;
-static int64_t *current_array = nullptr;
 static std::mutex current_array_mutex;
+std::atomic<int64_t> IndexRange::s_current_array_size = 0;
+std::atomic<int64_t *> IndexRange::s_current_array = nullptr;
-Span<int64_t> IndexRange::as_span() const
+Span<int64_t> IndexRange::as_span_internal() const
 {
   int64_t min_required_size = start_ + size_;
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
-  }
   std::lock_guard<std::mutex> lock(current_array_mutex);
-  if (min_required_size <= current_array_size) {
-    return Span<int64_t>(current_array + start_, size_);
+  /* Double checked lock. */
+  if (min_required_size <= s_current_array_size) {
+    return Span<int64_t>(s_current_array + start_, size_);
   }
-  int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
-  RawArray<int64_t, 0> new_array(new_size);
-  for (int64_t i = 0; i < new_size; i++) {
-    new_array[i] = i;
-  }
-  arrays.append(std::move(new_array));
+  /* Isolate, because a mutex is locked. */
+  threading::isolate_task([&]() {
+    int64_t new_size = std::max<int64_t>(1000, power_of_2_max_u(min_required_size));
+    RawArray<int64_t, 0> new_array(new_size);
+    threading::parallel_for(IndexRange(new_size), 4096, [&](const IndexRange range) {
+      for (const int64_t i : range) {
+        new_array[i] = i;
+      }
+    });
+    arrays.append(std::move(new_array));
-  current_array = arrays.last().data();
-  std::atomic_thread_fence(std::memory_order_seq_cst);
-  current_array_size = new_size;
+    s_current_array.store(arrays.last().data(), std::memory_order_release);
+    s_current_array_size.store(new_size, std::memory_order_release);
+  });
-  return Span<int64_t>(current_array + start_, size_);
+  return Span<int64_t>(s_current_array + start_, size_);
 }
 } // namespace blender
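
Two details of the slow path are worth noting. The parallel fill is wrapped in threading::isolate_task because a mutex is held: without isolation, the calling thread could pick up an unrelated scheduler task that also calls as_span and deadlock on the same mutex. And old tables are kept alive in `arrays` rather than freed, so spans handed out before a resize stay valid. A sketch of that append-only growth strategy (illustrative types, not the Blender containers):

#include <cstdint>
#include <memory>
#include <vector>

static std::vector<std::unique_ptr<int64_t[]>> tables; /* Old tables are never freed. */

const int64_t *grow_to(const int64_t new_size)
{
  auto table = std::make_unique<int64_t[]>(new_size);
  for (int64_t i = 0; i < new_size; i++) {
    table[i] = i;
  }
  /* Previous entries are left untouched, so pointers into them remain valid. */
  tables.push_back(std::move(table));
  return tables.back().get();
}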