Geometry Nodes: avoid using enumerable thread specific on single thread
The geometry nodes evaluator supports "lazy threading", i.e. it starts out single-threaded. But when it determines that multi-threading could be beneficial, it switches to multi-threaded mode. Now it only creates an enumerable-thread-specific if it is actually using multiple threads. This results in a 6% speedup in my test file with many node groups and math nodes.
This commit is contained in:
parent
c744d5453f
commit
dba2d82846
Notes:
blender-bot
2023-02-13 23:16:02 +01:00
Referenced by commit 0bc0e3f9f7
, Fix: geometry nodes crashes with large trees
|
@ -245,8 +245,11 @@ class Executor {
|
|||
* A separate linear allocator for every thread. We could potentially reuse some memory, but that
|
||||
* doesn't seem worth it yet.
|
||||
*/
|
||||
threading::EnumerableThreadSpecific<LinearAllocator<>> local_allocators_;
|
||||
LinearAllocator<> *main_local_allocator_ = nullptr;
|
||||
struct ThreadLocalData {
|
||||
LinearAllocator<> allocator;
|
||||
};
|
||||
std::unique_ptr<threading::EnumerableThreadSpecific<ThreadLocalData>> thread_locals_;
|
||||
LinearAllocator<> main_allocator_;
|
||||
/**
|
||||
* Set to false when the first execution ends.
|
||||
*/
|
||||
|
@ -259,7 +262,6 @@ class Executor {
|
|||
{
|
||||
/* The indices are necessary, because they are used as keys in #node_states_. */
|
||||
BLI_assert(self_.graph_.node_indices_are_valid());
|
||||
main_local_allocator_ = &local_allocators_.local();
|
||||
}
|
||||
|
||||
~Executor()
|
||||
|
@ -338,16 +340,25 @@ class Executor {
|
|||
Span<const Node *> nodes = self_.graph_.nodes();
|
||||
node_states_.reinitialize(nodes.size());
|
||||
|
||||
/* Construct all node states in parallel. */
|
||||
threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
|
||||
LinearAllocator<> &allocator = local_allocators_.local();
|
||||
auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) {
|
||||
for (const int i : range) {
|
||||
const Node &node = *nodes[i];
|
||||
NodeState &node_state = *allocator.construct<NodeState>().release();
|
||||
node_states_[i] = &node_state;
|
||||
this->construct_initial_node_state(allocator, node, node_state);
|
||||
}
|
||||
});
|
||||
};
|
||||
if (nodes.size() <= 256) {
|
||||
construct_node_range(nodes.index_range(), main_allocator_);
|
||||
}
|
||||
else {
|
||||
this->ensure_thread_locals();
|
||||
/* Construct all node states in parallel. */
|
||||
threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
|
||||
LinearAllocator<> &allocator = this->get_main_or_local_allocator();
|
||||
construct_node_range(range, allocator);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void construct_initial_node_state(LinearAllocator<> &allocator,
|
||||
|
@ -1067,10 +1078,23 @@ class Executor {
|
|||
if (BLI_system_thread_count() <= 1) {
|
||||
return false;
|
||||
}
|
||||
this->ensure_thread_locals();
|
||||
task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
|
||||
return true;
|
||||
}
|
||||
|
||||
void ensure_thread_locals()
|
||||
{
|
||||
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
|
||||
if (current_main_thread_ != std::this_thread::get_id()) {
|
||||
BLI_assert_unreachable();
|
||||
}
|
||||
#endif
|
||||
if (!thread_locals_) {
|
||||
thread_locals_ = std::make_unique<threading::EnumerableThreadSpecific<ThreadLocalData>>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow other threads to steal all the nodes that are currently scheduled on this thread.
|
||||
*/
|
||||
|
@ -1109,9 +1133,9 @@ class Executor {
|
|||
LinearAllocator<> &get_main_or_local_allocator()
|
||||
{
|
||||
if (this->use_multi_threading()) {
|
||||
return local_allocators_.local();
|
||||
return thread_locals_->local().allocator;
|
||||
}
|
||||
return *main_local_allocator_;
|
||||
return main_allocator_;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue