Merge branch 'blender2.7'
This commit is contained in:
commit
1c7695b848
Notes:
blender-bot
2024-03-27 14:49:18 +01:00
Referenced by issue #59244, Quick Favorites: add support for property enums
|
@ -40,7 +40,7 @@ bool system_cpu_ensure_initialized()
|
|||
{
|
||||
static bool is_initialized = false;
|
||||
static bool result = false;
|
||||
if (is_initialized) {
|
||||
if(is_initialized) {
|
||||
return result;
|
||||
}
|
||||
is_initialized = true;
|
||||
|
@ -71,8 +71,8 @@ int system_cpu_thread_count()
|
|||
{
|
||||
const int num_nodes = system_cpu_num_numa_nodes();
|
||||
int num_threads = 0;
|
||||
for (int node = 0; node < num_nodes; ++node) {
|
||||
if (!system_cpu_is_numa_node_available(node)) {
|
||||
for(int node = 0; node < num_nodes; ++node) {
|
||||
if(!system_cpu_is_numa_node_available(node)) {
|
||||
continue;
|
||||
}
|
||||
num_threads += system_cpu_num_numa_node_processors(node);
|
||||
|
@ -82,7 +82,7 @@ int system_cpu_thread_count()
|
|||
|
||||
int system_cpu_num_numa_nodes()
|
||||
{
|
||||
if (!system_cpu_ensure_initialized()) {
|
||||
if(!system_cpu_ensure_initialized()) {
|
||||
/* Fallback to a single node with all the threads. */
|
||||
return 1;
|
||||
}
|
||||
|
@ -91,7 +91,7 @@ int system_cpu_num_numa_nodes()
|
|||
|
||||
bool system_cpu_is_numa_node_available(int node)
|
||||
{
|
||||
if (!system_cpu_ensure_initialized()) {
|
||||
if(!system_cpu_ensure_initialized()) {
|
||||
return true;
|
||||
}
|
||||
return numaAPI_IsNodeAvailable(node);
|
||||
|
@ -99,7 +99,7 @@ bool system_cpu_is_numa_node_available(int node)
|
|||
|
||||
int system_cpu_num_numa_node_processors(int node)
|
||||
{
|
||||
if (!system_cpu_ensure_initialized()) {
|
||||
if(!system_cpu_ensure_initialized()) {
|
||||
return system_cpu_thread_count_fallback();
|
||||
}
|
||||
return numaAPI_GetNumNodeProcessors(node);
|
||||
|
@ -107,12 +107,20 @@ int system_cpu_num_numa_node_processors(int node)
|
|||
|
||||
bool system_cpu_run_thread_on_node(int node)
|
||||
{
|
||||
if (!system_cpu_ensure_initialized()) {
|
||||
if(!system_cpu_ensure_initialized()) {
|
||||
return true;
|
||||
}
|
||||
return numaAPI_RunThreadOnNode(node);
|
||||
}
|
||||
|
||||
int system_cpu_num_active_group_processors()
|
||||
{
|
||||
if(!system_cpu_ensure_initialized()) {
|
||||
return system_cpu_thread_count_fallback();
|
||||
}
|
||||
return numaAPI_GetNumCurrentNodesProcessors();
|
||||
}
|
||||
|
||||
#if !defined(_WIN32) || defined(FREE_WINDOWS)
|
||||
static void __cpuid(int data[4], int selector)
|
||||
{
|
||||
|
|
|
@ -44,6 +44,10 @@ int system_cpu_num_numa_node_processors(int node);
|
|||
* Returns true if affinity has successfully changed. */
|
||||
bool system_cpu_run_thread_on_node(int node);
|
||||
|
||||
/* Number of processors within the current CPU group (or within active thread
|
||||
* affinity). */
|
||||
int system_cpu_num_active_group_processors();
|
||||
|
||||
string system_cpu_brand_string();
|
||||
int system_cpu_bits();
|
||||
bool system_cpu_support_sse2();
|
||||
|
|
|
@ -228,9 +228,21 @@ int get_num_total_processors(const vector<int>& num_per_node_processors)
|
|||
void distribute_threads_on_nodes(const vector<thread*>& threads)
|
||||
{
|
||||
const int num_threads = threads.size();
|
||||
/* TODO(sergey): Skip overriding affinity if threads fit into the current
|
||||
* nodes/CPU group. This will allow user to tweak affinity for weird and
|
||||
* wonderful reasons. */
|
||||
const int num_active_group_processors =
|
||||
system_cpu_num_active_group_processors();
|
||||
VLOG(1) << "Detected " << num_active_group_processors << " processors "
|
||||
<< "in active group.";
|
||||
if(num_active_group_processors >= num_threads) {
|
||||
/* If the current thread is set up in a way that its affinity allows to
|
||||
* use at least requested number of threads we do not explicitly set
|
||||
* affinity to the worker threads.
|
||||
* This way we allow users to manually edit affinity of the parent
|
||||
* thread, and here we follow that affinity. This way it's possible to
|
||||
* have two Cycles/Blender instances running manually set to different
|
||||
* dies on a CPU. */
|
||||
VLOG(1) << "Not setting thread group affinity.";
|
||||
return;
|
||||
}
|
||||
vector<int> num_per_node_processors;
|
||||
get_per_node_num_processors(&num_per_node_processors);
|
||||
if(num_per_node_processors.size() == 0) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
Project: LibNumaAPI
|
||||
URL: https://github.com/Nazg-Gul/libNumaAPI
|
||||
License: MIT
|
||||
Upstream version: f83d41ec4d7
|
||||
Upstream version: 4e7206befce
|
||||
Local modifications: None
|
||||
|
|
|
@ -70,6 +70,16 @@ bool numaAPI_IsNodeAvailable(int node);
|
|||
// Get number of available processors on a given node.
|
||||
int numaAPI_GetNumNodeProcessors(int node);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Topology helpers.
|
||||
//
|
||||
// Those are a bit higher level queries, but are still rather platform-specific
|
||||
// and generally useful.
|
||||
|
||||
// Get number of processors within the NUMA nodes on which current thread is
|
||||
// allowed to run.
|
||||
int numaAPI_GetNumCurrentNodesProcessors(void);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Affinities.
|
||||
|
||||
|
|
|
@ -34,8 +34,6 @@
|
|||
# include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef WITH_DYNLOAD
|
||||
|
||||
// Descriptor numa library.
|
||||
|
@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask);
|
|||
typedef void tnuma_free_nodemask(struct bitmask* bitmask);
|
||||
typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
|
||||
typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
|
||||
typedef struct bitmask *tnuma_get_run_node_mask(void);
|
||||
typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
|
||||
typedef void tnuma_set_localalloc(void);
|
||||
|
||||
|
@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask;
|
|||
static tnuma_free_cpumask* numa_free_cpumask;
|
||||
static tnuma_run_on_node_mask* numa_run_on_node_mask;
|
||||
static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
|
||||
static tnuma_get_run_node_mask* numa_get_run_node_mask;
|
||||
static tnuma_set_interleave_mask* numa_set_interleave_mask;
|
||||
static tnuma_set_localalloc* numa_set_localalloc;
|
||||
|
||||
|
@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) {
|
|||
NUMA_LIBRARY_FIND(numa_free_nodemask);
|
||||
NUMA_LIBRARY_FIND(numa_run_on_node_mask);
|
||||
NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
|
||||
NUMA_LIBRARY_FIND(numa_get_run_node_mask);
|
||||
NUMA_LIBRARY_FIND(numa_set_interleave_mask);
|
||||
NUMA_LIBRARY_FIND(numa_set_localalloc);
|
||||
|
||||
|
@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) {
|
|||
struct bitmask* cpu_mask = numa_allocate_cpumask();
|
||||
numa_node_to_cpus(node, cpu_mask);
|
||||
const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
|
||||
const unsigned int num_bits = num_bytes *8;
|
||||
const unsigned int num_bits = num_bytes * 8;
|
||||
// TODO(sergey): There might be faster way calculating number of set bits.
|
||||
int num_processors = 0;
|
||||
for (unsigned int bit = 0; bit < num_bits; ++bit) {
|
||||
|
@ -224,6 +225,23 @@ int numaAPI_GetNumNodeProcessors(int node) {
|
|||
return num_processors;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Topology helpers.
|
||||
|
||||
int numaAPI_GetNumCurrentNodesProcessors(void) {
|
||||
struct bitmask* node_mask = numa_get_run_node_mask();
|
||||
const unsigned int num_bytes = numa_bitmask_nbytes(node_mask);
|
||||
const unsigned int num_bits = num_bytes * 8;
|
||||
int num_processors = 0;
|
||||
for (unsigned int bit = 0; bit < num_bits; ++bit) {
|
||||
if (numa_bitmask_isbitset(node_mask, bit)) {
|
||||
num_processors += numaAPI_GetNumNodeProcessors(bit);
|
||||
}
|
||||
}
|
||||
numa_bitmask_free(node_mask);
|
||||
return num_processors;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Affinities.
|
||||
|
||||
|
|
|
@ -52,6 +52,13 @@ int numaAPI_GetNumNodeProcessors(int node) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Topology helpers.
|
||||
|
||||
int numaAPI_GetNumCurrentNodesProcessors(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Affinities.
|
||||
|
||||
|
|
|
@ -47,8 +47,6 @@
|
|||
# include <VersionHelpers.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Initialization.
|
||||
|
||||
|
@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
|
|||
typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
|
||||
DWORD_PTR process_affinity_mask);
|
||||
typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
|
||||
const GROUP_AFFINITY* GroupAffinity,
|
||||
const GROUP_AFFINITY* group_affinity,
|
||||
GROUP_AFFINITY* PreviousGroupAffinity);
|
||||
typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
|
||||
GROUP_AFFINITY* group_affinity);
|
||||
typedef DWORD t_GetCurrentProcessorNumber(void);
|
||||
typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
|
||||
typedef DWORD t_GetActiveProcessorCount(WORD group_number);
|
||||
|
||||
|
||||
// NUMA symbols.
|
||||
static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
|
||||
|
@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree;
|
|||
// Threading symbols.
|
||||
static t_SetProcessAffinityMask* _SetProcessAffinityMask;
|
||||
static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
|
||||
static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
|
||||
static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
|
||||
static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
|
||||
static t_GetActiveProcessorCount* _GetActiveProcessorCount;
|
||||
|
||||
static void numaExit(void) {
|
||||
// TODO(sergey): Consider closing library here.
|
||||
|
@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) {
|
|||
// Threading.
|
||||
KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
|
||||
KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
|
||||
KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
|
||||
KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
|
||||
KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
|
||||
KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
|
||||
|
||||
#undef KERNEL_LIBRARY_FIND
|
||||
#undef _LIBRARY_FIND
|
||||
|
@ -151,6 +160,19 @@ NUMAAPI_Result numaAPI_Initialize(void) {
|
|||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal helpers.
|
||||
|
||||
// Count the number of bits which are set in the given mask.
//
// The mask is reinterpreted as an unsigned value before it is shifted.
// Right-shifting a negative signed value is implementation-defined and, with
// an arithmetic shift, keeps the sign bit set forever, so a mask with its
// highest bit set (for example, an affinity mask covering all 64 processors of
// a group) would never reach zero and the loop would not terminate.
static int countNumSetBits(int64_t mask) {
  // TODO(sergey): There might be faster way calculating number of set bits.
  uint64_t remaining_bits = (uint64_t)mask;
  int num_bits = 0;
  while (remaining_bits != 0) {
    num_bits += (int)(remaining_bits & 1);
    remaining_bits >>= 1;
  }
  return num_bits;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Topology query.
|
||||
|
||||
|
@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) {
|
|||
if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
|
||||
return 0;
|
||||
}
|
||||
// TODO(sergey): There might be faster way calculating number of set bits.
|
||||
int num_processors = 0;
|
||||
while (processor_mask != 0) {
|
||||
num_processors += (processor_mask & 1);
|
||||
processor_mask = (processor_mask >> 1);
|
||||
return countNumSetBits(processor_mask);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Topology helpers.
|
||||
|
||||
int numaAPI_GetNumCurrentNodesProcessors(void) {
|
||||
HANDLE thread_handle = GetCurrentThread();
|
||||
GROUP_AFFINITY group_affinity;
|
||||
// TODO(sergey): Needs implementation.
|
||||
if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) {
|
||||
return 0;
|
||||
}
|
||||
// First, count number of possible bits in the affinity mask.
|
||||
const int num_processors = countNumSetBits(group_affinity.Mask);
|
||||
// Then check that it's not exceeding number of processors in the group.
|
||||
const int num_group_processors =
|
||||
_GetActiveProcessorCount(group_affinity.Group);
|
||||
if (num_group_processors < num_processors) {
|
||||
return num_group_processors;
|
||||
}
|
||||
return num_processors;
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit d31844cc0b3be397b5032cb0c5d8daea39584783
|
||||
Subproject commit 46a9160c6f67d60610fdcc1dadbe3946a7010625
|
Loading…
Reference in New Issue