Update bundled version of NUMA API library

This commit is contained in:
Sergey Sharybin 2019-01-11 17:47:10 +01:00
parent c1dd74580e
commit ff44a9957e
Notes: blender-bot 2023-02-14 06:55:40 +01:00
Referenced by issue #61095, Import/Export formats losted
9 changed files with 88 additions and 16 deletions

View File

@ -1,5 +1,5 @@
Project: LibNumaAPI
URL: https://github.com/Nazg-Gul/libNumaAPI
License: MIT
Upstream version: f83d41ec4d7
Upstream version: 4e7206befce
Local modifications: None

View File

@ -70,6 +70,16 @@ bool numaAPI_IsNodeAvailable(int node);
// Get number of available processors on a given node.
int numaAPI_GetNumNodeProcessors(int node);
////////////////////////////////////////////////////////////////////////////////
// Topology helpers.
//
// These are somewhat higher-level queries, which are still rather
// platform-specific yet generally useful.
// Get number of processors within the NUMA nodes to which the affinity of the
// current thread is set.
int numaAPI_GetNumCurrentNodesProcessors(void);
////////////////////////////////////////////////////////////////////////////////
// Affinities.

View File

@ -34,8 +34,6 @@
# include <dlfcn.h>
#endif
#include <stdio.h>
#ifdef WITH_DYNLOAD
// Descriptor numa library.
@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask);
typedef void tnuma_free_nodemask(struct bitmask* bitmask);
typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
typedef struct bitmask *tnuma_get_run_node_mask(void);
typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
typedef void tnuma_set_localalloc(void);
@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask;
static tnuma_free_cpumask* numa_free_cpumask;
static tnuma_run_on_node_mask* numa_run_on_node_mask;
static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
static tnuma_get_run_node_mask* numa_get_run_node_mask;
static tnuma_set_interleave_mask* numa_set_interleave_mask;
static tnuma_set_localalloc* numa_set_localalloc;
@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) {
NUMA_LIBRARY_FIND(numa_free_nodemask);
NUMA_LIBRARY_FIND(numa_run_on_node_mask);
NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
NUMA_LIBRARY_FIND(numa_get_run_node_mask);
NUMA_LIBRARY_FIND(numa_set_interleave_mask);
NUMA_LIBRARY_FIND(numa_set_localalloc);
@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) {
struct bitmask* cpu_mask = numa_allocate_cpumask();
numa_node_to_cpus(node, cpu_mask);
const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
const unsigned int num_bits = num_bytes *8;
const unsigned int num_bits = num_bytes * 8;
// TODO(sergey): There might be faster way calculating number of set bits.
int num_processors = 0;
for (unsigned int bit = 0; bit < num_bits; ++bit) {
@ -224,6 +225,23 @@ int numaAPI_GetNumNodeProcessors(int node) {
return num_processors;
}
////////////////////////////////////////////////////////////////////////////////
// Topology helpers.
// Sum up the processors of every NUMA node which is set in the run node mask
// of the current thread.
int numaAPI_GetNumCurrentNodesProcessors(void) {
  struct bitmask* run_nodes = numa_get_run_node_mask();
  // The mask size is reported in bytes, every byte carries 8 node bits.
  const unsigned int total_bits = numa_bitmask_nbytes(run_nodes) * 8;
  int total_processors = 0;
  unsigned int node = 0;
  while (node < total_bits) {
    if (numa_bitmask_isbitset(run_nodes, node)) {
      total_processors += numaAPI_GetNumNodeProcessors(node);
    }
    ++node;
  }
  // The mask is released here, once all of its bits have been visited.
  numa_bitmask_free(run_nodes);
  return total_processors;
}
////////////////////////////////////////////////////////////////////////////////
// Affinities.

View File

@ -52,6 +52,13 @@ int numaAPI_GetNumNodeProcessors(int node) {
return 0;
}
////////////////////////////////////////////////////////////////////////////////
// Topology helpers.
// Stub implementation: always reports zero processors.
int numaAPI_GetNumCurrentNodesProcessors(void) {
  const int num_processors = 0;
  return num_processors;
}
////////////////////////////////////////////////////////////////////////////////
// Affinities.

View File

@ -47,8 +47,6 @@
# include <VersionHelpers.h>
#endif
#include <stdio.h>
////////////////////////////////////////////////////////////////////////////////
// Initialization.
@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
DWORD_PTR process_affinity_mask);
typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
const GROUP_AFFINITY* GroupAffinity,
const GROUP_AFFINITY* group_affinity,
GROUP_AFFINITY* PreviousGroupAffinity);
typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
GROUP_AFFINITY* group_affinity);
typedef DWORD t_GetCurrentProcessorNumber(void);
typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
typedef DWORD t_GetActiveProcessorCount(WORD group_number);
// NUMA symbols.
static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree;
// Threading symbols.
static t_SetProcessAffinityMask* _SetProcessAffinityMask;
static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
static t_GetActiveProcessorCount* _GetActiveProcessorCount;
static void numaExit(void) {
// TODO(sergey): Consider closing library here.
@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) {
// Threading.
KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
#undef KERNEL_LIBRARY_FIND
#undef _LIBRARY_FIND
@ -151,6 +160,19 @@ NUMAAPI_Result numaAPI_Initialize(void) {
#endif
}
////////////////////////////////////////////////////////////////////////////////
// Internal helpers.
// Count the number of bits which are set in the given mask.
//
// The mask is reinterpreted as an unsigned value before it is shifted. Right
// shift of a negative signed value is implementation-defined and is commonly
// arithmetic, which keeps the sign bit set forever: a mask with the highest
// bit set (for example, a fully populated 64-bit affinity mask) would never
// become zero and the loop would never terminate.
//
// Returns a value in the range [0, 64].
static int countNumSetBits(int64_t mask) {
  // TODO(sergey): There might be faster way calculating number of set bits.
  uint64_t remaining_bits = (uint64_t)mask;
  int num_bits = 0;
  while (remaining_bits != 0) {
    num_bits += (int)(remaining_bits & 1);
    remaining_bits >>= 1;
  }
  return num_bits;
}
////////////////////////////////////////////////////////////////////////////////
// Topology query.
@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) {
if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
return 0;
}
// TODO(sergey): There might be faster way calculating number of set bits.
int num_processors = 0;
while (processor_mask != 0) {
num_processors += (processor_mask & 1);
processor_mask = (processor_mask >> 1);
return countNumSetBits(processor_mask);
}
////////////////////////////////////////////////////////////////////////////////
// Topology helpers.
// Count the processors selected by the group affinity mask of the current
// thread, clamped to the number of active processors in that group.
//
// Returns 0 when the group affinity of the thread can not be queried.
int numaAPI_GetNumCurrentNodesProcessors(void) {
  HANDLE current_thread = GetCurrentThread();
  GROUP_AFFINITY affinity;
  // TODO(sergey): Needs implementation.
  if (!_GetThreadGroupAffinity(current_thread, &affinity)) {
    return 0;
  }
  // Number of bits which are set in the affinity mask of the thread.
  const int num_mask_processors = countNumSetBits(affinity.Mask);
  // Number of active processors in the group the thread belongs to; the
  // result never exceeds it.
  const int num_active_processors = _GetActiveProcessorCount(affinity.Group);
  return (num_active_processors < num_mask_processors)
             ? num_active_processors
             : num_mask_processors;
}

@ -1 +1 @@
Subproject commit fec9a7e88c8686830e9210099818a61489e699e4
Subproject commit 8eafc437295b0edc990db231fe957e2ad42af70d

@ -1 +1 @@
Subproject commit 9cc2ad1eaf941d8ed3b5542a3d5cdfccec7ba60b
Subproject commit 46a9160c6f67d60610fdcc1dadbe3946a7010625

@ -1 +1 @@
Subproject commit 68a2d28b24430133946f21d7deede1312d181aaf
Subproject commit 272b1a4ef07717beb3d0bfcb7380c2164fd008a3

@ -1 +1 @@
Subproject commit 2278dd8d6aea4d56632dee37196a5e341ce99283
Subproject commit aef8f32086b9393d286c49cbe5a51ae465fe0589