Merge branch 'master' into blender2.8

This commit is contained in:
Sergey Sharybin 2018-11-28 14:42:38 +01:00
commit 3ed0d5b4d4
Notes: blender-bot 2023-02-14 05:37:19 +01:00
Referenced by issue #58207, Render time going crazy!
20 changed files with 1357 additions and 0 deletions

View File

@ -695,6 +695,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
bf_intern_glew_mx
bf_intern_clog
bf_intern_opensubdiv
bf_intern_numaapi
)
if(NOT WITH_SYSTEM_GLOG)

View File

@ -30,6 +30,7 @@ add_subdirectory(ghost)
add_subdirectory(guardedalloc)
add_subdirectory(libmv)
add_subdirectory(memutil)
add_subdirectory(numaapi)
add_subdirectory(opencolorio)
add_subdirectory(opensubdiv)
add_subdirectory(mikktspace)

1
intern/numaapi/AUTHORS Normal file
View File

@ -0,0 +1 @@
Sergey Sharybin <sergey.vfx@gmail.com>

View File

@ -0,0 +1,39 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ***** END GPL LICENSE BLOCK *****
# Public include directory of the library (contains numaapi.h).
set(INC
./include
)
# No system include directories are needed.
set(INC_SYS
)
# All platform backends are listed unconditionally: each source file guards its
# own contents with the OS_* checks from build_config.h.
set(SRC
source/numaapi.c
source/numaapi_linux.c
source/numaapi_stub.c
source/numaapi_win32.c
include/numaapi.h
source/build_config.h
)
# With WITH_DYNLOAD the Linux backend includes <dlfcn.h> instead of <numa.h> and
# resolves the libnuma symbols at runtime via dlopen()/dlsym().
add_definitions(-DWITH_DYNLOAD)
blender_add_lib(bf_intern_numaapi "${SRC}" "${INC}" "${INC_SYS}")

19
intern/numaapi/LICENSE Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2016 libnumaapi authors.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

7
intern/numaapi/README Normal file
View File

@ -0,0 +1,7 @@
LibNumaAPI aims to provide a single, common NUMA API across all supported
platforms, so that cross-platform applications do not have to worry about
platform-specific implementation details.
LICENSE
LibNumaAPI library is released under the MIT license.

View File

@ -0,0 +1,5 @@
Project: LibNumaAPI
URL: https://github.com/Nazg-Gul/libNumaAPI
License: MIT
Upstream version: f83d41ec4d7
Local modifications: None

View File

@ -0,0 +1,108 @@
// Copyright (c) 2016, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
// Public C API of LibNumaAPI. The declarations are wrapped in extern "C" so
// the header can be included from both C and C++ translation units.
#ifndef __LIBNUMAAPI_H__
#define __LIBNUMAAPI_H__
#include <stdbool.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NUMAAPI_VERSION_MAJOR 1
#define NUMAAPI_VERSION_MINOR 0
// Result codes returned by the API.
typedef enum NUMAAPI_Result {
// The call succeeded.
NUMAAPI_SUCCESS = 0,
// NUMA is not available on this platform.
NUMAAPI_NOT_AVAILABLE = 1,
// Generic error, no real details are available.
NUMAAPI_ERROR = 2,
// Error installing atexit() handlers.
NUMAAPI_ERROR_ATEXIT = 3,
} NUMAAPI_Result;
////////////////////////////////////////////////////////////////////////////////
// Initialization.
// Initialize NUMA API.
//
// This is the first call which should be made, before any other NUMA function
// can be used.
NUMAAPI_Result numaAPI_Initialize(void);
// Get string representation of NUMAAPI_Result.
const char* numaAPI_ResultAsString(NUMAAPI_Result result);
////////////////////////////////////////////////////////////////////////////////
// Topology query.
// Get number of available nodes.
//
// This is in fact an index of last node plus one and it's not guaranteed
// that all nodes up to this one are available.
int numaAPI_GetNumNodes(void);
// Returns true if the given node is available for compute.
bool numaAPI_IsNodeAvailable(int node);
// Get number of available processors on a given node.
int numaAPI_GetNumNodeProcessors(int node);
////////////////////////////////////////////////////////////////////////////////
// Affinities.
// Runs the current process and its children on a specific node.
//
// Returns true if affinity has successfully changed.
//
// NOTE: This function can not change active CPU group. Mainly designed to deal
// with Threadripper 2 topology, to make it possible to gain maximum performance
// for the main application thread.
bool numaAPI_RunProcessOnNode(int node);
// Runs the current thread and its children on a specific node.
//
// Returns true if affinity has successfully changed.
bool numaAPI_RunThreadOnNode(int node);
////////////////////////////////////////////////////////////////////////////////
// Memory management.
// Allocate memory on a given node.
void* numaAPI_AllocateOnNode(size_t size, int node);
// Allocate memory in the local memory, closest to the current node.
void* numaAPI_AllocateLocal(size_t size);
// Frees size bytes of memory starting at start.
//
// TODO(sergey): Consider making it regular free() semantic.
void numaAPI_Free(void* start, size_t size);
#ifdef __cplusplus
}
#endif
#endif // __LIBNUMAAPI_H__

View File

@ -0,0 +1,379 @@
// Copyright (c) 2018, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
#ifndef __BUILD_CONFIG_H__
#define __BUILD_CONFIG_H__
#include <limits.h>
#include <stdint.h>
// Initially is based on Chromium's build_config.h, with tweaks and extensions
// needed for this project.
//
// NOTE: All commonly used symbols (which are checked on a "top" level, from
// outside of any platform-specific ifdef block) are to be explicitly defined
// to 0 when they are not "active". This is extra lines of code in this file,
// but is not being edited that often. Such approach helps catching cases when
// one attempted to access build configuration variable without including the
// header by simply using -Wundef compiler attribute.
//
// NOTE: Not having things explicitly defined to 0 is harmless (in terms it
// follows same rules as Google projects) and will simply cause compiler to
// become more noisy, which is simple to correct.
////////////////////////////////////////////////////////////////////////////////
// Platform (operating system) detection.
//
// The first matching branch of the chain below defines its OS_* symbol to 1.
// Every symbol the chain left undefined is afterwards defined to 0, so all of
// them can be used directly in #if expressions.
#if defined(__native_client__)
// NaCl has to be tested first, so that no other OS_ symbol gets set for it.
#  define OS_NACL 1
#elif defined(_AIX)
#  define OS_AIX 1
#elif defined(ANDROID)
#  define OS_ANDROID 1
#elif defined(__APPLE__)
// TargetConditionals.h is only included after the ANDROID test: some Android
// builds on a Mac host do not have this header available, and it is not needed
// unless the target really is mac/ios.
#  include <TargetConditionals.h>
#  define OS_MACOSX 1
#  if defined(TARGET_OS_IPHONE)
#    if TARGET_OS_IPHONE
#      define OS_IOS 1
#    endif  // TARGET_OS_IPHONE
#  endif  // defined(TARGET_OS_IPHONE)
#elif defined(__HAIKU__)
#  define OS_HAIKU 1
#elif defined(__hpux)
#  define OS_HPUX 1
#elif defined(__linux__)
#  define OS_LINUX 1
// A system header is included to pull in features.h, which provides the
// glibc/uclibc identification macros.
#  include <unistd.h>
#  if defined(__GLIBC__)
#    if !defined(__UCLIBC__)
// This really is glibc, not uClibc pretending to be glibc.
#      define LIBC_GLIBC 1
#    endif
#  endif
#elif defined(__sgi)
#  define OS_IRIX 1
#elif defined(_WIN32)
#  define OS_WIN 1
#elif defined(__FreeBSD__)
#  define OS_FREEBSD 1
#elif defined(__NetBSD__)
#  define OS_NETBSD 1
#elif defined(__OpenBSD__)
#  define OS_OPENBSD 1
#elif defined(__sun)
#  define OS_SOLARIS 1
#elif defined(__QNXNTO__)
#  define OS_QNX 1
#else
#  error Please add support for your platform in build_config.h
#endif

// Explicit zero defaults for every symbol which was not activated above.
#ifndef OS_AIX
#  define OS_AIX 0
#endif
#ifndef OS_NACL
#  define OS_NACL 0
#endif
#ifndef OS_ANDROID
#  define OS_ANDROID 0
#endif
#ifndef OS_MACOSX
#  define OS_MACOSX 0
#endif
#ifndef OS_IOS
#  define OS_IOS 0
#endif
#ifndef OS_HAIKU
#  define OS_HAIKU 0
#endif
#ifndef OS_HPUX
#  define OS_HPUX 0
#endif
#ifndef OS_IRIX
#  define OS_IRIX 0
#endif
#ifndef OS_LINUX
#  define OS_LINUX 0
#endif
#ifndef LIBC_GLIBC
#  define LIBC_GLIBC 0
#endif
#ifndef OS_WIN
#  define OS_WIN 0
#endif
#ifndef OS_FREEBSD
#  define OS_FREEBSD 0
#endif
#ifndef OS_NETBSD
#  define OS_NETBSD 0
#endif
#ifndef OS_OPENBSD
#  define OS_OPENBSD 0
#endif
#ifndef OS_SOLARIS
#  define OS_SOLARIS 0
#endif
#ifndef OS_QNX
#  define OS_QNX 0
#endif
////////////////////////////////////////////////////////////////////////////////
// *BSD OS family detection.
//
// OS_BSD is 1 when any of the FreeBSD/OpenBSD/NetBSD symbols is set. Use it
// instead of a more specific macro to access standard BSD features.
#if !(OS_FREEBSD || OS_OPENBSD || OS_NETBSD)
#  define OS_BSD 0
#else
#  define OS_BSD 1
#endif
////////////////////////////////////////////////////////////////////////////////
// POSIX system detection.
//
// OS_POSIX is 1 when any of the POSIX-like platform symbols is set. Use it
// instead of a more specific macro to access standard POSIXish features.
#if OS_MACOSX ||  \
    OS_LINUX ||   \
    OS_BSD ||     \
    OS_SOLARIS || \
    OS_ANDROID || \
    OS_NACL ||    \
    OS_QNX ||     \
    OS_HAIKU ||   \
    OS_AIX ||     \
    OS_HPUX ||    \
    OS_IRIX
#  define OS_POSIX 1
#else
#  define OS_POSIX 0
#endif
////////////////////////////////////////////////////////////////////////////////
// Compiler detection, including its capabilities.
//
// The first matching branch defines its COMPILER_* symbol to 1; all remaining
// symbols are defined to 0 afterwards.
//
// NOTE(review): the MinGW branches are tested after __GNUC__. MinGW toolchains
// presumably define __GNUC__ as well, in which case those branches are never
// reached -- confirm whether this ordering is intentional.
#if defined(__clang__)
#  define COMPILER_CLANG 1
#elif defined(__GNUC__)
#  define COMPILER_GCC 1
#  define COMPILER_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#elif defined(_MSC_VER)
#  define COMPILER_MSVC 1
#  define COMPILER_MSVC_VERSION (_MSC_VER)
#elif defined(__MINGW32__)
#  define COMPILER_MINGW32 1
#elif defined(__MINGW64__)
#  define COMPILER_MINGW64 1
#else
#  error Please add support for your compiler in build_config.h
#endif

// Explicit zero defaults for compilers which were not detected.
#ifndef COMPILER_CLANG
#  define COMPILER_CLANG 0
#endif
#ifndef COMPILER_GCC
#  define COMPILER_GCC 0
#endif
#ifndef COMPILER_MSVC
#  define COMPILER_MSVC 0
#endif
#ifndef COMPILER_MINGW32
#  define COMPILER_MINGW32 0
#endif
#ifndef COMPILER_MINGW64
#  define COMPILER_MINGW64 0
#endif

// Compiler is any of MinGW family.
#if !COMPILER_MINGW32 && !COMPILER_MINGW64
#  define COMPILER_MINGW 0
#else
#  define COMPILER_MINGW 1
#endif
// Detect the latest C++ specification supported by the compiler.
//
// NOTE: Every condition is spelled out explicitly in the #if itself (rather
// than hidden behind a helper macro which expands to defined()), to avoid the
// expansion-to-defined warning from being generated. While that warning would
// most likely be a false positive in this particular case, it might be helpful
// to catch errors elsewhere.

// C++11 check.
#if (defined(__cplusplus) && __cplusplus > 199711L) || \
    (defined(_MSC_VER) && _MSC_VER >= 1800)
#  define COMPILER_SUPPORTS_CXX11 1
#else
#  define COMPILER_SUPPORTS_CXX11 0
#endif

// C++14 check.
#if defined(__cplusplus) && __cplusplus > 201311L
#  define COMPILER_SUPPORTS_CXX14 1
#else
#  define COMPILER_SUPPORTS_CXX14 0
#endif

// C++17 check.
#if defined(__cplusplus) && __cplusplus > 201611L
#  define COMPILER_SUPPORTS_CXX17 1
#else
#  define COMPILER_SUPPORTS_CXX17 0
#endif

// C++20 check.
#if defined(__cplusplus) && __cplusplus > 201911L
#  define COMPILER_SUPPORTS_CXX20 1
#else
#  define COMPILER_SUPPORTS_CXX20 0
#endif
// COMPILER_USE_ADDRESS_SANITIZER is 1 when the translation unit is compiled
// with address sanitizer enabled, and 0 otherwise. This allows to give tips to
// the sanitizer, or to work around known issues with third party libraries.
//
// The value can be forced from the outside by pre-defining the symbol.
//
// Detection:
//  - __SANITIZE_ADDRESS__ is the predefined macro tested for compilers which
//    announce the sanitizer that way.
//  - Otherwise __has_feature(address_sanitizer) is queried. The mere presence
//    of __has_feature says nothing about the sanitizer being enabled, so the
//    feature itself has to be tested. The query lives in its own nested #if so
//    that compilers without __has_feature never see the function-like usage.
#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
#  if defined(__SANITIZE_ADDRESS__)
#    define COMPILER_USE_ADDRESS_SANITIZER 1
#  elif defined(__has_feature)
#    if __has_feature(address_sanitizer)
#      define COMPILER_USE_ADDRESS_SANITIZER 1
#    endif
#  endif
#endif
#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
#  define COMPILER_USE_ADDRESS_SANITIZER 0
#endif
////////////////////////////////////////////////////////////////////////////////
// Processor architecture detection.
//
// The first matching branch defines the family, bitness and endianness symbols
// of the target CPU. The commonly used symbols are defined to 0 afterwards when
// they were not activated.
//
// For more info on what's defined, see:
//
//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
//   http://www.agner.org/optimize/calling_conventions.pdf
//
// or with gcc, run: "echo | gcc -E -dM -"
#if defined(_M_X64) || defined(__x86_64__)
#  define ARCH_CPU_X86_FAMILY 1
#  define ARCH_CPU_X86_64 1
#  define ARCH_CPU_64_BITS 1
#  define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(_M_IX86) || defined(__i386__)
#  define ARCH_CPU_X86_FAMILY 1
#  define ARCH_CPU_X86 1
#  define ARCH_CPU_32_BITS 1
#  define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__ARMEL__)
#  define ARCH_CPU_ARM_FAMILY 1
#  define ARCH_CPU_ARMEL 1
#  define ARCH_CPU_32_BITS 1
#  define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__aarch64__)
#  define ARCH_CPU_ARM_FAMILY 1
#  define ARCH_CPU_ARM64 1
#  define ARCH_CPU_64_BITS 1
#  define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__pnacl__)
#  define ARCH_CPU_32_BITS 1
#  define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__MIPSEL__)
// Little-endian MIPS, 64 or 32 bit depending on the data model.
#  if defined(__LP64__)
#    define ARCH_CPU_MIPS64_FAMILY 1
#    define ARCH_CPU_MIPS64EL 1
#    define ARCH_CPU_64_BITS 1
#    define ARCH_CPU_LITTLE_ENDIAN 1
#  else
#    define ARCH_CPU_MIPS_FAMILY 1
#    define ARCH_CPU_MIPSEL 1
#    define ARCH_CPU_32_BITS 1
#    define ARCH_CPU_LITTLE_ENDIAN 1
#  endif
#elif defined(__MIPSEB__)
// Big-endian MIPS, 64 or 32 bit depending on the data model.
#  if defined(__LP64__)
#    define ARCH_CPU_MIPS64_FAMILY 1
#    define ARCH_CPU_MIPS64EB 1
#    define ARCH_CPU_64_BITS 1
#    define ARCH_CPU_BIG_ENDIAN 1
#  else
#    define ARCH_CPU_MIPS_FAMILY 1
#    define ARCH_CPU_MIPSEB 1
#    define ARCH_CPU_32_BITS 1
#    define ARCH_CPU_BIG_ENDIAN 1
#  endif
#else
#  error Please add support for your architecture in build_config.h
#endif

// Explicit zero defaults for symbols which were not activated above.
#ifndef ARCH_CPU_LITTLE_ENDIAN
#  define ARCH_CPU_LITTLE_ENDIAN 0
#endif
#ifndef ARCH_CPU_BIG_ENDIAN
#  define ARCH_CPU_BIG_ENDIAN 0
#endif
#ifndef ARCH_CPU_32_BITS
#  define ARCH_CPU_32_BITS 0
#endif
#ifndef ARCH_CPU_64_BITS
#  define ARCH_CPU_64_BITS 0
#endif
#ifndef ARCH_CPU_X86_FAMILY
#  define ARCH_CPU_X86_FAMILY 0
#endif
#ifndef ARCH_CPU_ARM_FAMILY
#  define ARCH_CPU_ARM_FAMILY 0
#endif
#ifndef ARCH_CPU_MIPS_FAMILY
#  define ARCH_CPU_MIPS_FAMILY 0
#endif
#ifndef ARCH_CPU_MIPS64_FAMILY
#  define ARCH_CPU_MIPS64_FAMILY 0
#endif
////////////////////////////////////////////////////////////////////////////////
// Sizes of platform-dependent types.
//
// Defines PLATFORM_SIZEOF_PTR, PLATFORM_SIZEOF_INT and PLATFORM_SIZEOF_SHORT to
// the size in bytes of a pointer, an int and a short. The values are derived
// from predefined compiler macros and from the <limits.h>/<stdint.h> limits, so
// they are usable in preprocessor conditions (unlike sizeof).

// Pointer size: prefer the compiler's own answer, then fall back to the range
// of uintptr_t and finally to the word size.
#if defined(__SIZEOF_POINTER__)
#  define PLATFORM_SIZEOF_PTR __SIZEOF_POINTER__
#elif defined(UINTPTR_MAX)
#  if (UINTPTR_MAX == 0xffffffff)
#    define PLATFORM_SIZEOF_PTR 4
#  elif (UINTPTR_MAX == 0xffffffffffffffff)  // NOLINT
#    define PLATFORM_SIZEOF_PTR 8
#  endif
#elif defined(__WORDSIZE)
#  if (__WORDSIZE == 32)
#    define PLATFORM_SIZEOF_PTR 4
// NOTE: This has to be #elif. An "#else if" is a plain #else followed by stray
// tokens, which would select 8 for every word size other than 32.
#  elif (__WORDSIZE == 64)
#    define PLATFORM_SIZEOF_PTR 8
#  endif
#endif
#if !defined(PLATFORM_SIZEOF_PTR)
#  error "Cannot find pointer size"
#endif

// Size of int, from the range of unsigned int.
#if (UINT_MAX == 0xffffffff)
#  define PLATFORM_SIZEOF_INT 4
#elif (UINT_MAX == 0xffffffffffffffff)  // NOLINT
#  define PLATFORM_SIZEOF_INT 8
#else
#  error "Cannot find int size"
#endif

// Size of short, from the range of unsigned short.
#if (USHRT_MAX == 0xffffffff)
#  define PLATFORM_SIZEOF_SHORT 4
#elif (USHRT_MAX == 0xffff)  // NOLINT
#  define PLATFORM_SIZEOF_SHORT 2
#else
#  error "Cannot find short size"
#endif
#endif // __BUILD_CONFIG_H__

View File

@ -0,0 +1,37 @@
// Copyright (c) 2018, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
#include "numaapi.h"
#include <assert.h>
// Get string representation of NUMAAPI_Result.
//
// Every known result code maps to a constant string literal. A value outside of
// the enumeration triggers an assertion; in builds where the assertion is
// compiled out "UNKNOWN" is returned.
const char* numaAPI_ResultAsString(NUMAAPI_Result result) {
  if (result == NUMAAPI_SUCCESS) {
    return "SUCCESS";
  }
  if (result == NUMAAPI_NOT_AVAILABLE) {
    return "NOT_AVAILABLE";
  }
  if (result == NUMAAPI_ERROR) {
    return "ERROR";
  }
  if (result == NUMAAPI_ERROR_ATEXIT) {
    return "ERROR_AT_EXIT";
  }
  assert(!"Unknown result was passed to numapi_ResultAsString().");
  return "UNKNOWN";
}

View File

@ -0,0 +1,272 @@
// Copyright (c) 2016, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
#include "build_config.h"
#if OS_LINUX
#include "numaapi.h"
#include <stdlib.h>
#ifndef WITH_DYNLOAD
# include <numa.h>
#else
# include <dlfcn.h>
#endif
#ifdef WITH_DYNLOAD
// Handle of the dynamically loaded numa library. Assigned by loadNumaSymbols()
// and closed by numaExit(); NULL while no library is loaded.
static void* numa_lib;
// Types of all symbols which are read from the library.
//
// Each type is named t<symbol>, which is what the lookup macro in
// loadNumaSymbols() relies on when casting the result of dlsym().
// Only pointers to the bitmask are passed around, so a forward declaration is
// sufficient here.
struct bitmask;
typedef int tnuma_available(void);
typedef int tnuma_max_node(void);
typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
typedef long tnuma_node_size(int node, long* freep);
typedef int tnuma_run_on_node(int node);
typedef void* tnuma_alloc_onnode(size_t size, int node);
typedef void* tnuma_alloc_local(size_t size);
typedef void tnuma_free(void* start, size_t size);
typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
unsigned int n);
typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
unsigned int n);
typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
typedef void tnuma_bitmask_free(struct bitmask *bitmask);
typedef struct bitmask* tnuma_allocate_cpumask(void);
typedef struct bitmask* tnuma_allocate_nodemask(void);
typedef void tnuma_free_cpumask(struct bitmask* bitmask);
typedef void tnuma_free_nodemask(struct bitmask* bitmask);
typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
typedef void tnuma_set_localalloc(void);
// Actual symbols.
//
// These function pointers carry the names of the library functions, so the
// rest of this file calls them the same way with and without WITH_DYNLOAD.
// They are filled in by loadNumaSymbols().
static tnuma_available* numa_available;
static tnuma_max_node* numa_max_node;
static tnuma_node_to_cpus* numa_node_to_cpus;
static tnuma_node_size* numa_node_size;
static tnuma_run_on_node* numa_run_on_node;
static tnuma_alloc_onnode* numa_alloc_onnode;
static tnuma_alloc_local* numa_alloc_local;
static tnuma_free* numa_free;
static tnuma_bitmask_clearall* numa_bitmask_clearall;
static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
static tnuma_bitmask_setbit* numa_bitmask_setbit;
static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
static tnuma_bitmask_free* numa_bitmask_free;
static tnuma_allocate_cpumask* numa_allocate_cpumask;
static tnuma_allocate_nodemask* numa_allocate_nodemask;
static tnuma_free_nodemask* numa_free_nodemask;
static tnuma_free_cpumask* numa_free_cpumask;
static tnuma_run_on_node_mask* numa_run_on_node_mask;
static tnuma_set_interleave_mask* numa_set_interleave_mask;
static tnuma_set_localalloc* numa_set_localalloc;
// Try to dlopen() every entry of the NULL-terminated `paths` array, in order.
//
// Returns the handle of the first library which could be opened, or NULL when
// none of the candidates could be loaded.
static void* findLibrary(const char** paths) {
  for (const char** candidate = paths; *candidate != NULL; ++candidate) {
    void* handle = dlopen(*candidate, RTLD_LAZY);
    if (handle != NULL) {
      return handle;
    }
  }
  return NULL;
}
// atexit() handler: closes the numa library if one is loaded and resets the
// handle, so that a repeated call does nothing.
static void numaExit(void) {
  if (numa_lib != NULL) {
    dlclose(numa_lib);
    numa_lib = NULL;
  }
}
// Load the numa library and resolve all the symbols used by this file.
//
// The work is only done once: every later call returns the cached result of
// the first attempt.
//
// Returns:
//   NUMAAPI_SUCCESS        library is loaded, all required symbols resolved.
//   NUMAAPI_ERROR_ATEXIT   the atexit() handler could not be registered.
//   NUMAAPI_NOT_AVAILABLE  none of the candidate libraries could be opened.
//   NUMAAPI_ERROR          library was opened but misses a required symbol.
static NUMAAPI_Result loadNumaSymbols(void) {
  // Prevent multiple initializations.
  static bool initialized = false;
  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
  if (initialized) {
    return result;
  }
  initialized = true;
  // Find appropriate .so library.
  const char* numa_paths[] = {
      "libnuma.so.1",
      "libnuma.so",
      NULL};
  // Register de-initialization.
  const int error = atexit(numaExit);
  if (error) {
    result = NUMAAPI_ERROR_ATEXIT;
    return result;
  }
  // Load library.
  numa_lib = findLibrary(numa_paths);
  if (numa_lib == NULL) {
    result = NUMAAPI_NOT_AVAILABLE;
    return result;
  }
  // Load symbols.
  //
  // A symbol which is called unconditionally by the API is "required": when it
  // can not be resolved the library is reported as unusable, instead of
  // crashing later on a call through a NULL function pointer.
  bool has_all_required = true;
#define NUMA_LIBRARY_FIND(name)               \
  do {                                        \
    name = (t##name *)dlsym(numa_lib, #name); \
  } while (0)
#define NUMA_LIBRARY_FIND_REQUIRED(name) \
  do {                                   \
    NUMA_LIBRARY_FIND(name);             \
    if ((name) == NULL) {                \
      has_all_required = false;          \
    }                                    \
  } while (0)
  NUMA_LIBRARY_FIND_REQUIRED(numa_available);
  NUMA_LIBRARY_FIND_REQUIRED(numa_max_node);
  NUMA_LIBRARY_FIND_REQUIRED(numa_node_to_cpus);
  NUMA_LIBRARY_FIND_REQUIRED(numa_node_size);
  NUMA_LIBRARY_FIND_REQUIRED(numa_run_on_node);
  NUMA_LIBRARY_FIND_REQUIRED(numa_alloc_onnode);
  NUMA_LIBRARY_FIND_REQUIRED(numa_alloc_local);
  NUMA_LIBRARY_FIND_REQUIRED(numa_free);
  NUMA_LIBRARY_FIND_REQUIRED(numa_bitmask_clearall);
  NUMA_LIBRARY_FIND_REQUIRED(numa_bitmask_isbitset);
  NUMA_LIBRARY_FIND_REQUIRED(numa_bitmask_setbit);
  NUMA_LIBRARY_FIND_REQUIRED(numa_bitmask_nbytes);
  NUMA_LIBRARY_FIND_REQUIRED(numa_bitmask_free);
  NUMA_LIBRARY_FIND_REQUIRED(numa_allocate_cpumask);
  NUMA_LIBRARY_FIND_REQUIRED(numa_allocate_nodemask);
  // Optional: callers fall back to numa_bitmask_free() when these are missing.
  NUMA_LIBRARY_FIND(numa_free_cpumask);
  NUMA_LIBRARY_FIND(numa_free_nodemask);
  NUMA_LIBRARY_FIND_REQUIRED(numa_run_on_node_mask);
  NUMA_LIBRARY_FIND_REQUIRED(numa_set_interleave_mask);
  NUMA_LIBRARY_FIND_REQUIRED(numa_set_localalloc);
#undef NUMA_LIBRARY_FIND_REQUIRED
#undef NUMA_LIBRARY_FIND
  if (!has_all_required) {
    // The library stays open; it is closed by the atexit() handler.
    result = NUMAAPI_ERROR;
    return result;
  }
  result = NUMAAPI_SUCCESS;
  return result;
}
#endif
////////////////////////////////////////////////////////////////////////////////
// Initialization.
// Initialize the Linux backend.
//
// With WITH_DYNLOAD the numa library is loaded first, and a loading failure is
// returned as-is. Afterwards numa_available() decides between NUMAAPI_SUCCESS
// and NUMAAPI_NOT_AVAILABLE.
NUMAAPI_Result numaAPI_Initialize(void) {
#ifdef WITH_DYNLOAD
  const NUMAAPI_Result load_result = loadNumaSymbols();
  if (load_result != NUMAAPI_SUCCESS) {
    return load_result;
  }
#endif
  return (numa_available() < 0) ? NUMAAPI_NOT_AVAILABLE : NUMAAPI_SUCCESS;
}
////////////////////////////////////////////////////////////////////////////////
// Topology query.
// Get number of nodes: numa_max_node() is the highest node index, so the count
// is that index plus one.
int numaAPI_GetNumNodes(void) {
return numa_max_node() + 1;
}
// A node is reported as available when numa_node_size() gives a positive size
// for it.
bool numaAPI_IsNodeAvailable(int node) {
  return numa_node_size(node, NULL) > 0;
}
// Get number of processors on the given node.
//
// Queries the CPU mask of the node and counts the bits which are set in it.
int numaAPI_GetNumNodeProcessors(int node) {
  struct bitmask* mask = numa_allocate_cpumask();
  numa_node_to_cpus(node, mask);
  // The mask reports its size in bytes, the bits are tested one by one.
  const unsigned int bit_count = numa_bitmask_nbytes(mask) * 8;
  // TODO(sergey): There might be faster way calculating number of set bits.
  int processor_count = 0;
  for (unsigned int i = 0; i < bit_count; ++i) {
    processor_count += numa_bitmask_isbitset(mask, i) ? 1 : 0;
  }
#ifdef WITH_DYNLOAD
  // The dedicated free function might not have been resolved from the library,
  // fall back to the generic bitmask free in that case.
  if (numa_free_cpumask == NULL) {
    numa_bitmask_free(mask);
  } else {
    numa_free_cpumask(mask);
  }
#else
  numa_free_cpumask(mask);
#endif
  return processor_count;
}
////////////////////////////////////////////////////////////////////////////////
// Affinities.
// Runs the current process on the given node.
//
// On Linux this is implemented by moving the calling thread to the node.
//
// Returns the result of numaAPI_RunThreadOnNode(), so that a failure to change
// the affinity is reported to the caller instead of being discarded.
bool numaAPI_RunProcessOnNode(int node) {
  return numaAPI_RunThreadOnNode(node);
}
// Runs the current thread on the given node.
//
// Builds a node mask with only `node` set, and asks the numa library to run on
// that mask. The memory policy calls which follow are issued regardless of the
// outcome, same as before.
//
// Returns true if the affinity has successfully changed, false when
// numa_run_on_node_mask() reported an error.
bool numaAPI_RunThreadOnNode(int node) {
  // Construct bit mask from node index.
  struct bitmask* node_mask = numa_allocate_nodemask();
  numa_bitmask_clearall(node_mask);
  numa_bitmask_setbit(node_mask, node);
  // numa_run_on_node_mask() returns 0 on success and -1 on error.
  const bool affinity_changed = (numa_run_on_node_mask(node_mask) == 0);
  // TODO(sergey): The following commands are based on x265 code, we might want
  // to make those optional, or require to call those explicitly.
  //
  // Current assumption is that this is similar to SetThreadGroupAffinity().
  numa_set_interleave_mask(node_mask);
  numa_set_localalloc();
#ifdef WITH_DYNLOAD
  // The dedicated free function might not have been resolved from the library,
  // fall back to the generic bitmask free in that case.
  if (numa_free_nodemask != NULL) {
    numa_free_nodemask(node_mask);
  } else {
    numa_bitmask_free(node_mask);
  }
#else
  numa_free_nodemask(node_mask);
#endif
  return affinity_changed;
}
////////////////////////////////////////////////////////////////////////////////
// Memory management.
// Allocate `size` bytes on the given node. Forwards to numa_alloc_onnode() and
// returns its result unchanged.
void* numaAPI_AllocateOnNode(size_t size, int node) {
return numa_alloc_onnode(size, node);
}
// Allocate `size` bytes of local memory. Forwards to numa_alloc_local() and
// returns its result unchanged.
void* numaAPI_AllocateLocal(size_t size) {
return numa_alloc_local(size);
}
// Free `size` bytes of memory starting at `start`. Forwards to numa_free().
void numaAPI_Free(void* start, size_t size) {
numa_free(start, size);
}
#endif // OS_LINUX

View File

@ -0,0 +1,82 @@
// Copyright (c) 2016, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
#include "numaapi.h"
#include "build_config.h"
// Stub implementation for platforms which don't have NUMA support.
#if !OS_LINUX && !OS_WIN
////////////////////////////////////////////////////////////////////////////////
// Initialization.
NUMAPIResult numaAPI_Initialize(void) {
return UMAAPI_NOT_AVAILABLE;
}
////////////////////////////////////////////////////////////////////////////////
// Topology query.
// Stub: there are no NUMA nodes to report.
int numaAPI_GetNumNodes(void) {
return 0;
}
// Stub: no node is ever available.
//
// NOTE: The function has to carry the exact name declared in numaapi.h,
// otherwise the public symbol stays unresolved on platforms using the stub.
bool numaAPI_IsNodeAvailable(int node) {
  (void) node;  // Ignored.
  return false;
}
// Stub: there are no processors on any node.
//
// Declared in numaapi.h together with the other topology queries, so it needs
// a definition on platforms using the stub as well.
int numaAPI_GetNumNodeProcessors(int node) {
  (void) node;  // Ignored.
  return 0;
}
////////////////////////////////////////////////////////////////////////////////
// Affinities.
// Stub: affinity is never changed, always returns false.
bool numaAPI_RunProcessOnNode(int node) {
(void) node; // Ignored.
return false;
}
// Stub: affinity is never changed, always returns false.
bool numaAPI_RunThreadOnNode(int node) {
(void) node; // Ignored.
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Memory management.
// Stub: nothing is allocated, always returns NULL.
void* numaAPI_AllocateOnNode(size_t size, int node) {
  (void) size;  // Ignored.
  (void) node;  // Ignored.
  // NULL rather than a bare 0, matching numaAPI_AllocateLocal().
  return NULL;
}
// Stub: nothing is allocated, always returns NULL.
void* numaAPI_AllocateLocal(size_t size) {
(void) size; // Ignored.
return NULL;
}
// Stub: does nothing, both arguments are ignored.
void numaAPI_Free(void* start, size_t size) {
(void) start; // Ignored.
(void) size; // Ignored.
}
#endif // !OS_LINUX && !OS_WIN

View File

@ -0,0 +1,253 @@
// Copyright (c) 2016, libnumaapi authors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//
// Author: Sergey Sharybin (sergey.vfx@gmail.com)
#include "build_config.h"
#if OS_WIN
#include "numaapi.h"
#ifndef NOGDI
# define NOGDI
#endif
#ifndef NOMINMAX
# define NOMINMAX
#endif
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOCOMM
# define NOCOMM
#endif
#include <stdlib.h>
#include <stdint.h>
#include <windows.h>
#if ARCH_CPU_64_BITS
# include <VersionHelpers.h>
#endif
#include <stdio.h>
////////////////////////////////////////////////////////////////////////////////
// Initialization.
// Handle of the kernel library from which all symbols below are resolved.
// Assigned by loadNumaSymbols() via LoadLibraryA("Kernel32.dll").
static HMODULE kernel_lib;
// Function types of all symbols which are read from the library.
//
// Each t_<Name> typedef describes the signature used for the entry point
// <Name>; the matching _<Name> pointer further down is what the rest of this
// file calls. The pointers are filled in by loadNumaSymbols() using
// GetProcAddress().
//
// NUMA function types.
typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
GROUP_AFFINITY* processor_mask);
typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
LPVOID address,
SIZE_T size,
DWORD allocation_type,
DWORD protect,
DWORD preferred);
typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
// Threading function types.
typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
DWORD_PTR process_affinity_mask);
typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
const GROUP_AFFINITY* GroupAffinity,
GROUP_AFFINITY* PreviousGroupAffinity);
typedef DWORD t_GetCurrentProcessorNumber(void);
// NUMA symbols.
// NOTE: Being file-scope statics, these pointers start out as NULL and stay
// NULL until loadNumaSymbols() has resolved them.
static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
static t_GetNumaProcessorNode* _GetNumaProcessorNode;
static t_VirtualAllocExNuma* _VirtualAllocExNuma;
static t_VirtualFree* _VirtualFree;
// Threading symbols.
static t_SetProcessAffinityMask* _SetProcessAffinityMask;
static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
// Process exit handler, registered with atexit() by loadNumaSymbols().
// Currently does nothing.
static void numaExit(void) {
// TODO(sergey): Consider closing library here.
}
// Resolve every Kernel32 entry point used by this file into the file-scope
// function pointers.
//
// The actual work is only done on the first call; every later call returns
// the cached outcome of that first attempt.
//
// Returns:
//   NUMAAPI_SUCCESS        The library and all symbols were found.
//   NUMAAPI_ERROR_ATEXIT   The exit handler could not be registered.
//   NUMAAPI_NOT_AVAILABLE  The library or at least one symbol is missing, so
//                          calling through the pointers would not be safe.
static NUMAAPI_Result loadNumaSymbols(void) {
  // Prevent multiple initializations.
  static bool initialized = false;
  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
  if (initialized) {
    return result;
  }
  initialized = true;
  // Register de-initialization.
  const int error = atexit(numaExit);
  if (error) {
    result = NUMAAPI_ERROR_ATEXIT;
    return result;
  }
  // Load library.
  kernel_lib = LoadLibraryA("Kernel32.dll");
  if (kernel_lib == NULL) {
    // Nothing can be resolved without the library; keep NOT_AVAILABLE.
    return result;
  }
  // Load symbols, remembering whether any of them could not be resolved.
  bool all_symbols_found = true;
#define _LIBRARY_FIND(lib, name)                          \
  do {                                                    \
    _##name = (t_##name *)GetProcAddress(lib, #name);     \
    if (_##name == NULL) {                                \
      all_symbols_found = false;                          \
    }                                                     \
  } while (0)
#define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
  // NUMA.
  KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
  KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
  KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
  KERNEL_LIBRARY_FIND(VirtualFree);
  // Threading.
  KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
  KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
#undef KERNEL_LIBRARY_FIND
#undef _LIBRARY_FIND
  // Only report success when every pointer is callable; the public functions
  // in this file call through them without any further checks.
  if (all_symbols_found) {
    result = NUMAAPI_SUCCESS;
  }
  return result;
}
NUMAAPI_Result numaAPI_Initialize(void) {
#if !ARCH_CPU_64_BITS
// No NUMA on 32 bit platforms.
return LIBNUMAAPI_NOT_AVAILABLE;
#else
if (!IsWindows7OrGreater()) {
// Require Windows 7 or higher.
NUMAAPI_NOT_AVAILABLE;
}
loadNumaSymbols();
return NUMAAPI_SUCCESS;
#endif
}
////////////////////////////////////////////////////////////////////////////////
// Topology query.
int numaAPI_GetNumNodes(void) {
ULONG highest_node_number;
if (!_GetNumaHighestNodeNumber(&highest_node_number)) {
return 0;
}
// TODO(sergey): Resolve the type narrowing.
// NOTE: This is not necessarily a total amount of nodes in the system.
return (int)highest_node_number + 1;
}
bool numaAPI_IsNodeAvailable(int node) {
// Trick to detect whether the node is usable or not: check whether
// there are any processors associated with it.
//
// This is needed because numaApiGetNumNodes() is not guaranteed to
// give total amount of nodes and some nodes might be unavailable.
ULONGLONG processor_mask;
if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
return false;
}
if (processor_mask == 0) {
return false;
}
return true;
}
// Get the number of processors which belong to the given node.
//
// Returns the number of bits set in the processor mask of the node, or 0
// when the mask can not be queried.
int numaAPI_GetNumNodeProcessors(int node) {
  ULONGLONG remaining_cpus;
  if (!_GetNumaNodeProcessorMask(node, &remaining_cpus)) {
    return 0;
  }
  int cpu_count = 0;
  // Every iteration clears the lowest set bit, so the loop body runs once
  // per processor in the mask.
  for (; remaining_cpus != 0; remaining_cpus &= remaining_cpus - 1) {
    ++cpu_count;
  }
  return cpu_count;
}
////////////////////////////////////////////////////////////////////////////////
// Affinities.
// Restrict the current process to the processors of the given node.
//
// Returns true when both the processor mask query and the affinity change
// succeeded, false otherwise.
bool numaAPI_RunProcessOnNode(int node) {
  // TODO(sergey): Make sure requested node is within active CPU group.
  ULONGLONG node_cpus;
  if (!_GetNumaNodeProcessorMask(node, &node_cpus)) {
    return false;
  }
  // Use the processor mask of the node as the affinity mask of the process.
  return _SetProcessAffinityMask(GetCurrentProcess(), node_cpus) != 0;
}
// Restrict the calling thread to the processors of the given node.
//
// Returns true when both the group affinity query and the affinity change
// succeeded, false otherwise.
bool numaAPI_RunThreadOnNode(int node) {
  GROUP_AFFINITY node_affinity = { 0 };
  if (!_GetNumaNodeProcessorMaskEx(node, &node_affinity)) {
    return false;
  }
  // The previous affinity of the thread is of no interest, hence NULL.
  return _SetThreadGroupAffinity(GetCurrentThread(),
                                 &node_affinity,
                                 NULL) != 0;
}
////////////////////////////////////////////////////////////////////////////////
// Memory management.
void* numaAPI_AllocateOnNode(size_t size, int node) {
return _VirtualAllocExNuma(GetCurrentProcess(),
NULL,
size,
MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE,
node);
}
void* numaAPI_AllocateLocal(size_t size) {
UCHAR current_processor = (UCHAR)_GetCurrentProcessorNumber();
UCHAR node;
if (!_GetNumaProcessorNode(current_processor, &node)) {
return NULL;
}
return numaAPI_AllocateOnNode(size, node);
}
// Free memory previously returned by numaAPI_AllocateOnNode() or
// numaAPI_AllocateLocal().
//
// `size` is accepted for interface compatibility but is not forwarded:
// VirtualFree() requires the size argument to be 0 when releasing with
// MEM_RELEASE, and always frees the entire region of the initial allocation.
// Passing the real size makes the call fail and leaves the memory allocated.
//
// Passing NULL as `start` is a no-op.
void numaAPI_Free(void* start, size_t size) {
  (void) size;  // Must not be passed to VirtualFree() with MEM_RELEASE.
  if (start == NULL) {
    return;
  }
  if (!_VirtualFree(start, 0, MEM_RELEASE)) {
    // TODO(sergey): Throw an error!
  }
}
#endif // OS_WIN

View File

@ -30,6 +30,10 @@
int BLI_cpu_support_sse2(void);
void BLI_system_backtrace(FILE *fp);
/* Get CPU brand, result is to be MEM_freeN()-ed. */
char *BLI_cpu_brand_string(void);
/* getpid */
#ifdef WIN32
# define BLI_SYSTEM_PID_H <process.h>

View File

@ -204,6 +204,12 @@ void BLI_thread_queue_nowait(ThreadQueue *queue);
# define BLI_thread_local_set(name, value) name = value
#endif /* defined(__APPLE__) */
/* **** Special functions to help performance on crazy NUMA setups. **** */
/* Make sure process/thread is using NUMA node with fast memory access. */
void BLI_thread_put_process_on_fast_node(void);
void BLI_thread_put_thread_on_fast_node(void);
#ifdef __cplusplus
}
#endif

View File

@ -30,6 +30,7 @@ set(INC
../../../intern/guardedalloc
../../../intern/atomic
../../../intern/eigen
../../../intern/numaapi/include
../../../extern/wcwidth
)

View File

@ -27,6 +27,7 @@
#include "BLI_utildefines.h"
#include "BLI_system.h"
#include "BLI_string.h"
#include "MEM_guardedalloc.h"
@ -138,3 +139,40 @@ void BLI_system_backtrace(FILE *fp)
}
/* end BLI_system_backtrace */
/* NOTE: The code for CPU brand string is adapted from Cycles. */
#if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Execute the CPUID instruction for platforms where this definition is
 * compiled in (see the surrounding preprocessor condition).
 *
 * - selector: value placed in EAX before executing CPUID.
 * - data:     receives EAX, EBX, ECX and EDX (in this order) after the
 *             instruction has been executed.
 *
 * When neither __x86_64__ nor __i386__ is defined, no instruction is executed
 * and all four elements of data are set to 0. */
static void __cpuid(int data[4], int selector)
{
#if defined(__x86_64__)
asm("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) : "a"(selector));
#elif defined(__i386__)
/* EBX is saved and restored around the instruction, and its value is passed
 * out through a register picked by the compiler.
 * NOTE(review): presumably this is to keep EBX intact for position
 * independent code; confirm whether the "ebx" clobber is still wanted then. */
asm("pushl %%ebx \n\t"
"cpuid \n\t"
"movl %%ebx, %1 \n\t"
"popl %%ebx \n\t"
: "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
: "a"(selector)
: "ebx");
#else
/* No CPUID on this architecture: report all registers as zero. */
data[0] = data[1] = data[2] = data[3] = 0;
#endif
}
#endif
/* Get the CPU brand string as reported by the CPUID instruction.
 *
 * Returns a newly allocated, NUL-terminated string which is to be freed by
 * the caller with MEM_freeN(), or NULL when the processor does not report
 * support for the brand string leaves (this includes platforms where CPUID
 * is not available and all registers are reported as zero). */
char *BLI_cpu_brand_string(void)
{
	/* The brand string is up to 48 bytes and is filled in 16 bytes at a
	 * time, as four registers per CPUID call. The union lets the registers
	 * be written as properly aligned ints and read back as characters; the
	 * extra byte guarantees room for the terminator even when all 48 bytes
	 * are used. */
	union {
		char text[49];
		int regs[12];
	} brand = { { 0 } };
	int result[4] = { 0 };
	__cpuid(result, 0x80000000);
	/* EAX holds the highest supported extended leaf. Compare as unsigned:
	 * the leaf numbers have the top bit set, so a signed comparison would
	 * treat 0 (or any small positive value) as "supported". */
	if ((unsigned int)result[0] < 0x80000004u) {
		return NULL;
	}
	__cpuid(brand.regs + 0, 0x80000002);
	__cpuid(brand.regs + 4, 0x80000003);
	__cpuid(brand.regs + 8, 0x80000004);
	brand.text[48] = '\0';
	/* TODO(sergey): Make it a bit more presentable by removing trademark. */
	return BLI_strdup(brand.text);
}

View File

@ -37,6 +37,7 @@
#include "BLI_listbase.h"
#include "BLI_gsqueue.h"
#include "BLI_system.h"
#include "BLI_task.h"
#include "BLI_threads.h"
@ -55,6 +56,7 @@
#endif
#include "atomic_ops.h"
#include "numaapi.h"
#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
# define USE_APPLE_OMP_FIX
@ -126,6 +128,7 @@ static pthread_mutex_t _colormanage_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t mainid;
static bool is_numa_available = false;
static unsigned int thread_levels = 0; /* threads can be invoked inside threads */
static int num_threads_override = 0;
@ -155,6 +158,9 @@ void BLI_threadapi_init(void)
mainid = pthread_self();
BLI_spin_init(&_malloc_lock);
if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
is_numa_available = true;
}
}
void BLI_threadapi_exit(void)
@ -840,3 +846,98 @@ void BLI_threaded_malloc_end(void)
MEM_set_lock_callback(NULL, NULL);
}
}
/* **** Special functions to help performance on crazy NUMA setups. **** */
/* Check whether the CPU is one for which pinning to NUMA node 0 is wanted.
 *
 * The CPU brand string is only queried on the first call; the outcome is
 * cached and returned by all later calls. Returns false when the brand string
 * can not be obtained. */
static bool check_is_threadripper2_alike_topology(void)
{
	/* NOTE: We hope operating system does not support CPU hotswap to
	 * a different brand. And that SMP of different types is also not
	 * encouraged by the system. */
	static bool is_initialized = false;
	static bool is_threadripper2 = false;
	if (is_initialized) {
		return is_threadripper2;
	}
	is_initialized = true;
	char *cpu_brand = BLI_cpu_brand_string();
	if (cpu_brand == NULL) {
		return false;
	}
	if (strstr(cpu_brand, "Threadripper")) {
		/* NOTE: The intent noted by the original author is to consider all
		 * Threadrippers as having a topology similar to the second
		 * generation: NUMA node 0 is utilized as much as possible, this
		 * node does exist on earlier versions of Threadripper, and setting
		 * affinity to it should not have a negative effect.
		 *
		 * NOTE(review): The code below nevertheless only matches two
		 * specific models. Confirm which of the two behaviors is wanted. */
		if (strstr(cpu_brand, "2990WX") || strstr(cpu_brand, "2950X")) {
			is_threadripper2 = true;
		}
	}
	/* NOTE: While all dies of EPYC have a memory controller, only two of
	 * them have access to the lower-indexed DDR slots. Those dies are the
	 * same as the ones with the memory controller on Threadripper2.
	 * Now, it is rather likely that a reasonable amount of users don't max
	 * out their DDR slots, making it only two dies connected to a DDR slot
	 * with actual memory in it. */
	if (strstr(cpu_brand, "EPYC")) {
		/* NOTE: No model check is done for EPYC. */
		is_threadripper2 = true;
	}
	/* The brand string is owned by us and is no longer needed. */
	MEM_freeN(cpu_brand);
	return is_threadripper2;
}
/* Bind the whole process to NUMA node 0.
 *
 * Does nothing when the NUMA API is not available. */
static void threadripper_put_process_on_fast_node(void)
{
	/* NOTE: Technically, both NUMA nodes 0 and 2 could be used. Having both
	 * of them in the affinity mask would allow the OS to schedule threads in
	 * a more flexible way, possibly increasing overall performance when
	 * multiple applications are crunching numbers.
	 *
	 * However, if the scene fits into the memory adjacent to a single die we
	 * don't want the OS to re-schedule the process to another die, since
	 * that will move it further away from the memory allocated for the
	 * .blend file. */
	/* NOTE: Even if NUMA is available in the API but is disabled in BIOS on
	 * this workstation we still proceed here. If NUMA is disabled there will
	 * be a single node, so our action causes no visible changes, but it
	 * allows to keep things simple and unified. */
	if (is_numa_available) {
		numaAPI_RunProcessOnNode(0);
	}
}
/* Bind the calling thread to NUMA node 0.
 *
 * Does nothing when the NUMA API is not available. */
static void threadripper_put_thread_on_fast_node(void)
{
	/* NOTE: This is where things become more interesting. On the one hand,
	 * nodes 0 and 2 could both be used, allowing the operating system to
	 * balance processes/threads for the maximum performance when multiple
	 * applications are running.
	 * On the other hand, we probably want to use the same node as the main
	 * thread, since that's where the memory of the .blend file is likely to
	 * be allocated.
	 * Since the main thread is currently put on node 0, the thread is put
	 * on that same node. */
	/* NOTE: As with the process-wide variant, this is done even when NUMA is
	 * available in the API but is disabled in BIOS: there is a single node
	 * then, so the call causes no visible changes. */
	if (is_numa_available) {
		numaAPI_RunThreadOnNode(0);
	}
}
/* Make sure the process is using the NUMA node with fast memory access.
 *
 * Only has an effect on CPUs detected as having a Threadripper2-alike
 * topology; on everything else this is a no-op. */
void BLI_thread_put_process_on_fast_node(void)
{
	if (!check_is_threadripper2_alike_topology()) {
		return;
	}
	threadripper_put_process_on_fast_node();
}
/* Make sure the calling thread is using the NUMA node with fast memory
 * access.
 *
 * Only has an effect on CPUs detected as having a Threadripper2-alike
 * topology; on everything else this is a no-op. */
void BLI_thread_put_thread_on_fast_node(void)
{
	if (!check_is_threadripper2_alike_topology()) {
		return;
	}
	threadripper_put_thread_on_fast_node();
}

View File

@ -334,6 +334,7 @@ static void *do_job_thread(void *job_v)
{
wmJob *wm_job = job_v;
BLI_thread_put_thread_on_fast_node();
wm_job->startjob(wm_job->run_customdata, &wm_job->stop, &wm_job->do_update, &wm_job->progress);
wm_job->ready = true;

View File

@ -52,6 +52,7 @@
#include "BLI_callbacks.h"
#include "BLI_string.h"
#include "BLI_system.h"
#include "BLI_threads.h"
/* mostly init functions */
#include "BKE_appdir.h"
@ -364,6 +365,7 @@ int main(
BKE_appdir_program_path_init(argv[0]);
BLI_threadapi_init();
BLI_thread_put_process_on_fast_node();
DNA_sdna_current_init();