Merge branch 'master' into blender2.8

Referenced by issue #58207, Render time going crazy!
2018-11-28 14:42:38 +01:00 · 2018-11-28 14:42:38 +01:00 · 3ed0d5b4d4 · 2023-02-14 05:37:19 +01:00
parent 2bd62b076f ce927e15e0
commit 3ed0d5b4d4
20 changed files with 1357 additions and 0 deletions
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@ -695,6 +695,7 @@ function(SETUP_BLENDER_SORTED_LIBS)
 		bf_intern_glew_mx
 		bf_intern_clog
 		bf_intern_opensubdiv
+		bf_intern_numaapi
 	)

 	if(NOT WITH_SYSTEM_GLOG)
--- a/intern/CMakeLists.txt
+++ b/intern/CMakeLists.txt
@ -30,6 +30,7 @@ add_subdirectory(ghost)
 add_subdirectory(guardedalloc)
 add_subdirectory(libmv)
 add_subdirectory(memutil)
+add_subdirectory(numaapi)
 add_subdirectory(opencolorio)
 add_subdirectory(opensubdiv)
 add_subdirectory(mikktspace)
--- a/intern/numaapi/AUTHORS
+++ b/intern/numaapi/AUTHORS
@ -0,0 +1 @@
+Sergey Sharybin <sergey.vfx@gmail.com>
--- a/intern/numaapi/CMakeLists.txt
+++ b/intern/numaapi/CMakeLists.txt
@ -0,0 +1,39 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+# Include directory which contains the public numaapi.h header.
+set(INC
+	./include
+)
+
+set(INC_SYS
+
+)
+
+# All platform implementations are listed unconditionally: every source file
+# guards its own content with the OS_* checks from build_config.h, so only the
+# implementation matching the target platform produces any code.
+set(SRC
+	source/numaapi.c
+	source/numaapi_linux.c
+	source/numaapi_stub.c
+	source/numaapi_win32.c
+
+	include/numaapi.h
+	source/build_config.h
+)
+
+# With WITH_DYNLOAD defined numaapi_linux.c does not include <numa.h>, but
+# opens libnuma at runtime via dlopen() and resolves its symbols with dlsym().
+add_definitions(-DWITH_DYNLOAD)
+
+blender_add_lib(bf_intern_numaapi "${SRC}" "${INC}" "${INC_SYS}")
--- a/intern/numaapi/LICENSE
+++ b/intern/numaapi/LICENSE
@ -0,0 +1,19 @@
+Copyright (c) 2016 libnumaapi authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
--- a/intern/numaapi/README
+++ b/intern/numaapi/README
@ -0,0 +1,7 @@
+LibNumaAPI aims to provide a single, common API for NUMA functionality
+across all supported platforms, so that cross-platform applications do
+not have to worry about platform-specific implementation details.
+
+LICENSE
+
+LibNumaAPI library is released under the MIT license.
--- a/intern/numaapi/README.blender
+++ b/intern/numaapi/README.blender
@ -0,0 +1,5 @@
+Project: LibNumaAPI
+URL: https://github.com/Nazg-Gul/libNumaAPI
+License: MIT
+Upstream version: f83d41ec4d7
+Local modifications: None
--- a/intern/numaapi/include/numaapi.h
+++ b/intern/numaapi/include/numaapi.h
@ -0,0 +1,108 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#ifndef __LIBNUMAAPI_H__
+#define __LIBNUMAAPI_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NUMAAPI_VERSION_MAJOR 1
+#define NUMAAPI_VERSION_MINOR 0
+
+// Result codes of the NUMA API. Use numaAPI_ResultAsString() to get a textual
+// representation of a value.
+typedef enum NUMAAPI_Result {
+  NUMAAPI_SUCCESS       = 0,
+  // NUMA is not available on this platform.
+  NUMAAPI_NOT_AVAILABLE = 1,
+  // Generic error, no real details are available.
+  NUMAAPI_ERROR         = 2,
+  // Error installing atexit() handlers.
+  NUMAAPI_ERROR_ATEXIT  = 3,
+} NUMAAPI_Result;
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Initialize NUMA API.
+//
+// This is the first function to be called: it must be invoked before any
+// other function of the NUMA API is used.
+NUMAAPI_Result numaAPI_Initialize(void);
+
+// Get string representation of the given NUMAAPI_Result value.
+const char* numaAPI_ResultAsString(NUMAAPI_Result result);
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Get number of available nodes.
+//
+// This is in fact an index of last node plus one and it's not guaranteed
+// that all nodes up to this one are available.
+int numaAPI_GetNumNodes(void);
+
+// Returns true if the given node is available for compute.
+bool numaAPI_IsNodeAvailable(int node);
+
+// Get number of available processors on a given node.
+int numaAPI_GetNumNodeProcessors(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Runs the current process and its children on a specific node.
+//
+// Returns true if affinity has successfully changed.
+//
+// NOTE: This function can not change the active CPU group. It is mainly
+// designed to deal with the Threadripper 2 topology, to make it possible to
+// gain maximum performance for the main application thread.
+bool numaAPI_RunProcessOnNode(int node);
+
+// Runs the current thread and its children on a specific node.
+//
+// Returns true if affinity has successfully changed.
+bool numaAPI_RunThreadOnNode(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Allocate memory on a given node.
+void* numaAPI_AllocateOnNode(size_t size, int node);
+
+// Allocate memory in the local memory, closest to the current node.
+void* numaAPI_AllocateLocal(size_t size);
+
+// Frees size bytes of memory starting at start.
+//
+// TODO(sergey): Consider making it regular free() semantic.
+void numaAPI_Free(void* start, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __LIBNUMAAPI_H__
--- a/intern/numaapi/source/build_config.h
+++ b/intern/numaapi/source/build_config.h
@ -0,0 +1,379 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#ifndef __BUILD_CONFIG_H__
+#define __BUILD_CONFIG_H__
+
+#include <limits.h>
+#include <stdint.h>
+
+// Initially is based on Chromium's build_config.h, with tweaks and extensions
+// needed for this project.
+//
+// NOTE: All commonly used symbols (which are checked on a "top" level, from
+// outside of any platform-specific ifdef block) are to be explicitly defined
+// to 0 when they are not "active". This is extra lines of code in this file,
+// but is not being edited that often. Such approach helps catching cases when
+// one attempted to access build configuration variable without including the
+// header by simply using -Wundef compiler attribute.
+//
+// NOTE: Not having things explicitly defined to 0 is harmless (in terms it
+// follows same rules as Google projects) and will simply cause compiler to
+// become more noisy, which is simple to correct.
+
+////////////////////////////////////////////////////////////////////////////////
+// A set of macros to use for platform detection.
+
+#if defined(__native_client__)
+// __native_client__ must be first, so that other OS_ defines are not set.
+#  define OS_NACL 1
+#elif defined(_AIX)
+#  define OS_AIX 1
+#elif defined(ANDROID)
+#  define OS_ANDROID 1
+#elif defined(__APPLE__)
+// Only include TargetConditions after testing ANDROID as some android builds
+// on mac don't have this header available and it's not needed unless the target
+// is really mac/ios.
+#  include <TargetConditionals.h>
+#  define OS_MACOSX 1
+#  if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#    define OS_IOS 1
+#  endif  // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#elif defined(__HAIKU__)
+#  define OS_HAIKU 1
+#elif defined(__hpux)
+#  define OS_HPUX 1
+#elif defined(__linux__)
+#  define OS_LINUX 1
+// Include a system header to pull in features.h for glibc/uclibc macros.
+#  include <unistd.h>
+#  if defined(__GLIBC__) && !defined(__UCLIBC__)
+// We really are using glibc, not uClibc pretending to be glibc.
+#    define LIBC_GLIBC 1
+#  endif
+#elif defined(__sgi)
+#  define OS_IRIX 1
+#elif defined(_WIN32)
+#  define OS_WIN 1
+#elif defined(__FreeBSD__)
+#  define OS_FREEBSD 1
+#elif defined(__NetBSD__)
+#  define OS_NETBSD 1
+#elif defined(__OpenBSD__)
+#  define OS_OPENBSD 1
+#elif defined(__sun)
+#  define OS_SOLARIS 1
+#elif defined(__QNXNTO__)
+#  define OS_QNX 1
+#else
+#  error Please add support for your platform in build_config.h
+#endif
+
+#if !defined(OS_AIX)
+#  define OS_AIX 0
+#endif
+#if !defined(OS_NACL)
+#  define OS_NACL 0
+#endif
+#if !defined(OS_ANDROID)
+#  define OS_ANDROID 0
+#endif
+#if !defined(OS_MACOSX)
+#  define OS_MACOSX 0
+#endif
+#if !defined(OS_IOS)
+#  define OS_IOS 0
+#endif
+#if !defined(OS_HAIKU)
+#  define OS_HAIKU 0
+#endif
+#if !defined(OS_HPUX)
+#  define OS_HPUX 0
+#endif
+#if !defined(OS_IRIX)
+#  define OS_IRIX 0
+#endif
+#if !defined(OS_LINUX)
+#  define OS_LINUX 0
+#endif
+#if !defined(LIBC_GLIBC)
+#  define LIBC_GLIBC 0
+#endif
+#if !defined(OS_WIN)
+#  define OS_WIN 0
+#endif
+#if !defined(OS_FREEBSD)
+#  define OS_FREEBSD 0
+#endif
+#if !defined(OS_NETBSD)
+#  define OS_NETBSD 0
+#endif
+#if !defined(OS_OPENBSD)
+#  define OS_OPENBSD 0
+#endif
+#if !defined(OS_SOLARIS)
+#  define OS_SOLARIS 0
+#endif
+#if !defined(OS_QNX)
+#  define OS_QNX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// *BSD OS family detection.
+//
+// For access to standard BSD features, use OS_BSD instead of a
+// more specific macro.
+#if OS_FREEBSD || OS_OPENBSD || OS_NETBSD
+#  define OS_BSD 1
+#else
+#  define OS_BSD 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// POSIX system detection.
+//
+// For access to standard POSIXish features use OS_POSIX instead of a
+// more specific macro.
+#if OS_MACOSX || OS_LINUX || OS_BSD || OS_SOLARIS ||OS_ANDROID || OS_NACL ||  \
+    OS_QNX || OS_HAIKU || OS_AIX || OS_HPUX || OS_IRIX
+#  define OS_POSIX 1
+#else
+#  define OS_POSIX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Compiler detection, including its capabilities.
+
+// Base compiler detection: at most one of COMPILER_CLANG, COMPILER_GCC and
+// COMPILER_MSVC is set to 1 by this chain.
+#if defined(__clang__)
+#  define COMPILER_CLANG 1
+#elif defined(__GNUC__)
+#  define COMPILER_GCC 1
+#  define COMPILER_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#elif defined(_MSC_VER)
+#  define COMPILER_MSVC 1
+#  define COMPILER_MSVC_VERSION (_MSC_VER)
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+// Nothing to do here: the MinGW flavor is detected by the dedicated check
+// below. The branch is only kept so such compiler is not reported as unknown.
+#else
+#  error Please add support for your compiler in build_config.h
+#endif
+
+// MinGW detection.
+//
+// MinGW toolchains also define __GNUC__ (or __clang__), so a MinGW branch in
+// the chain above can never be reached. Detect MinGW independently from the
+// base compiler, so COMPILER_GCC / COMPILER_CLANG keep their value and the
+// COMPILER_MINGW* symbols are set in addition to them.
+//
+// NOTE: __MINGW64__ is to be checked first since 64 bit MinGW targets define
+// both __MINGW32__ and __MINGW64__.
+#if defined(__MINGW64__)
+#  define COMPILER_MINGW64 1
+#elif defined(__MINGW32__)
+#  define COMPILER_MINGW32 1
+#endif
+
+#if !defined(COMPILER_CLANG)
+#  define COMPILER_CLANG 0
+#endif
+#if !defined(COMPILER_GCC)
+#  define COMPILER_GCC 0
+#endif
+#if !defined(COMPILER_MSVC)
+#  define COMPILER_MSVC 0
+#endif
+#if !defined(COMPILER_MINGW32)
+#  define COMPILER_MINGW32 0
+#endif
+#if !defined(COMPILER_MINGW64)
+#  define COMPILER_MINGW64 0
+#endif
+
+// Compiler is any of MinGW family.
+#if COMPILER_MINGW32 || COMPILER_MINGW64
+#  define COMPILER_MINGW 1
+#else
+#  define COMPILER_MINGW 0
+#endif
+
+// Check what is the latest C++ specification the compiler supports.
+//
+// NOTE: Use explicit definition here to avoid expansion-to-defined warning from
+// being generated. While this will most likely be a false-positive warning in
+// this particular case, that warning might be helpful to catch errors
+// elsewhere.
+
+// C++11 check.
+#if ((defined(__cplusplus) && (__cplusplus > 199711L)) || \
+     (defined(_MSC_VER) && (_MSC_VER >= 1800)))
+#  define COMPILER_SUPPORTS_CXX11 1
+#else
+#  define COMPILER_SUPPORTS_CXX11 0
+#endif
+// C++14 check.
+#if (defined(__cplusplus) && (__cplusplus > 201311L))
+#  define COMPILER_SUPPORTS_CXX14  1
+#else
+#  define COMPILER_SUPPORTS_CXX14  0
+#endif
+// C++17 check.
+#if (defined(__cplusplus) && (__cplusplus > 201611L))
+#  define COMPILER_SUPPORTS_CXX17  1
+#else
+#  define COMPILER_SUPPORTS_CXX17  0
+#endif
+// C++20 check.
+#if (defined(__cplusplus) && (__cplusplus > 201911L))
+#  define COMPILER_SUPPORTS_CXX20  1
+#else
+#  define COMPILER_SUPPORTS_CXX20  0
+#endif
+
+// COMPILER_USE_ADDRESS_SANITIZER is defined to 1 when it is detected that
+// compilation happens with address sanitizer enabled, and to 0 otherwise. This
+// allows to give tips to sanitizer, or maybe work around some known issues
+// with third party libraries.
+//
+// The value can be forced from the outside by defining the symbol prior to
+// including this header.
+//
+// NOTE: The mere existence of __has_feature says nothing about the sanitizer
+// (Clang always provides it), the feature itself is to be queried. The query
+// is nested into the defined() check because compilers which do not provide
+// __has_feature are unable to parse the __has_feature(...) expression.
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  if defined(__has_feature)
+#    if __has_feature(address_sanitizer)
+#      define COMPILER_USE_ADDRESS_SANITIZER 1
+#    endif
+#  endif
+#endif
+
+// Compilers which do not report the sanitizer via __has_feature define
+// __SANITIZE_ADDRESS__ instead.
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  if defined(__SANITIZE_ADDRESS__)
+#    define COMPILER_USE_ADDRESS_SANITIZER 1
+#  endif
+#endif
+
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  define COMPILER_USE_ADDRESS_SANITIZER 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Processor architecture detection.
+//
+// For more info on what's defined, see:
+//
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//
+//   or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86_64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(_M_IX86) || defined(__i386__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__ARMEL__)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARMEL 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__aarch64__)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARM64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__pnacl__)
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__MIPSEL__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS64_FAMILY 1
+#    define ARCH_CPU_MIPS64EL 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPSEL 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  endif
+#elif defined(__MIPSEB__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS64_FAMILY 1
+#    define ARCH_CPU_MIPS64EB 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPSEB 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  endif
+#else
+#  error Please add support for your architecture in build_config.h
+#endif
+
+#if !defined(ARCH_CPU_LITTLE_ENDIAN)
+#  define ARCH_CPU_LITTLE_ENDIAN 0
+#endif
+#if !defined(ARCH_CPU_BIG_ENDIAN)
+#  define ARCH_CPU_BIG_ENDIAN 0
+#endif
+
+#if !defined(ARCH_CPU_32_BITS)
+#  define ARCH_CPU_32_BITS 0
+#endif
+#if !defined(ARCH_CPU_64_BITS)
+#  define ARCH_CPU_64_BITS 0
+#endif
+
+#if !defined(ARCH_CPU_X86_FAMILY)
+#  define ARCH_CPU_X86_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_ARM_FAMILY)
+#  define ARCH_CPU_ARM_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_MIPS_FAMILY)
+#  define ARCH_CPU_MIPS_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_MIPS64_FAMILY)
+#  define ARCH_CPU_MIPS64_FAMILY 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Sizes of platform-dependent types.
+
+// PLATFORM_SIZEOF_PTR is the size of a pointer in bytes.
+//
+// The compiler-provided __SIZEOF_POINTER__ is preferred, with a fallback to
+// UINTPTR_MAX and then to __WORDSIZE. Compilation is aborted when none of the
+// sources gives a known size.
+#if defined(__SIZEOF_POINTER__)
+#  define PLATFORM_SIZEOF_PTR __SIZEOF_POINTER__
+#elif defined(UINTPTR_MAX)
+#  if (UINTPTR_MAX == 0xffffffff)
+#    define PLATFORM_SIZEOF_PTR 4
+#  elif (UINTPTR_MAX == 0xffffffffffffffff)  // NOLINT
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#elif defined(__WORDSIZE)
+#  if (__WORDSIZE == 32)
+#    define PLATFORM_SIZEOF_PTR 4
+// NOTE: This has to be #elif: "#else if" is handled as a plain #else with the
+// condition ignored, which would report 8 for any word size other than 32.
+#  elif (__WORDSIZE == 64)
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#endif
+#if !defined(PLATFORM_SIZEOF_PTR)
+#  error "Cannot find pointer size"
+#endif
+
+#if (UINT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_INT 4
+#elif (UINT_MAX == 0xffffffffffffffff)  // NOLINT
+#  define PLATFORM_SIZEOF_INT 8
+#else
+#  error "Cannot find int size"
+#endif
+
+#if (USHRT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_SHORT 4
+#elif (USHRT_MAX == 0xffff)  // NOLINT
+#  define PLATFORM_SIZEOF_SHORT 2
+#else
+#  error "Cannot find short size"
+#endif
+
+#endif  // __BUILD_CONFIG_H__
--- a/intern/numaapi/source/numaapi.c
+++ b/intern/numaapi/source/numaapi.c
@ -0,0 +1,37 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "numaapi.h"
+
+#include <assert.h>
+
+// Convert the given result code to its textual name.
+//
+// The returned pointer refers to a string literal and must not be freed.
+// A value which is not a known NUMAAPI_Result triggers an assertion failure
+// and, when assertions are compiled out, gives "UNKNOWN".
+const char* numaAPI_ResultAsString(NUMAAPI_Result result) {
+  if (result == NUMAAPI_SUCCESS) {
+    return "SUCCESS";
+  }
+  if (result == NUMAAPI_NOT_AVAILABLE) {
+    return "NOT_AVAILABLE";
+  }
+  if (result == NUMAAPI_ERROR) {
+    return "ERROR";
+  }
+  if (result == NUMAAPI_ERROR_ATEXIT) {
+    return "ERROR_AT_EXIT";
+  }
+  assert(!"Unknown result was passed to numapi_ResultAsString().");
+  return "UNKNOWN";
+}
--- a/intern/numaapi/source/numaapi_linux.c
+++ b/intern/numaapi/source/numaapi_linux.c
@ -0,0 +1,272 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "build_config.h"
+
+#if OS_LINUX
+
+#include "numaapi.h"
+
+#include <stdlib.h>
+
+#ifndef WITH_DYNLOAD
+#  include <numa.h>
+#else
+#  include <dlfcn.h>
+#endif
+
+#ifdef WITH_DYNLOAD
+
+// Descriptor numa library.
+static void* numa_lib;
+
+// Types of all symbols which are read from the library.
+struct bitmask;
+typedef int tnuma_available(void);
+typedef int tnuma_max_node(void);
+typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
+typedef long tnuma_node_size(int node, long* freep);
+typedef int tnuma_run_on_node(int node);
+typedef void* tnuma_alloc_onnode(size_t size, int node);
+typedef void* tnuma_alloc_local(size_t size);
+typedef void tnuma_free(void* start, size_t size);
+typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
+typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
+                                   unsigned int n);
+typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
+                                             unsigned int n);
+typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
+typedef void tnuma_bitmask_free(struct bitmask *bitmask);
+typedef struct bitmask* tnuma_allocate_cpumask(void);
+typedef struct bitmask* tnuma_allocate_nodemask(void);
+typedef void tnuma_free_cpumask(struct bitmask* bitmask);
+typedef void tnuma_free_nodemask(struct bitmask* bitmask);
+typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
+typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
+typedef void tnuma_set_localalloc(void);
+
+// Actual symbols.
+static tnuma_available* numa_available;
+static tnuma_max_node* numa_max_node;
+static tnuma_node_to_cpus* numa_node_to_cpus;
+static tnuma_node_size* numa_node_size;
+static tnuma_run_on_node* numa_run_on_node;
+static tnuma_alloc_onnode* numa_alloc_onnode;
+static tnuma_alloc_local* numa_alloc_local;
+static tnuma_free* numa_free;
+static tnuma_bitmask_clearall* numa_bitmask_clearall;
+static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
+static tnuma_bitmask_setbit* numa_bitmask_setbit;
+static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
+static tnuma_bitmask_free* numa_bitmask_free;
+static tnuma_allocate_cpumask* numa_allocate_cpumask;
+static tnuma_allocate_nodemask* numa_allocate_nodemask;
+static tnuma_free_nodemask* numa_free_nodemask;
+static tnuma_free_cpumask* numa_free_cpumask;
+static tnuma_run_on_node_mask* numa_run_on_node_mask;
+static tnuma_set_interleave_mask* numa_set_interleave_mask;
+static tnuma_set_localalloc* numa_set_localalloc;
+
+// Open the first library from the given list which can be loaded.
+//
+// paths is a NULL-terminated array of library names, which are passed to
+// dlopen() in the given order. Returns handle of the first library which
+// got opened, or NULL when none of them could be opened.
+static void* findLibrary(const char** paths) {
+  for (const char** path = paths; *path != NULL; ++path) {
+    void* handle = dlopen(*path, RTLD_LAZY);
+    if (handle != NULL) {
+      return handle;
+    }
+  }
+  return NULL;
+}
+
+// De-initialization handler: closes the NUMA library if it was opened and
+// resets the handle, so calling it again is a no-op.
+static void numaExit(void) {
+  if (numa_lib != NULL) {
+    dlclose(numa_lib);
+    numa_lib = NULL;
+  }
+}
+
+// Open the NUMA library and resolve all the symbols used by this file.
+//
+// The actual work is only done by the first call, all subsequent calls return
+// the result of that first attempt.
+//
+// Returns:
+//   NUMAAPI_SUCCESS        Library is opened and all required symbols are
+//                          resolved.
+//   NUMAAPI_ERROR_ATEXIT   The de-initialization handler could not be
+//                          registered.
+//   NUMAAPI_NOT_AVAILABLE  None of the known library names could be opened.
+//   NUMAAPI_ERROR          Library is opened, but a required symbol is
+//                          missing from it.
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Find appropriate .so library.
+  const char* numa_paths[] = {
+      "libnuma.so.1",
+      "libnuma.so",
+      NULL};
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  numa_lib = findLibrary(numa_paths);
+  if (numa_lib == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+  // Load symbols.
+  //
+  // Required symbols are called unconditionally by the API functions, so a
+  // missing one is an initialization error rather than a crash on the first
+  // use. Optional symbols are checked against NULL at their call sites.
+  bool has_missing_symbol = false;
+
+#define NUMA_LIBRARY_FIND_OPTIONAL(name)        \
+  do {                                          \
+    name = (t##name *)dlsym(numa_lib, #name);   \
+  } while (0)
+#define NUMA_LIBRARY_FIND(name)                 \
+  do {                                          \
+    NUMA_LIBRARY_FIND_OPTIONAL(name);           \
+    if (name == NULL) {                         \
+      has_missing_symbol = true;                \
+    }                                           \
+  } while (0)
+
+  NUMA_LIBRARY_FIND(numa_available);
+  NUMA_LIBRARY_FIND(numa_max_node);
+  NUMA_LIBRARY_FIND(numa_node_to_cpus);
+  NUMA_LIBRARY_FIND(numa_node_size);
+  NUMA_LIBRARY_FIND(numa_run_on_node);
+  NUMA_LIBRARY_FIND(numa_alloc_onnode);
+  NUMA_LIBRARY_FIND(numa_alloc_local);
+  NUMA_LIBRARY_FIND(numa_free);
+  NUMA_LIBRARY_FIND(numa_bitmask_clearall);
+  NUMA_LIBRARY_FIND(numa_bitmask_isbitset);
+  NUMA_LIBRARY_FIND(numa_bitmask_setbit);
+  NUMA_LIBRARY_FIND(numa_bitmask_nbytes);
+  NUMA_LIBRARY_FIND(numa_bitmask_free);
+  NUMA_LIBRARY_FIND(numa_allocate_cpumask);
+  NUMA_LIBRARY_FIND(numa_allocate_nodemask);
+  // Callers fall back to numa_bitmask_free() when these two are missing.
+  NUMA_LIBRARY_FIND_OPTIONAL(numa_free_cpumask);
+  NUMA_LIBRARY_FIND_OPTIONAL(numa_free_nodemask);
+  NUMA_LIBRARY_FIND(numa_run_on_node_mask);
+  NUMA_LIBRARY_FIND(numa_set_interleave_mask);
+  NUMA_LIBRARY_FIND(numa_set_localalloc);
+
+#undef NUMA_LIBRARY_FIND
+#undef NUMA_LIBRARY_FIND_OPTIONAL
+
+  if (has_missing_symbol) {
+    // The library stays opened, it is closed by the numaExit() handler.
+    result = NUMAAPI_ERROR;
+    return result;
+  }
+
+  result = NUMAAPI_SUCCESS;
+  return result;
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Prepare the NUMA API for use.
+//
+// With WITH_DYNLOAD the NUMA library is loaded first and a failure to do so
+// is returned as-is. After that the library is asked whether NUMA is actually
+// available.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#ifdef WITH_DYNLOAD
+  const NUMAAPI_Result load_result = loadNumaSymbols();
+  if (load_result != NUMAAPI_SUCCESS) {
+    return load_result;
+  }
+#endif
+  return (numa_available() < 0) ? NUMAAPI_NOT_AVAILABLE : NUMAAPI_SUCCESS;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Number of nodes is the index of the last node plus one.
+int numaAPI_GetNumNodes(void) {
+  const int last_node_index = numa_max_node();
+  return last_node_index + 1;
+}
+
+// Node is considered available when the library reports a positive size
+// for it.
+bool numaAPI_IsNodeAvailable(int node) {
+  return numa_node_size(node, NULL) > 0;
+}
+
+// Count the processors which belong to the given node.
+//
+// The CPU mask of the node is queried from the library and the number of
+// bits set in it is returned.
+int numaAPI_GetNumNodeProcessors(int node) {
+  struct bitmask* node_cpus = numa_allocate_cpumask();
+  numa_node_to_cpus(node, node_cpus);
+  // The mask reports its size in bytes, convert it to the number of bits.
+  const unsigned int total_bits = numa_bitmask_nbytes(node_cpus) * 8;
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  int count = 0;
+  unsigned int index = 0;
+  while (index < total_bits) {
+    count += numa_bitmask_isbitset(node_cpus, index) ? 1 : 0;
+    ++index;
+  }
+#ifdef WITH_DYNLOAD
+  // Fall back to the generic bitmask free when the dedicated symbol was not
+  // found in the loaded library.
+  if (numa_free_cpumask == NULL) {
+    numa_bitmask_free(node_cpus);
+  } else {
+    numa_free_cpumask(node_cpus);
+  }
+#else
+  numa_free_cpumask(node_cpus);
+#endif
+  return count;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Run the current process on the given node.
+//
+// This is implemented via the affinity of the calling thread, and the result
+// of that operation is what is reported back to the caller instead of an
+// unconditional success.
+bool numaAPI_RunProcessOnNode(int node) {
+  return numaAPI_RunThreadOnNode(node);
+}
+
+// Run the current thread on the given node.
+//
+// Returns true if the affinity has successfully changed, false when the node
+// index is negative or when the library failed to change the affinity. The
+// memory allocation policy is only touched after the affinity was changed.
+bool numaAPI_RunThreadOnNode(int node) {
+  // The bit index of the mask is unsigned, a negative node would silently
+  // become a huge bit index.
+  if (node < 0) {
+    return false;
+  }
+  // Construct bit mask from node index.
+  struct bitmask* node_mask = numa_allocate_nodemask();
+  numa_bitmask_clearall(node_mask);
+  numa_bitmask_setbit(node_mask, (unsigned int)node);
+  // numa_run_on_node_mask() returns 0 on success and -1 on error.
+  const bool is_affinity_changed = (numa_run_on_node_mask(node_mask) == 0);
+  if (is_affinity_changed) {
+    // TODO(sergey): The following commands are based on x265 code, we might
+    // want to make those optional, or require to call those explicitly.
+    //
+    // Current assumption is that this is similar to SetThreadGroupAffinity().
+    numa_set_interleave_mask(node_mask);
+    numa_set_localalloc();
+  }
+#ifdef WITH_DYNLOAD
+  // Fall back to the generic bitmask free when the dedicated symbol was not
+  // found in the loaded library.
+  if (numa_free_nodemask != NULL) {
+    numa_free_nodemask(node_mask);
+  } else {
+    numa_bitmask_free(node_mask);
+  }
+#else
+  numa_free_nodemask(node_mask);
+#endif
+  return is_affinity_changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Allocate size bytes on the given node, forwarding to numa_alloc_onnode().
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  void* memory = numa_alloc_onnode(size, node);
+  return memory;
+}
+
+// Allocate size bytes of local memory, forwarding to numa_alloc_local().
+void* numaAPI_AllocateLocal(size_t size) {
+  void* memory = numa_alloc_local(size);
+  return memory;
+}
+
+// Free size bytes of memory starting at start, forwarding to numa_free().
+void numaAPI_Free(void* start, size_t size) {
+  numa_free(start, size);
+}
+
+#endif  // OS_LINUX
--- a/intern/numaapi/source/numaapi_stub.c
+++ b/intern/numaapi/source/numaapi_stub.c
@ -0,0 +1,82 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "numaapi.h"
+
+#include "build_config.h"
+
+// Stub implementation for platforms which don't have NUMA support.
+
+#if !OS_LINUX && !OS_WIN
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// NUMA is never available in the stub implementation.
+NUMAAPI_Result numaAPI_Initialize(void) {
+  return NUMAAPI_NOT_AVAILABLE;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// There are no nodes to report.
+int numaAPI_GetNumNodes(void) {
+  return 0;
+}
+
+// No node is ever available.
+bool numaAPI_IsNodeAvailable(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+// No node has any processors.
+int numaAPI_GetNumNodeProcessors(int node) {
+  (void) node;  // Ignored.
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Affinity is never changed, which is reported as a failure.
+bool numaAPI_RunProcessOnNode(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+// Affinity is never changed, which is reported as a failure.
+bool numaAPI_RunThreadOnNode(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// No memory is ever allocated, NULL is always returned.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  (void) size;  // Ignored.
+  (void) node;  // Ignored.
+  return NULL;
+}
+
+// No memory is ever allocated, NULL is always returned.
+void* numaAPI_AllocateLocal(size_t size) {
+  (void) size;  // Ignored.
+  return NULL;
+}
+
+// Nothing to free, since nothing is ever allocated by the stub.
+void numaAPI_Free(void* start, size_t size) {
+  (void) start;  // Ignored.
+  (void) size;  // Ignored.
+}
+
+#endif  // !OS_LINUX && !OS_WIN
--- a/intern/numaapi/source/numaapi_win32.c
+++ b/intern/numaapi/source/numaapi_win32.c
@ -0,0 +1,253 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin (sergey.vfx@gmail.com)
+
+#include "build_config.h"
+
+#if OS_WIN
+
+#include "numaapi.h"
+
+#ifndef NOGDI
+#  define NOGDI
+#endif
+#ifndef NOMINMAX
+#  define NOMINMAX
+#endif
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOCOMM
+#  define NOCOMM
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <windows.h>
+
+#if ARCH_CPU_64_BITS
+#  include <VersionHelpers.h>
+#endif
+
+#include <stdio.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Handle of Kernel32.dll, from which all the symbols below are resolved.
+static HMODULE kernel_lib;
+
+// Signatures of all symbols which are looked up in the library at runtime.
+
+// NUMA function types.
+typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
+typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
+typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
+                                          GROUP_AFFINITY* processor_mask);
+typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
+typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
+                                   LPVOID address,
+                                   SIZE_T size,
+                                   DWORD  allocation_type,
+                                   DWORD  protect,
+                                   DWORD  preferred);
+typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
+// Threading function types.
+typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
+                                      DWORD_PTR process_affinity_mask);
+typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
+                                      const GROUP_AFFINITY* GroupAffinity,
+                                      GROUP_AFFINITY* PreviousGroupAffinity);
+typedef DWORD t_GetCurrentProcessorNumber(void);
+
+// NUMA symbols (function pointers, assigned by loadNumaSymbols()).
+static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
+static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
+static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
+static t_GetNumaProcessorNode* _GetNumaProcessorNode;
+static t_VirtualAllocExNuma* _VirtualAllocExNuma;
+static t_VirtualFree* _VirtualFree;
+// Threading symbols (function pointers, assigned by loadNumaSymbols()).
+static t_SetProcessAffinityMask* _SetProcessAffinityMask;
+static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
+static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
+
+static void numaExit(void) {
+  // No-op atexit() handler. TODO(sergey): Consider closing library here.
+}
+
+// Load Kernel32.dll and resolve all NUMA and threading symbols used by this
+// file.
+//
+// The work is only done on the first call, later calls return the cached
+// result of the first one.
+//
+// Returns:
+//  - NUMAAPI_ERROR_ATEXIT when the de-initialization handler could not be
+//    registered.
+//  - NUMAAPI_NOT_AVAILABLE when the library or any of the symbols could not
+//    be found (calling through a missing symbol would be a NULL function
+//    pointer call).
+//  - NUMAAPI_SUCCESS when all symbols are ready to be used.
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  kernel_lib = LoadLibraryA("Kernel32.dll");
+  if (kernel_lib == NULL) {
+    // Nothing to look symbols up in, the result stays "not available".
+    return result;
+  }
+  // Load symbols, keeping track of whether every lookup succeeded.
+  bool all_symbols_found = true;
+
+#define _LIBRARY_FIND(lib, name)                       \
+  do {                                                 \
+    _##name = (t_##name *)GetProcAddress(lib, #name);  \
+    if (_##name == NULL) {                             \
+      all_symbols_found = false;                       \
+    }                                                  \
+  } while (0)
+#define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
+
+  // NUMA.
+  KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
+  KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
+  KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
+  KERNEL_LIBRARY_FIND(VirtualFree);
+  // Threading.
+  KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
+  KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
+  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
+
+#undef KERNEL_LIBRARY_FIND
+#undef _LIBRARY_FIND
+
+  if (all_symbols_found) {
+    result = NUMAAPI_SUCCESS;
+  }
+  return result;
+}
+
+// Initialize the NUMA API on Windows.
+//
+// Returns NUMAAPI_NOT_AVAILABLE on 32 bit builds and on systems older than
+// Windows 7. Otherwise returns the result of loading the Kernel32 symbols,
+// so a failure to set the library up is not reported as a success.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#if !ARCH_CPU_64_BITS
+  // No NUMA on 32 bit platforms.
+  return NUMAAPI_NOT_AVAILABLE;
+#else
+  if (!IsWindows7OrGreater()) {
+    // Require Windows 7 or higher.
+    return NUMAAPI_NOT_AVAILABLE;
+  }
+  return loadNumaSymbols();
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Get the number of node indices to iterate over: the highest node number
+// reported by the system plus one. Returns 0 when the query fails.
+int numaAPI_GetNumNodes(void) {
+  ULONG highest_node;
+  const BOOL is_ok = _GetNumaHighestNodeNumber(&highest_node);
+  if (is_ok) {
+    // TODO(sergey): Resolve the type narrowing.
+    // NOTE: This is not necessarily a total amount of nodes in the system.
+    return (int)highest_node + 1;
+  }
+  return 0;
+}
+
+// Check whether the given node is usable.
+//
+// A node is considered usable when its processor mask can be queried and has
+// at least one processor in it. This check is needed because
+// numaAPI_GetNumNodes() is not guaranteed to give the total amount of nodes
+// and some nodes might be unavailable.
+bool numaAPI_IsNodeAvailable(int node) {
+  ULONGLONG node_mask;
+  const BOOL is_ok = _GetNumaNodeProcessorMask(node, &node_mask);
+  // The mask is only inspected when the query succeeded.
+  return is_ok && node_mask != 0;
+}
+
+// Get the number of processors of the given node, which is the number of bits
+// set in the processor mask of the node. Returns 0 when the mask can not be
+// queried.
+int numaAPI_GetNumNodeProcessors(int node) {
+  ULONGLONG node_mask;
+  if (!_GetNumaNodeProcessorMask(node, &node_mask)) {
+    return 0;
+  }
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  int count = 0;
+  for (ULONGLONG bits = node_mask; bits != 0; bits >>= 1) {
+    if (bits & 1) {
+      ++count;
+    }
+  }
+  return count;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Restrict the current process to the processors of the given node.
+// Returns false when either the processor mask of the node can not be queried
+// or the affinity can not be changed.
+bool numaAPI_RunProcessOnNode(int node) {
+  // TODO(sergey): Make sure requested node is within active CPU group.
+  HANDLE current_process = GetCurrentProcess();
+  ULONGLONG node_mask;
+  if (!_GetNumaNodeProcessorMask(node, &node_mask)) {
+    return false;
+  }
+  // Change affinity of the process to make it run on the given node.
+  return _SetProcessAffinityMask(current_process, node_mask) != 0;
+}
+
+// Restrict the calling thread to the processor group affinity of the given
+// node. Returns false when either the affinity of the node can not be queried
+// or it can not be assigned to the thread.
+bool numaAPI_RunThreadOnNode(int node) {
+  HANDLE current_thread = GetCurrentThread();
+  GROUP_AFFINITY node_affinity = { 0 };
+  if (!_GetNumaNodeProcessorMaskEx(node, &node_affinity)) {
+    return false;
+  }
+  // The previous affinity of the thread is not of interest, hence NULL.
+  return _SetThreadGroupAffinity(current_thread, &node_affinity, NULL) != 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Reserve and commit size bytes of read-write memory in the current process,
+// passing the given node as the preferred one. The address is left up to the
+// system. Returns whatever VirtualAllocExNuma() returned.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  const DWORD allocation_type = MEM_RESERVE | MEM_COMMIT;
+  const DWORD protection = PAGE_READWRITE;
+  void* memory = _VirtualAllocExNuma(
+      GetCurrentProcess(), NULL, size, allocation_type, protection, node);
+  return memory;
+}
+
+// Allocate memory on the node of the processor which currently runs the
+// calling thread. Returns NULL when the node of the processor can not be
+// determined.
+void* numaAPI_AllocateLocal(size_t size) {
+  UCHAR local_node;
+  const UCHAR processor = (UCHAR)_GetCurrentProcessorNumber();
+  const BOOL is_ok = _GetNumaProcessorNode(processor, &local_node);
+  if (is_ok) {
+    return numaAPI_AllocateOnNode(size, local_node);
+  }
+  return NULL;
+}
+
+// Release memory which was allocated by numaAPI_AllocateOnNode() or
+// numaAPI_AllocateLocal().
+//
+// The size is part of the public signature but is not forwarded to the
+// system: VirtualFree() requires the size to be 0 when used with MEM_RELEASE
+// and fails for any other value, which would leave the memory allocated.
+void numaAPI_Free(void* start, size_t size) {
+  (void) size;  // Ignored, the whole allocation is released.
+  if (!_VirtualFree(start, 0, MEM_RELEASE)) {
+    // TODO(sergey): Throw an error!
+  }
+}
+
+#endif  // OS_WIN
--- a/source/blender/blenlib/BLI_system.h
+++ b/source/blender/blenlib/BLI_system.h
@ -30,6 +30,10 @@
 int BLI_cpu_support_sse2(void);
 void BLI_system_backtrace(FILE *fp);

+
+/* Get CPU brand, result is to be MEM_freeN()-ed. */
+char *BLI_cpu_brand_string(void);
+
 /* getpid */
 #ifdef WIN32
 #  define BLI_SYSTEM_PID_H <process.h>
--- a/source/blender/blenlib/BLI_threads.h
+++ b/source/blender/blenlib/BLI_threads.h
@ -204,6 +204,12 @@ void BLI_thread_queue_nowait(ThreadQueue *queue);
 #  define BLI_thread_local_set(name, value) name = value
 #endif  /* defined(__APPLE__) */

+/* **** Special functions to help performance on crazy NUMA setups. **** */
+
+/* Make sure process/thread is using NUMA node with fast memory access. */
+void BLI_thread_put_process_on_fast_node(void);
+void BLI_thread_put_thread_on_fast_node(void);
+
 #ifdef __cplusplus
 }
 #endif
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@ -30,6 +30,7 @@ set(INC
 	../../../intern/guardedalloc
 	../../../intern/atomic
 	../../../intern/eigen
+	../../../intern/numaapi/include
 	../../../extern/wcwidth
 )

--- a/source/blender/blenlib/intern/system.c
+++ b/source/blender/blenlib/intern/system.c
@ -27,6 +27,7 @@

 #include "BLI_utildefines.h"
 #include "BLI_system.h"
+#include "BLI_string.h"

 #include "MEM_guardedalloc.h"

@ -138,3 +139,40 @@ void BLI_system_backtrace(FILE *fp)

 }
 /* end BLI_system_backtrace */
+
+/* NOTE: The code for CPU brand string is adapted from Cycles. */
+
+#if !defined(_WIN32) || defined(FREE_WINDOWS)  /* Own cpuid wrapper; data[] receives EAX, EBX, ECX, EDX. */
+static void __cpuid(int data[4], int selector)
+{
+#if defined(__x86_64__)
+	asm("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) : "a"(selector));
+#elif defined(__i386__)  /* EBX is saved and restored by hand, its value goes out via a scratch register. */
+	asm("pushl %%ebx    \n\t"
+		"cpuid          \n\t"
+		"movl %%ebx, %1 \n\t"
+		"popl %%ebx     \n\t"
+		: "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3])
+		: "a"(selector)
+		: "ebx");
+#else  /* Neither __x86_64__ nor __i386__: report all zeros. */
+	data[0] = data[1] = data[2] = data[3] = 0;
+#endif  /* __x86_64__ / __i386__ */
+}
+#endif  /* !defined(_WIN32) || defined(FREE_WINDOWS) */
+
+/**
+ * Get the CPU brand string as reported by the extended cpuid leaves
+ * 0x80000002..0x80000004.
+ *
+ * \return Newly allocated copy of the brand string which is to be freed by
+ * the caller, or NULL when the brand string leaves are not supported (which
+ * includes the case when cpuid reports all zeros).
+ */
+char *BLI_cpu_brand_string(void)
+{
+	/* The three leaves fill 48 bytes. One extra byte which is never written
+	 * by cpuid guarantees the string is terminated for BLI_strdup(). */
+	char buf[49] = { 0 };
+	int result[4] = { 0 };
+	__cpuid(result, 0x80000000);
+	/* The highest supported extended leaf is an unsigned value. A signed
+	 * comparison would accept any value below 0x80000000 (including 0),
+	 * since 0x80000004 is negative when converted to int. */
+	if ((unsigned int)result[0] >= 0x80000004u) {
+		__cpuid((int *)(buf + 0), 0x80000002);
+		__cpuid((int *)(buf + 16), 0x80000003);
+		__cpuid((int *)(buf + 32), 0x80000004);
+		char *brand = BLI_strdup(buf);
+		/* TODO(sergey): Make it a bit more presentable by removing trademark. */
+		return brand;
+	}
+	return NULL;
+}
--- a/source/blender/blenlib/intern/threads.c
+++ b/source/blender/blenlib/intern/threads.c
@ -37,6 +37,7 @@

 #include "BLI_listbase.h"
 #include "BLI_gsqueue.h"
+#include "BLI_system.h"
 #include "BLI_task.h"
 #include "BLI_threads.h"

@ -55,6 +56,7 @@
 #endif

 #include "atomic_ops.h"
+#include "numaapi.h"

 #if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
 #  define USE_APPLE_OMP_FIX
@ -126,6 +128,7 @@ static pthread_mutex_t _colormanage_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _fftw_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t _view3d_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t mainid;
+static bool is_numa_available = false;
 static unsigned int thread_levels = 0;  /* threads can be invoked inside threads */
 static int num_threads_override = 0;

@ -155,6 +158,9 @@ void BLI_threadapi_init(void)
 	mainid = pthread_self();

 	BLI_spin_init(&_malloc_lock);
+	if (numaAPI_Initialize() == NUMAAPI_SUCCESS) {
+		is_numa_available = true;
+	}
 }

 void BLI_threadapi_exit(void)
@ -840,3 +846,98 @@ void BLI_threaded_malloc_end(void)
 		MEM_set_lock_callback(NULL, NULL);
 	}
 }
+
+/* **** Special functions to help performance on crazy NUMA setups. **** */
+
+/* Check whether the CPU brand string names a Threadripper or an EPYC, which
+ * are all treated as having a topology similar to Threadripper2.
+ *
+ * The check is only performed on the first call, the result is cached in
+ * static variables and returned by all later calls. */
+static bool check_is_threadripper2_alike_topology(void)
+{
+	/* NOTE: We hope operating system does not support CPU hotswap to
+	 * a different brand. And that SMP of different types is also not
+	 * encouraged by the system. */
+	static bool is_initialized = false;
+	static bool is_threadripper2 = false;
+	if (is_initialized) {
+		return is_threadripper2;
+	}
+	is_initialized = true;
+	char *cpu_brand = BLI_cpu_brand_string();
+	if (cpu_brand == NULL) {
+		return false;
+	}
+	if (strstr(cpu_brand, "Threadripper")) {
+		/* NOTE: We consider all Threadrippers having similar topology to
+		 * the second one. This is because we are trying to utilize NUMA node
+		 * 0 as much as possible. This node does exist on earlier versions of
+		 * threadripper and setting affinity to it should not have negative
+		 * effect.
+		 * This allows us to avoid per-model check, making the code more
+		 * reliable for the CPUs which are not yet released.
+		 */
+		is_threadripper2 = true;
+	}
+	/* NOTE: While all dies of EPYC have a memory controller, only two of them
+	 * have access to the lower-indexed DDR slots. Those dies are same as on
+	 * Threadripper2 with the memory controller.
+	 * Now, it is rather likely that reasonable amount of users don't max
+	 * up their DDR slots, making it only two dies connected to a DDR slot
+	 * with actual memory in it. */
+	if (strstr(cpu_brand, "EPYC")) {
+		/* NOTE: Similarly to Threadripper we do not do model check. */
+		is_threadripper2 = true;
+	}
+	/* The brand string is an allocated copy which is to be freed by us. */
+	MEM_freeN(cpu_brand);
+	return is_threadripper2;
+}
+
+/* Bind the whole process to NUMA node 0. Does nothing when the NUMA API was
+ * not successfully initialized. */
+static void threadripper_put_process_on_fast_node(void)
+{
+	/* NOTE: Technically, we can use NUMA nodes 0 and 2, and using both of
+	 * them in the affinity mask will allow OS to schedule threads more
+	 * flexibly, possibly increasing overall performance when multiple apps
+	 * are crunching numbers.
+	 *
+	 * However, if scene fits into memory adjacent to a single die we don't
+	 * want OS to re-schedule the process to another die since that will make
+	 * it further away from memory allocated for .blend file. */
+	/* NOTE: Even if NUMA is available in the API but is disabled in BIOS on
+	 * this workstation we still proceed here. If NUMA is disabled it will be
+	 * a single node, so our action is no-visible-changes, but allows to keep
+	 * things simple and unified. */
+	if (is_numa_available) {
+		numaAPI_RunProcessOnNode(0);
+	}
+}
+
+/* Bind the calling thread to NUMA node 0. Does nothing when the NUMA API was
+ * not successfully initialized. */
+static void threadripper_put_thread_on_fast_node(void)
+{
+	/* NOTE: This is where things become more interesting. On the one hand
+	 * we can use nodes 0 and 2 and allow operating system to do balancing
+	 * of processes/threads for the maximum performance when multiple apps
+	 * are running.
+	 * On the other hand, however, we probably want to use same node as the
+	 * main thread since that's where the memory of .blend file is likely
+	 * to be allocated.
+	 * Since the main thread is currently on node 0, we also put thread on
+	 * same node. */
+	/* NOTE: This is also done when NUMA is disabled in BIOS: it will then be
+	 * a single node, so there are no visible changes. */
+	if (is_numa_available) {
+		numaAPI_RunThreadOnNode(0);
+	}
+}
+
+/* Move the process to the fast NUMA node. Only has an effect on CPUs whose
+ * brand string passes the Threadripper2-alike topology check. */
+void BLI_thread_put_process_on_fast_node(void)
+{
+	if (!check_is_threadripper2_alike_topology()) {
+		return;
+	}
+	threadripper_put_process_on_fast_node();
+}
+
+/* Move the calling thread to the fast NUMA node. Only has an effect on CPUs
+ * whose brand string passes the Threadripper2-alike topology check. */
+void BLI_thread_put_thread_on_fast_node(void)
+{
+	if (!check_is_threadripper2_alike_topology()) {
+		return;
+	}
+	threadripper_put_thread_on_fast_node();
+}
--- a/source/blender/windowmanager/intern/wm_jobs.c
+++ b/source/blender/windowmanager/intern/wm_jobs.c
@ -334,6 +334,7 @@ static void *do_job_thread(void *job_v)
 {
 	wmJob *wm_job = job_v;

+	BLI_thread_put_thread_on_fast_node();
 	wm_job->startjob(wm_job->run_customdata, &wm_job->stop, &wm_job->do_update, &wm_job->progress);
 	wm_job->ready = true;

--- a/source/creator/creator.c
+++ b/source/creator/creator.c
@ -52,6 +52,7 @@
 #include "BLI_callbacks.h"
 #include "BLI_string.h"
 #include "BLI_system.h"
+#include "BLI_threads.h"

 /* mostly init functions */
 #include "BKE_appdir.h"
@ -364,6 +365,7 @@ int main(
 	BKE_appdir_program_path_init(argv[0]);

 	BLI_threadapi_init();
+	BLI_thread_put_process_on_fast_node();

 	DNA_sdna_current_init();