Cycles: add initial CUDA 10.0 support, but only recommend use for Turing cards.

Rendering errors may still occur when CUDA 10.0 is used with older (pre-Turing) graphics cards.
This commit is contained in:
Brecht Van Lommel 2018-12-04 12:34:59 +01:00
parent 3f31ec8398
commit b14ec18601
7 changed files with 33 additions and 9 deletions

View File

@ -408,7 +408,7 @@ option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF)
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 CACHE STRING "CUDA architectures to build binaries for")
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_72 sm_75 CACHE STRING "CUDA architectures to build binaries for")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)

View File

@ -126,7 +126,6 @@ if 'cmake' in builder:
# Prepare CMake options needed to configure cuda binaries compilation, 64bit only.
if bits == 64:
cuda_cmake_options.append("-DWITH_CYCLES_CUDA_BINARIES=%s" % ('ON' if build_cubins else 'OFF'))
cuda_cmake_options.append("-DCYCLES_CUDA_BINARIES_ARCH=sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70")
if build_cubins or 'cuda' in targets:
cuda_cmake_options.append("-DCUDA_64_BIT_DEVICE_CODE=ON")

View File

@ -54,7 +54,7 @@ set(WITH_X11_XF86VMODE ON CACHE BOOL "" FORCE)
set(WITH_PLAYER ON CACHE BOOL "" FORCE)
set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61 CACHE STRING "" FORCE)
set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_72;sm_75 CACHE STRING "" FORCE)
# platform dependent options
if(UNIX AND NOT APPLE)

View File

@ -619,7 +619,11 @@ static int cuewNvrtcInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *nvrtc_paths[] = {"nvrtc64_80.dll", "nvrtc64_90.dll", "nvrtc64_91.dll", NULL};
const char *nvrtc_paths[] = {"nvrtc64_80.dll",
"nvrtc64_90.dll",
"nvrtc64_91.dll",
"nvrtc64_10_0.dll",
NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *nvrtc_paths[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};

View File

@ -315,6 +315,13 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
endif()
endif()
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
# When CUDA binaries are enabled and the nvrtc-based cubin compiler was
# requested, turn WITH_CYCLES_CUBIN_COMPILER back off for CUDA 10.0 and newer.
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER)
  # CUDA_VERSION is quoted so that an empty or undefined value simply makes
  # the check false instead of producing a malformed if() expression.
  # "NOT ... VERSION_LESS" is equivalent to VERSION_GREATER_EQUAL but is also
  # understood by CMake releases older than 3.7.
  # NOTE(review): assumes CUDA_VERSION is in dotted "major.minor" form in this
  # file - confirm against where it is set.
  if(NOT "${CUDA_VERSION}" VERSION_LESS 10.0)
    message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
    set(WITH_CYCLES_CUBIN_COMPILER OFF)
  endif()
endif()
# Subdirectories

View File

@ -343,11 +343,11 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91")
if(CUDA_VERSION MATCHES "90" OR CUDA_VERSION MATCHES "91" OR CUDA_VERSION MATCHES "100")
else()
message(WARNING
"CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
"build may succeed but only CUDA 9.0 and 9.1 are officially supported")
"build may succeed but only CUDA 9.0, 9.1 and 10.0 are officially supported")
endif()
# build for each arch
@ -442,6 +442,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
if(${arch} MATCHES "sm_2.")
  # sm_2x architectures are reported and skipped; no kernel is added for them.
  message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
# CUDA_VERSION is set earlier in this file to
# "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}" (e.g. "90", "91", "100"), so it
# has to be compared as a plain integer. A version comparison against 10.0
# would read "90" as version 90, which is >= 10.0, and sm_7x would never be
# skipped when building with CUDA 9.x.
elseif(${arch} MATCHES "sm_7." AND "${CUDA_VERSION}" LESS 100)
  message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
else()
# Compile regular kernel
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)

View File

@ -52,8 +52,8 @@
# define CUDA_KERNEL_MAX_REGISTERS 63
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* 5.0, 5.2, 5.3, 6.0, 6.1 */
#elif __CUDA_ARCH__ >= 500
/* 5.x, 6.x */
#elif __CUDA_ARCH__ <= 699
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
# define CUDA_BLOCK_MAX_THREADS 1024
@ -62,13 +62,25 @@
/* tunable parameters */
# define CUDA_THREADS_BLOCK_WIDTH 16
/* CUDA 9.0 seems to cause slowdowns on high-end Pascal cards unless we increase the number of registers */
# if __CUDACC_VER_MAJOR__ == 9 && __CUDA_ARCH__ >= 600
# if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
# define CUDA_KERNEL_MAX_REGISTERS 64
# else
# define CUDA_KERNEL_MAX_REGISTERS 48
# endif
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
/* 7.x */
#elif __CUDA_ARCH__ <= 799
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
# define CUDA_BLOCK_MAX_THREADS 1024
# define CUDA_THREAD_MAX_REGISTERS 255
/* tunable parameters */
# define CUDA_THREADS_BLOCK_WIDTH 16
# define CUDA_KERNEL_MAX_REGISTERS 64
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72
/* unknown architecture */
#else