Cycles: Allow PTX targets for CUDA kernel build.

This is intended primarily for developers on Windows.
CUDA architectures of the form compute_xx are now supported and are built as PTX instead of cubin.
This allows for quicker builds, at the expense of the CUDA driver running ptxas the first time a kernel is loaded.

Differential Revision: https://developer.blender.org/D5953
This commit is contained in:
Stefan Werner 2019-10-16 10:29:04 +02:00
parent 76e8d2cc17
commit 35a545b752
2 changed files with 23 additions and 7 deletions

View File

@ -454,6 +454,12 @@ class CUDADevice : public Device {
VLOG(1) << "Using precompiled kernel.";
return cubin;
}
const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
if (path_exists(ptx)) {
VLOG(1) << "Using precompiled kernel.";
return ptx;
}
}
const string common_cflags = compile_kernel_get_common_cflags(

View File

@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_cubins)
macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
set(cuda_cubin ${name}_${arch}.cubin)
if(${arch} MATCHES "compute_.*")
set(format "ptx")
else()
set(format "cubin")
endif()
set(cuda_file ${name}_${arch}.${format})
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
if(${prev_arch} MATCHES "compute_.*")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
else()
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
endif()
endif()
set(cuda_kernel_src "/kernels/cuda/${name}.cu")
@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
--use_fast_math
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
if(${experimental})
set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES)
-v
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
DEPENDS ${kernel_sources} cycles_cubin_cc)
set(cuda_file ${cuda_cubin})
else()
add_custom_command(
OUTPUT ${cuda_cubin}
OUTPUT ${cuda_file}
COMMAND ${CUDA_NVCC_EXECUTABLE}
-arch=${arch}
${CUDA_NVCC_FLAGS}
--cubin
--${format}
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
--ptxas-options="-v"
${cuda_flags}
DEPENDS ${kernel_sources})
endif()
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_cubin})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_file})
unset(cuda_debug_flags)
endmacro()