Cycles: Add an experimental CUDA kernel.
We now build two .cubins per architecture (e.g. kernel_sm_21.cubin and kernel_experimental_sm_21.cubin). The experimental kernel can be used by switching to the Experimental Feature Set: http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Experimental_Features This enables Subsurface Scattering and Correlated Multi Jitter Sampling on the GPU, while keeping the stability and performance of the regular kernel. Differential Revision: https://developer.blender.org/D762 Patch by Sergey and myself. Developer / Builder Note: CUDA Toolkit 6.5 is highly recommended for this. Also note that building the experimental kernel requires a lot of system memory (~7-8GB).
This commit is contained in:
parent
f6e049cd5a
commit
fb3f32760d
Notes:
blender-bot
2023-02-14 10:09:24 +01:00
Referenced by issue #41639, Pie menu : pie menu calling another pie menu : need to double click (on second menu)
|
@ -984,8 +984,9 @@ if env['OURPLATFORM']!='darwin':
|
|||
dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'lib')
|
||||
for arch in env['BF_CYCLES_CUDA_BINARIES_ARCH']:
|
||||
kernel_build_dir = os.path.join(B.root_build_dir, 'intern/cycles/kernel')
|
||||
cubin_file = os.path.join(kernel_build_dir, "kernel_%s.cubin" % arch)
|
||||
cubininstall.append(env.Install(dir=dir,source=cubin_file))
|
||||
for suffix in ('', '_experimental'):
|
||||
cubin_file = os.path.join(kernel_build_dir, "kernel%s_%s.cubin" % (suffix, arch))
|
||||
cubininstall.append(env.Install(dir=dir,source=cubin_file))
|
||||
|
||||
# osl shaders
|
||||
if env['WITH_BF_CYCLES_OSL']:
|
||||
|
|
|
@ -154,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
|
|||
sub.prop(cscene, "subsurface_samples", text="Subsurface")
|
||||
sub.prop(cscene, "volume_samples", text="Volume")
|
||||
|
||||
if use_cpu(context):
|
||||
if use_cpu(context) or cscene.feature_set == 'EXPERIMENTAL':
|
||||
layout.row().prop(cscene, "sampling_pattern", text="Pattern")
|
||||
|
||||
for rl in scene.render.layers:
|
||||
|
|
|
@ -197,14 +197,18 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
string compile_kernel()
|
||||
string compile_kernel(bool experimental)
|
||||
{
|
||||
/* compute cubin name */
|
||||
int major, minor;
|
||||
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
||||
|
||||
/* attempt to use kernel provided with blender */
|
||||
string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
|
||||
string cubin;
|
||||
if(experimental)
|
||||
cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor));
|
||||
else
|
||||
cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
|
||||
if(path_exists(cubin))
|
||||
return cubin;
|
||||
|
||||
|
@ -212,7 +216,10 @@ public:
|
|||
string kernel_path = path_get("kernel");
|
||||
string md5 = path_files_md5_hash(kernel_path);
|
||||
|
||||
cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
|
||||
if(experimental)
|
||||
cubin = string_printf("cycles_kernel_experimental_sm%d%d_%s.cubin", major, minor, md5.c_str());
|
||||
else
|
||||
cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
|
||||
cubin = path_user_get(path_join("cache", cubin));
|
||||
|
||||
/* if exists already, use it */
|
||||
|
@ -263,6 +270,9 @@ public:
|
|||
string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
|
||||
"-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
|
||||
nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
|
||||
|
||||
if(experimental)
|
||||
command += " -D__KERNEL_CUDA_EXPERIMENTAL__";
|
||||
|
||||
printf("%s\n", command.c_str());
|
||||
|
||||
|
@ -293,7 +303,7 @@ public:
|
|||
return false;
|
||||
|
||||
/* get kernel */
|
||||
string cubin = compile_kernel();
|
||||
string cubin = compile_kernel(experimental);
|
||||
|
||||
if(cubin == "")
|
||||
return false;
|
||||
|
|
|
@ -160,37 +160,50 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
|
||||
set(cuda_cubins)
|
||||
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
set(cuda_cubin kernel_${arch}.cubin)
|
||||
macro(CYCLES_CUDA_KERNEL_ADD arch experimental)
|
||||
if(${experimental})
|
||||
set(cuda_extra_flags "-D__KERNEL_CUDA_EXPERIMENTAL__")
|
||||
set(cuda_cubin kernel_experimental_${arch}.cubin)
|
||||
else()
|
||||
set(cuda_extra_flags "")
|
||||
set(cuda_cubin kernel_${arch}.cubin)
|
||||
endif()
|
||||
|
||||
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
|
||||
set(cuda_math_flags "--use_fast_math")
|
||||
|
||||
if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
|
||||
message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${CUDA_NVCC_EXECUTABLE}
|
||||
-arch=${arch}
|
||||
-m${CUDA_BITS}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
|
||||
--ptxas-options="-v"
|
||||
${cuda_arch_flags}
|
||||
${cuda_version_flags}
|
||||
${cuda_math_flags}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../util
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/svm
|
||||
-DCCL_NAMESPACE_BEGIN=
|
||||
-DCCL_NAMESPACE_END=
|
||||
-DNVCC
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_cubin}
|
||||
COMMAND ${CUDA_NVCC_EXECUTABLE}
|
||||
-arch=${arch}
|
||||
-m${CUDA_BITS}
|
||||
--cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
|
||||
--ptxas-options="-v"
|
||||
${cuda_arch_flags}
|
||||
${cuda_version_flags}
|
||||
${cuda_math_flags}
|
||||
${cuda_extra_flags}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../util
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/svm
|
||||
-DCCL_NAMESPACE_BEGIN=
|
||||
-DCCL_NAMESPACE_END=
|
||||
-DNVCC
|
||||
|
||||
DEPENDS ${cuda_sources})
|
||||
DEPENDS ${cuda_sources})
|
||||
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_cubin})
|
||||
endif()
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_cubin})
|
||||
|
||||
unset(cuda_extra_flags)
|
||||
endmacro()
|
||||
|
||||
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
|
||||
# Compile regular kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} FALSE)
|
||||
|
||||
# Compile experimental kernel
|
||||
CYCLES_CUDA_KERNEL_ADD(${arch} TRUE)
|
||||
endforeach()
|
||||
|
||||
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
|
||||
|
|
|
@ -83,30 +83,35 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
|
|||
dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
|
||||
last_cubin_file = None
|
||||
|
||||
configs = (("kernel_%s.cubin", ''),
|
||||
("kernel_experimental_%s.cubin", ' -D__KERNEL_CUDA_EXPERIMENTAL__'))
|
||||
|
||||
# add command for each cuda architecture
|
||||
for arch in cuda_archs:
|
||||
if cuda_version < 60 and arch == "sm_50":
|
||||
print("Can't build kernel for CUDA sm_50 architecture, skipping")
|
||||
continue
|
||||
for config in configs:
|
||||
# TODO(sergey): Use a dict instead of a tuple in order to increase readability?
|
||||
name = config[0]
|
||||
extra_flags = config[1]
|
||||
|
||||
cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
|
||||
cubin_file = os.path.join(build_dir, name % arch)
|
||||
current_flags = nvcc_flags + extra_flags
|
||||
|
||||
if env['BF_CYCLES_CUDA_ENV']:
|
||||
MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
|
||||
command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file)
|
||||
else:
|
||||
command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)
|
||||
if env['BF_CYCLES_CUDA_ENV']:
|
||||
MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
|
||||
command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, current_flags, kernel_file, cubin_file)
|
||||
else:
|
||||
command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file)
|
||||
|
||||
kernel.Command(cubin_file, 'kernel.cu', command)
|
||||
kernel.Depends(cubin_file, dependencies)
|
||||
kernel.Command(cubin_file, 'kernel.cu', command)
|
||||
kernel.Depends(cubin_file, dependencies)
|
||||
|
||||
kernel_binaries.append(cubin_file)
|
||||
|
||||
if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
|
||||
# trick to compile one kernel at a time to reduce memory usage
|
||||
if last_cubin_file:
|
||||
kernel.Depends(cubin_file, last_cubin_file)
|
||||
last_cubin_file = cubin_file
|
||||
kernel_binaries.append(cubin_file)
|
||||
|
||||
if not env['WITH_BF_CYCLES_CUDA_THREADED_COMPILE']:
|
||||
# trick to compile one kernel at a time to reduce memory usage
|
||||
if last_cubin_file:
|
||||
kernel.Depends(cubin_file, last_cubin_file)
|
||||
last_cubin_file = cubin_file
|
||||
|
||||
Return('kernel_binaries')
|
||||
|
||||
|
|
|
@ -79,8 +79,11 @@ CCL_NAMESPACE_BEGIN
|
|||
#define __VOLUME_SCATTER__
|
||||
|
||||
/* Experimental on GPU */
|
||||
//#define __VOLUME_DECOUPLED__
|
||||
//#define __SUBSURFACE__
|
||||
#ifdef __KERNEL_CUDA_EXPERIMENTAL__
|
||||
#define __SUBSURFACE__
|
||||
#define __CMJ__
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_OPENCL__
|
||||
|
|
|
@ -942,12 +942,13 @@ static void node_shader_buts_anisotropic(uiLayout *layout, bContext *UNUSED(C),
|
|||
|
||||
static void node_shader_buts_subsurface(uiLayout *layout, bContext *C, PointerRNA *ptr)
|
||||
{
|
||||
/* SSS does not work on GPU yet */
|
||||
/* SSS only enabled in Experimental Kernel */
|
||||
PointerRNA scene = CTX_data_pointer_get(C, "scene");
|
||||
if (scene.data) {
|
||||
PointerRNA cscene = RNA_pointer_get(&scene, "cycles");
|
||||
if (cscene.data && (RNA_enum_get(&cscene, "device") == 1 && U.compute_device_type != 0))
|
||||
uiItemL(layout, IFACE_("SSS not supported on GPU"), ICON_ERROR);
|
||||
if (cscene.data && (RNA_enum_get(&cscene, "device") == 1 && U.compute_device_type != 0
|
||||
&& RNA_enum_get(&cscene, "feature_set") == 0))
|
||||
uiItemL(layout, IFACE_("Only enabled in experimental GPU kernel"), ICON_ERROR);
|
||||
}
|
||||
|
||||
uiItemR(layout, ptr, "falloff", 0, "", ICON_NONE);
|
||||
|
|
Loading…
Reference in New Issue