Fix Cycles MNEE not working for Metal

Move MNEE to own kernel, separate from shader ray-tracing. This does introduce
the limitation that a shader can't use both MNEE and AO/bevel, but that seems
like the better trade-off for now.

We can experiment with bigger kernel organization changes later.

Differential Revision: https://developer.blender.org/D15070
This commit is contained in:
Brecht Van Lommel 2022-05-30 18:04:14 +02:00
parent 52cb24a779
commit f2cd7e08fe
20 changed files with 167 additions and 25 deletions

View File

@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
(kernel_features & KERNEL_FEATURE_MNEE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all

View File

@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
(kernel_features & KERNEL_FEATURE_MNEE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all

View File

@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
return "integrator_shade_surface";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
return "integrator_shade_surface_raytrace";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
return "integrator_shade_surface_mnee";
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
return "integrator_shade_volume";
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:

View File

@ -489,7 +489,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
kernel_function_list = function_list;
}

View File

@ -265,6 +265,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
break;
default:
bvhMetalRT = nil;

View File

@ -432,9 +432,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
{ /* Load and compile PTX module with OptiX kernels. */
string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
"lib/kernel_optix_shader_raytrace.ptx" :
"lib/kernel_optix.ptx");
string ptx_data, ptx_filename = path_get(
(kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
"lib/kernel_optix_shader_raytrace.ptx" :
"lib/kernel_optix.ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
if (!getenv("OPTIX_ROOT_DIR")) {
set_error(
@ -444,7 +445,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
ptx_filename = compile_kernel(
kernel_features,
(kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel",
(kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
"kernel_shader_raytrace" :
"kernel",
"optix",
true);
}
@ -582,6 +585,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
"__direct_callable__svm_node_bevel";
}
/* MNEE. */
if (kernel_features & KERNEL_FEATURE_MNEE) {
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
"__raygen__kernel_optix_integrator_shade_surface_mnee";
}
optix_assert(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@ -663,6 +674,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
}
if (kernel_features & KERNEL_FEATURE_MNEE) {
/* Create MNEE pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
pipeline_groups.push_back(groups[PG_MISS]);
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
}
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
optix_assert(optixPipelineCreate(context,
&pipeline_options,
&link_options,
pipeline_groups.data(),
pipeline_groups.size(),
nullptr,
0,
&pipelines[PIP_SHADE_MNEE]));
/* Combine ray generation and trace continuation stack size. */
const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
link_options.maxTraceDepth * trace_css;
const unsigned int dss = 0;
/* Set stack size depending on pipeline options. */
optix_assert(
optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
}
{ /* Create intersection-only pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);

View File

@ -24,6 +24,7 @@ enum {
PG_RGEN_INTERSECT_SUBSURFACE,
PG_RGEN_INTERSECT_VOLUME_STACK,
PG_RGEN_SHADE_SURFACE_RAYTRACE,
PG_RGEN_SHADE_SURFACE_MNEE,
PG_MISS,
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
/* List of OptiX pipelines. */
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
/* A single shader binding table entry. */
struct SbtRecord {

View File

@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution()
static bool is_optix_specific_kernel(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);

View File

@ -65,6 +65,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE),
integrator_shader_raytrace_sort_counter_(
device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE),
integrator_shader_mnee_sort_counter_(
device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
integrator_shader_sort_prefix_sum_(
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
@ -188,6 +190,9 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
integrator_shader_sort_prefix_sum_.zero_to_device();
@ -195,6 +200,8 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
(int *)integrator_shader_sort_counter_.device_pointer;
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
}
@ -327,6 +334,7 @@ void PathTraceWorkGPU::enqueue_reset()
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
* counter on the host side because `zero_to_device()` is not doing it. */
@ -450,6 +458,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
/* Shading kernels with integrator state and render buffer. */
DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
@ -1080,13 +1089,15 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
}
@ -1094,7 +1105,8 @@ bool PathTraceWorkGPU::kernel_creates_ao_paths(DeviceKernel kernel)
{
return (device_scene_->data.kernel_features & KERNEL_FEATURE_AO) &&
(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
}
bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel)

View File

@ -133,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork {
/* Shader sorting. */
device_vector<int> integrator_shader_sort_counter_;
device_vector<int> integrator_shader_raytrace_sort_counter_;
device_vector<int> integrator_shader_mnee_sort_counter_;
device_vector<int> integrator_shader_sort_prefix_sum_;
/* Path split. */
device_vector<int> integrator_next_main_path_index_;

View File

@ -269,6 +269,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_volume,
ccl_global const int *path_index_array,

View File

@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr
global_index;
integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
global_index;
integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer);
}

View File

@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Setup next kernel to execute. */
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flag & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_raytrace_kernel) {
if (use_caustics) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {

View File

@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
const int flags = kernel_tex_fetch(__shaders, shader).flags;
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_raytrace_kernel) {
if (use_caustics) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
const int object_flags = intersection_get_object_flags(kg, &isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_raytrace_kernel) {
if (use_caustics) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
const int object_flags = intersection_get_object_flags(kg, isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
if (use_raytrace_kernel) {
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
const int object_flags = intersection_get_object_flags(kg, isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_raytrace_kernel) {
if (use_caustics) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}

View File

@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
integrator_shade_surface_raytrace(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
integrator_shade_surface_mnee(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
integrator_shade_light(kg, state, render_buffer);
break;

View File

@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
# ifdef __MNEE__
int mnee_vertex_count = 0;
IF_KERNEL_NODES_FEATURE(RAYTRACE)
IF_KERNEL_FEATURE(MNEE)
{
if (ls.lamp != LAMP_NONE) {
/* Is this a caustic light? */
@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace(
kg, state, render_buffer);
}
ccl_device_forceinline void integrator_shade_surface_mnee(
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) |
KERNEL_FEATURE_MNEE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer);
}
CCL_NAMESPACE_END

View File

@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_raytrace_kernel) {
if (use_caustics) {
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
shader);
}
else if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);

View File

@ -1572,6 +1572,7 @@ typedef enum DeviceKernel {
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL,
@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t {
KERNEL_FEATURE_AO_PASS = (1U << 25U),
KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U),
KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),
/* MNEE. */
KERNEL_FEATURE_MNEE = (1U << 27U),
};
/* Shader node feature mask, to specialize shader evaluation for kernels. */
@ -1714,9 +1718,12 @@ enum KernelFeatureFlag : uint32_t {
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so can't use it. */
#ifdef __KERNEL_CPU__
# define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#else
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif

View File

@ -550,7 +550,7 @@ void Scene::update_kernel_features()
dscene.data.integrator.use_caustics = false;
if (has_caustics_caster && has_caustics_receiver && has_caustics_light) {
dscene.data.integrator.use_caustics = true;
kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE;
kernel_features |= KERNEL_FEATURE_MNEE;
}
if (bake_manager->get_baking()) {
@ -597,6 +597,7 @@ static void log_kernel_features(const uint features)
<< "\n";
VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE)
<< "\n";
VLOG(2) << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n";
VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n";
VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING)

View File

@ -33,8 +33,6 @@ BLACKLIST_OPTIX = [
]
BLACKLIST_METAL = [
# No MNEE for Metal currently
"underwater_caustics.blend",
]
BLACKLIST_GPU = [