Fix Cycles MNEE not working for Metal
Move MNEE to own kernel, separate from shader ray-tracing. This does introduce the limitation that a shader can't use both MNEE and AO/bevel, but that seems like the better trade-off for now. We can experiment with bigger kernel organization changes later. Differential Revision: https://developer.blender.org/D15070
This commit is contained in:
parent
52cb24a779
commit
f2cd7e08fe
|
@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
|
|||
/* Use the biggest kernel for estimation. */
|
||||
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
|
||||
(kernel_features & KERNEL_FEATURE_MNEE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
|
||||
|
||||
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
|
||||
|
|
|
@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
|
|||
/* Use the biggest kernel for estimation. */
|
||||
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
|
||||
(kernel_features & KERNEL_FEATURE_MNEE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
|
||||
|
||||
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
|
||||
|
|
|
@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
|||
return "integrator_shade_surface";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
return "integrator_shade_surface_raytrace";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
return "integrator_shade_surface_mnee";
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
|
||||
return "integrator_shade_volume";
|
||||
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
|
||||
|
|
|
@ -489,7 +489,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
|
|||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
|
||||
kernel_function_list = function_list;
|
||||
}
|
||||
|
||||
|
|
|
@ -265,6 +265,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
break;
|
||||
default:
|
||||
bvhMetalRT = nil;
|
||||
|
|
|
@ -432,9 +432,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||
}
|
||||
|
||||
{ /* Load and compile PTX module with OptiX kernels. */
|
||||
string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
|
||||
"lib/kernel_optix_shader_raytrace.ptx" :
|
||||
"lib/kernel_optix.ptx");
|
||||
string ptx_data, ptx_filename = path_get(
|
||||
(kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
|
||||
"lib/kernel_optix_shader_raytrace.ptx" :
|
||||
"lib/kernel_optix.ptx");
|
||||
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
|
||||
if (!getenv("OPTIX_ROOT_DIR")) {
|
||||
set_error(
|
||||
|
@ -444,7 +445,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||
}
|
||||
ptx_filename = compile_kernel(
|
||||
kernel_features,
|
||||
(kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel",
|
||||
(kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
|
||||
"kernel_shader_raytrace" :
|
||||
"kernel",
|
||||
"optix",
|
||||
true);
|
||||
}
|
||||
|
@ -582,6 +585,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||
"__direct_callable__svm_node_bevel";
|
||||
}
|
||||
|
||||
/* MNEE. */
|
||||
if (kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
|
||||
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
|
||||
group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
|
||||
"__raygen__kernel_optix_integrator_shade_surface_mnee";
|
||||
}
|
||||
|
||||
optix_assert(optixProgramGroupCreate(
|
||||
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
|
||||
|
||||
|
@ -663,6 +674,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||
pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
|
||||
}
|
||||
|
||||
if (kernel_features & KERNEL_FEATURE_MNEE) {
|
||||
/* Create MNEE pipeline. */
|
||||
vector<OptixProgramGroup> pipeline_groups;
|
||||
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
|
||||
pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
|
||||
pipeline_groups.push_back(groups[PG_MISS]);
|
||||
pipeline_groups.push_back(groups[PG_HITD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS]);
|
||||
pipeline_groups.push_back(groups[PG_HITL]);
|
||||
pipeline_groups.push_back(groups[PG_HITV]);
|
||||
if (motion_blur) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||
}
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
|
||||
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
|
||||
}
|
||||
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
|
||||
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
|
||||
|
||||
optix_assert(optixPipelineCreate(context,
|
||||
&pipeline_options,
|
||||
&link_options,
|
||||
pipeline_groups.data(),
|
||||
pipeline_groups.size(),
|
||||
nullptr,
|
||||
0,
|
||||
&pipelines[PIP_SHADE_MNEE]));
|
||||
|
||||
/* Combine ray generation and trace continuation stack size. */
|
||||
const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
|
||||
link_options.maxTraceDepth * trace_css;
|
||||
const unsigned int dss = 0;
|
||||
|
||||
/* Set stack size depending on pipeline options. */
|
||||
optix_assert(
|
||||
optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
|
||||
}
|
||||
|
||||
{ /* Create intersection-only pipeline. */
|
||||
vector<OptixProgramGroup> pipeline_groups;
|
||||
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
|
||||
|
|
|
@ -24,6 +24,7 @@ enum {
|
|||
PG_RGEN_INTERSECT_SUBSURFACE,
|
||||
PG_RGEN_INTERSECT_VOLUME_STACK,
|
||||
PG_RGEN_SHADE_SURFACE_RAYTRACE,
|
||||
PG_RGEN_SHADE_SURFACE_MNEE,
|
||||
PG_MISS,
|
||||
PG_HITD, /* Default hit group. */
|
||||
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
|
||||
|
@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
|||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
||||
|
||||
/* List of OptiX pipelines. */
|
||||
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
|
||||
enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
|
||||
|
||||
/* A single shader binding table entry. */
|
||||
struct SbtRecord {
|
||||
|
|
|
@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution()
|
|||
static bool is_optix_specific_kernel(DeviceKernel kernel)
|
||||
{
|
||||
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||
|
@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||
cuda_stream_));
|
||||
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||
|
@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
pipeline = optix_device->pipelines[PIP_INTERSECT];
|
||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
|
||||
|
|
|
@ -65,6 +65,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
|||
integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE),
|
||||
integrator_shader_raytrace_sort_counter_(
|
||||
device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE),
|
||||
integrator_shader_mnee_sort_counter_(
|
||||
device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE),
|
||||
integrator_shader_sort_prefix_sum_(
|
||||
device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE),
|
||||
integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE),
|
||||
|
@ -188,6 +190,9 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
|
|||
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_raytrace_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
|
||||
integrator_shader_mnee_sort_counter_.zero_to_device();
|
||||
|
||||
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
|
||||
integrator_shader_sort_prefix_sum_.zero_to_device();
|
||||
|
||||
|
@ -195,6 +200,8 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
|
|||
(int *)integrator_shader_sort_counter_.device_pointer;
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
|
||||
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
|
||||
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
|
||||
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,6 +334,7 @@ void PathTraceWorkGPU::enqueue_reset()
|
|||
queue_->zero_to_device(integrator_queue_counter_);
|
||||
queue_->zero_to_device(integrator_shader_sort_counter_);
|
||||
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
|
||||
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
|
||||
|
||||
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
|
||||
* counter on the host side because `zero_to_device()` is not doing it. */
|
||||
|
@ -450,6 +458,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
|
||||
/* Shading kernels with integrator state and render buffer. */
|
||||
DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
|
||||
|
@ -1080,13 +1089,15 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
|
|||
bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel)
|
||||
{
|
||||
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
|
||||
}
|
||||
|
||||
bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel)
|
||||
{
|
||||
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
}
|
||||
|
||||
|
@ -1094,7 +1105,8 @@ bool PathTraceWorkGPU::kernel_creates_ao_paths(DeviceKernel kernel)
|
|||
{
|
||||
return (device_scene_->data.kernel_features & KERNEL_FEATURE_AO) &&
|
||||
(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE);
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
|
||||
}
|
||||
|
||||
bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel)
|
||||
|
|
|
@ -133,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork {
|
|||
/* Shader sorting. */
|
||||
device_vector<int> integrator_shader_sort_counter_;
|
||||
device_vector<int> integrator_shader_raytrace_sort_counter_;
|
||||
device_vector<int> integrator_shader_mnee_sort_counter_;
|
||||
device_vector<int> integrator_shader_sort_prefix_sum_;
|
||||
/* Path split. */
|
||||
device_vector<int> integrator_next_main_path_index_;
|
||||
|
|
|
@ -269,6 +269,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
|
||||
ccl_global const int *path_index_array,
|
||||
ccl_global float *render_buffer,
|
||||
const int work_size)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
if (global_index < work_size) {
|
||||
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
||||
ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer));
|
||||
}
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_volume,
|
||||
ccl_global const int *path_index_array,
|
||||
|
|
|
@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr
|
|||
global_index;
|
||||
integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee()
|
||||
{
|
||||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer);
|
||||
}
|
||||
|
|
|
@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
|||
/* Setup next kernel to execute. */
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flag & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
|
|||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
|
@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
|
|||
const int object_flags = intersection_get_object_flags(kg, &isect);
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
|
@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
|||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
if (use_raytrace_kernel) {
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
|
@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
|||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
|
|
|
@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
|
|||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
integrator_shade_surface_raytrace(kg, state, render_buffer);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||
integrator_shade_surface_mnee(kg, state, render_buffer);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
|
||||
integrator_shade_light(kg, state, render_buffer);
|
||||
break;
|
||||
|
|
|
@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
|
|||
|
||||
# ifdef __MNEE__
|
||||
int mnee_vertex_count = 0;
|
||||
IF_KERNEL_NODES_FEATURE(RAYTRACE)
|
||||
IF_KERNEL_FEATURE(MNEE)
|
||||
{
|
||||
if (ls.lamp != LAMP_NONE) {
|
||||
/* Is this a caustic light? */
|
||||
|
@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace(
|
|||
kg, state, render_buffer);
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_shade_surface_mnee(
|
||||
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) |
|
||||
KERNEL_FEATURE_MNEE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
|
|||
const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]);
|
||||
const bool use_caustics = kernel_data.integrator.use_caustics &&
|
||||
(object_flags & SD_OBJECT_CAUSTICS);
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
if (use_caustics) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
|
||||
shader);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
|
||||
shader);
|
||||
|
|
|
@ -1572,6 +1572,7 @@ typedef enum DeviceKernel {
|
|||
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
|
||||
DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL,
|
||||
|
@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t {
|
|||
KERNEL_FEATURE_AO_PASS = (1U << 25U),
|
||||
KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U),
|
||||
KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),
|
||||
|
||||
/* MNEE. */
|
||||
KERNEL_FEATURE_MNEE = (1U << 27U),
|
||||
};
|
||||
|
||||
/* Shader node feature mask, to specialize shader evaluation for kernels. */
|
||||
|
@ -1714,9 +1718,12 @@ enum KernelFeatureFlag : uint32_t {
|
|||
* are different depending on the main, shadow or null path. For GPU we don't have
|
||||
* C++17 everywhere so can't use it. */
|
||||
#ifdef __KERNEL_CPU__
|
||||
# define IF_KERNEL_FEATURE(feature) \
|
||||
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
|
||||
# define IF_KERNEL_NODES_FEATURE(feature) \
|
||||
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
|
||||
#else
|
||||
# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
|
||||
# define IF_KERNEL_NODES_FEATURE(feature) \
|
||||
if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
|
||||
#endif
|
||||
|
|
|
@ -550,7 +550,7 @@ void Scene::update_kernel_features()
|
|||
dscene.data.integrator.use_caustics = false;
|
||||
if (has_caustics_caster && has_caustics_receiver && has_caustics_light) {
|
||||
dscene.data.integrator.use_caustics = true;
|
||||
kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE;
|
||||
kernel_features |= KERNEL_FEATURE_MNEE;
|
||||
}
|
||||
|
||||
if (bake_manager->get_baking()) {
|
||||
|
@ -597,6 +597,7 @@ static void log_kernel_features(const uint features)
|
|||
<< "\n";
|
||||
VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE)
|
||||
<< "\n";
|
||||
VLOG(2) << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n";
|
||||
VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n";
|
||||
VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n";
|
||||
VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING)
|
||||
|
|
|
@ -33,8 +33,6 @@ BLACKLIST_OPTIX = [
|
|||
]
|
||||
|
||||
BLACKLIST_METAL = [
|
||||
# No MNEE for Metal currently
|
||||
"underwater_caustics.blend",
|
||||
]
|
||||
|
||||
BLACKLIST_GPU = [
|
||||
|
|
Loading…
Reference in New Issue