Merge branch 'blender-v3.0-release'
This commit is contained in:
commit
ce395c84a3
|
@ -42,7 +42,7 @@ class CPUKernels {
|
|||
|
||||
IntegratorInitFunction integrator_init_from_camera;
|
||||
IntegratorInitFunction integrator_init_from_bake;
|
||||
IntegratorFunction integrator_intersect_closest;
|
||||
IntegratorShadeFunction integrator_intersect_closest;
|
||||
IntegratorFunction integrator_intersect_shadow;
|
||||
IntegratorFunction integrator_intersect_subsurface;
|
||||
IntegratorFunction integrator_intersect_volume_stack;
|
||||
|
|
|
@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
|
|||
sizeof(device_ptr),
|
||||
cuda_stream_));
|
||||
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
cuda_device_assert(
|
||||
cuda_device_,
|
||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||
|
|
|
@ -439,7 +439,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||
DCHECK_LE(work_size, max_num_paths_);
|
||||
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
|
||||
/* Closest ray intersection kernels with integrator state and render buffer. */
|
||||
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
|
||||
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
|
||||
|
||||
queue_->enqueue(kernel, work_size, args);
|
||||
break;
|
||||
}
|
||||
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
|
||||
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
|
||||
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
|
||||
KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
|
||||
KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
|
||||
KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
|
||||
KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
|
||||
KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
|
||||
|
|
|
@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
|
||||
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
|
||||
DEFINE_INTEGRATOR_KERNEL(intersect_closest)
|
||||
DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
|
||||
DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
|
||||
DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
|
||||
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
|
||||
|
|
|
@ -131,13 +131,14 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
||||
ccl_global const int *path_index_array,
|
||||
ccl_global float *render_buffer,
|
||||
const int work_size)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
if (global_index < work_size) {
|
||||
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
||||
ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state));
|
||||
ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
|
|||
const int global_index = optixGetLaunchIndex().x;
|
||||
const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
|
||||
global_index;
|
||||
integrator_intersect_closest(nullptr, path_index);
|
||||
integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
|
||||
|
|
|
@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
|
|||
}
|
||||
#endif /* __DENOISING_FEATURES__ */
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
|
||||
/* Write shadow catcher passes on a bounce from the shadow catcher object. */
|
||||
ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const ShaderData *sd,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (!kernel_data.integrator.has_shadow_catcher) {
|
||||
return;
|
||||
}
|
||||
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
|
||||
|
||||
if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
|
||||
return;
|
||||
}
|
||||
|
||||
ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
|
||||
|
||||
/* Count sample for the shadow catcher object. */
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
|
||||
|
||||
/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
|
||||
* transparency to the matte. */
|
||||
const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
|
||||
average(throughput));
|
||||
}
|
||||
|
||||
#endif /* __SHADOW_CATCHER__ */
|
||||
|
||||
ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
|
||||
size_t depth,
|
||||
float id,
|
||||
|
|
|
@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
|
|||
#ifdef __SHADOW_CATCHER__
|
||||
/* Split path if a shadow catcher was hit. */
|
||||
ccl_device_forceinline void integrator_split_shadow_catcher(
|
||||
KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
/* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
|
||||
* paths from here. */
|
||||
|
@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
|
|||
return;
|
||||
}
|
||||
|
||||
kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
|
||||
|
||||
/* Mark state as having done a shadow catcher split so that it stops contributing to
|
||||
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
|
||||
|
@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
|||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer,
|
||||
const bool hit)
|
||||
{
|
||||
/* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
|
||||
|
@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
|||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect);
|
||||
integrator_split_shadow_catcher(kg, state, isect, render_buffer);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
|
@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
|
|||
* volume shading and termination testing have already been done. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
if (isect->prim != PRIM_NONE) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
|
@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
|||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect);
|
||||
integrator_split_shadow_catcher(kg, state, isect, render_buffer);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@ -290,7 +299,9 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
|
||||
ccl_device void integrator_intersect_closest(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
|
||||
|
||||
|
@ -341,7 +352,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s
|
|||
|
||||
/* Setup up next kernel to be executed. */
|
||||
integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
|
||||
kg, state, &isect, hit);
|
||||
kg, state, &isect, render_buffer, hit);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
|
|||
if (queued_kernel) {
|
||||
switch (queued_kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
integrator_intersect_closest(kg, state);
|
||||
integrator_intersect_closest(kg, state, render_buffer);
|
||||
break;
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
|
||||
integrator_shade_background(kg, state, render_buffer);
|
||||
|
|
|
@ -492,10 +492,6 @@ ccl_device bool integrate_surface(KernelGlobals kg,
|
|||
kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
|
||||
#endif
|
||||
|
||||
/* Direct light. */
|
||||
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
|
||||
integrate_surface_direct_light(kg, state, &sd, &rng_state);
|
||||
|
|
|
@ -1024,7 +1024,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
|
|||
else {
|
||||
/* Continue to background, light or surface. */
|
||||
integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
|
||||
kg, state, &isect);
|
||||
kg, state, &isect, render_buffer);
|
||||
return;
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/film/write_passes.h"
|
||||
#include "kernel/integrator/path_state.h"
|
||||
#include "kernel/integrator/state_util.h"
|
||||
|
||||
|
@ -47,7 +48,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
|
|||
return false;
|
||||
}
|
||||
|
||||
if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
|
||||
if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -88,6 +89,28 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
|
|||
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
|
||||
}
|
||||
|
||||
/* Write shadow catcher passes on a bounce from the shadow catcher object. */
|
||||
ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
|
||||
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
|
||||
kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
|
||||
|
||||
const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
|
||||
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
|
||||
kernel_data.film.pass_stride;
|
||||
ccl_global float *buffer = render_buffer + render_buffer_offset;
|
||||
|
||||
/* Count sample for the shadow catcher object. */
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
|
||||
|
||||
/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
|
||||
* transparency to the matte. */
|
||||
const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||
kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
|
||||
average(throughput));
|
||||
}
|
||||
|
||||
#endif /* __SHADOW_CATCHER__ */
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
Loading…
Reference in New Issue