Merge branch 'blender-v3.0-release'

Sergey Sharybin 2021-11-11 15:29:35 +01:00
commit ce395c84a3
13 changed files with 60 additions and 54 deletions

View File

@ -42,7 +42,7 @@ class CPUKernels {
IntegratorInitFunction integrator_init_from_camera;
IntegratorInitFunction integrator_init_from_bake;
-IntegratorFunction integrator_intersect_closest;
+IntegratorShadeFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_shadow;
IntegratorFunction integrator_intersect_subsurface;
IntegratorFunction integrator_intersect_volume_stack;
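
The type change above is the CPU-side entry point for the whole commit: integrator_intersect_closest is re-registered as a shade-style kernel so that it receives the render buffer and can write shadow catcher passes at intersection time. A minimal sketch of the distinction between the two function-pointer kinds, using placeholder types; the real aliases used by CPUKernels presumably take Cycles' own KernelGlobalsCPU / IntegratorStateCPU and may differ in detail:

/* Sketch only: placeholder forward declarations stand in for Cycles' types. */
struct KernelGlobalsCPU;
struct IntegratorStateCPU;

/* Plain integrator kernels only advance the path state. */
using IntegratorFunction = void (*)(const KernelGlobalsCPU *kg, IntegratorStateCPU *state);

/* Shade-style kernels additionally receive the render buffer for pass writes. */
using IntegratorShadeFunction = void (*)(const KernelGlobalsCPU *kg,
                                         IntegratorStateCPU *state,
                                         float *render_buffer);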

View File

@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
sizeof(device_ptr),
cuda_stream_));
-if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+    kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
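
For OptiX the extra argument cannot simply be appended to a launch call: ray-generation programs read their inputs from a launch-parameters struct in device memory, so the queue now patches the render-buffer pointer into KernelParamsOptiX with an offsetof() copy before this kernel as well. A self-contained illustration of that pattern, using a made-up stand-in struct (the real KernelParamsOptiX has more members):

/* Illustration only: LaunchParamsSketch is a stand-in for KernelParamsOptiX. */
#include <cstddef>
#include <cstdint>

struct LaunchParamsSketch {
  uint64_t path_index_array; /* device pointer */
  uint64_t render_buffer;    /* device pointer, now read by intersect_closest as well */
};

/* The cuMemcpyHtoDAsync call in the diff amounts to writing sizeof(device_ptr)
 * bytes at launch_params_ptr + offsetof(..., render_buffer), so the device-side
 * __raygen__ program can pick the pointer up from __params.render_buffer. */
static_assert(offsetof(LaunchParamsSketch, render_buffer) == sizeof(uint64_t),
              "render_buffer directly follows path_index_array in this sketch");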

View File

@ -439,7 +439,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
DCHECK_LE(work_size, max_num_paths_);
switch (kernel) {
-case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
+/* Closest ray intersection kernels with integrator state and render buffer. */
+void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+queue_->enqueue(kernel, work_size, args);
+break;
+}
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
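
The new case assembles the launch arguments for the closest-hit kernel; the pointer order in args has to mirror the device-side parameter list (path_index_array, render_buffer, work_size, as seen in the GPU kernel signature hunk further down), which is why the render-buffer pointer is inserted between the path index array and the work size rather than appended. A reduced sketch of that host/device pairing, with a hypothetical enqueue callback standing in for the device queue:

/* Sketch only: args holds pointers to the launch arguments, in the same order
 * as the GPU kernel signature of integrator_intersect_closest. */
#include <cstdint>

using EnqueueFn = void (*)(int work_size, void *args[]);

static void enqueue_intersect_closest_sketch(EnqueueFn enqueue,
                                             uint64_t d_path_index,
                                             uint64_t d_render_buffer,
                                             int work_size)
{
  /* {path_index_array, render_buffer, work_size} */
  void *args[] = {&d_path_index, &d_render_buffer, &work_size};
  enqueue(work_size, args);
}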

View File

@ -37,7 +37,7 @@
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
-KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
+KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);

View File

@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
-DEFINE_INTEGRATOR_KERNEL(intersect_closest)
+DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)

View File

@ -131,13 +131,14 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
+ccl_global float *render_buffer,
const int work_size)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
-ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state));
+ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer));
}
}

View File

@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
const int global_index = optixGetLaunchIndex().x;
const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
global_index;
-integrator_intersect_closest(nullptr, path_index);
+integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()

View File

@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
}
#endif /* __DENOISING_FEATURES__ */
-#ifdef __SHADOW_CATCHER__
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-KernelGlobals kg,
-IntegratorState state,
-ccl_private const ShaderData *sd,
-ccl_global float *ccl_restrict render_buffer)
-{
-if (!kernel_data.integrator.has_shadow_catcher) {
-return;
-}
-kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
-kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
-return;
-}
-ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
-/* Count sample for the shadow catcher object. */
-kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
- * transparency to the matte. */
-const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
-average(throughput));
-}
-#endif /* __SHADOW_CATCHER__ */
ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
size_t depth,
float id,

View File

@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
#ifdef __SHADOW_CATCHER__
/* Split path if a shadow catcher was hit. */
ccl_device_forceinline void integrator_split_shadow_catcher(
-KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+KernelGlobals kg,
+IntegratorState state,
+ccl_private const Intersection *ccl_restrict isect,
+ccl_global float *ccl_restrict render_buffer)
{
/* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
* paths from here. */
@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
return;
}
+kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
/* Mark state as having done a shadow catcher split so that it stops contributing to
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
KernelGlobals kg,
IntegratorState state,
ccl_private const Intersection *ccl_restrict isect,
+ccl_global float *ccl_restrict render_buffer,
const bool hit)
{
/* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
#ifdef __SHADOW_CATCHER__
/* Handle shadow catcher. */
-integrator_split_shadow_catcher(kg, state, isect);
+integrator_split_shadow_catcher(kg, state, isect, render_buffer);
#endif
}
else {
@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
* volume shading and termination testing have already been done. */
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
-KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+KernelGlobals kg,
+IntegratorState state,
+ccl_private const Intersection *ccl_restrict isect,
+ccl_global float *ccl_restrict render_buffer)
{
if (isect->prim != PRIM_NONE) {
/* Hit a surface, continue with light or surface kernel. */
@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
#ifdef __SHADOW_CATCHER__
/* Handle shadow catcher. */
-integrator_split_shadow_catcher(kg, state, isect);
+integrator_split_shadow_catcher(kg, state, isect, render_buffer);
#endif
return;
}
@ -290,7 +299,9 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
}
}
-ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
+ccl_device void integrator_intersect_closest(KernelGlobals kg,
+IntegratorState state,
+ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
@ -341,7 +352,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s
/* Setup up next kernel to be executed. */
integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
-kg, state, &isect, hit);
+kg, state, &isect, render_buffer, hit);
}
CCL_NAMESPACE_END
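
Read together, these hunks simply thread render_buffer from the kernel entry point down to the shadow-catcher split, which is where the pass data is now written (previously this happened one kernel later, in integrate_surface, as the removal further down shows). A condensed, compilable outline of the resulting call chain, with stub types and the real template parameters and ccl_ qualifiers stripped:

/* Outline only: stub types, empty bodies, no templates or ccl_ qualifiers. */
struct KernelGlobals {};
struct IntegratorState {};
struct Intersection {};

static void kernel_write_shadow_catcher_bounce_data(KernelGlobals *, IntegratorState *,
                                                    float * /*render_buffer*/)
{
  /* Writes the sample-count and matte-transparency passes, see the relocated
   * kernel_write_shadow_catcher_bounce_data at the end of this commit. */
}

static void integrator_split_shadow_catcher(KernelGlobals *kg, IntegratorState *state,
                                            const Intersection * /*isect*/,
                                            float *render_buffer)
{
  /* ...test whether this bounce splits the path, then: */
  kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); /* new call site */
  /* ...flag the state and schedule the second path... */
}

static void integrator_intersect_next_kernel(KernelGlobals *kg, IntegratorState *state,
                                             const Intersection *isect, float *render_buffer,
                                             bool hit)
{
  /* ...queue the next shade kernel; on a surface hit also: */
  if (hit) {
    integrator_split_shadow_catcher(kg, state, isect, render_buffer);
  }
}

void integrator_intersect_closest_outline(KernelGlobals *kg, IntegratorState *state,
                                          float *render_buffer)
{
  Intersection isect; /* scene_intersect(...) result */
  integrator_intersect_next_kernel(kg, state, &isect, render_buffer, /*hit=*/true);
}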

View File

@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
if (queued_kernel) {
switch (queued_kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
-integrator_intersect_closest(kg, state);
+integrator_intersect_closest(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
integrator_shade_background(kg, state, render_buffer);

View File

@ -492,10 +492,6 @@ ccl_device bool integrate_surface(KernelGlobals kg,
kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
#endif
-#ifdef __SHADOW_CATCHER__
-kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
-#endif
/* Direct light. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
integrate_surface_direct_light(kg, state, &sd, &rng_state);

View File

@ -1024,7 +1024,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
else {
/* Continue to background, light or surface. */
integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
-kg, state, &isect);
+kg, state, &isect, render_buffer);
return;
}
#endif /* __VOLUME__ */

View File

@ -16,6 +16,7 @@
#pragma once
#include "kernel/film/write_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/state_util.h"
@ -47,7 +48,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
return false;
}
-if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
+if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
return false;
}
@ -88,6 +89,28 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
}
+/* Write shadow catcher passes on a bounce from the shadow catcher object. */
+ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
+KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
+kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+kernel_data.film.pass_stride;
+ccl_global float *buffer = render_buffer + render_buffer_offset;
+/* Count sample for the shadow catcher object. */
+kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
+/* Since the split is done, the sample does not contribute to the matte, so accumulate it as
+ * transparency to the matte. */
+const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
+average(throughput));
+}
#endif /* __SHADOW_CATCHER__ */
CCL_NAMESPACE_END
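
The relocated helper computes the per-pixel buffer address inline (render_pixel_index * pass_stride) rather than via kernel_pass_pixel_render_buffer(), and the early-out checks from the removed version earlier in this commit are gone, since the call now sits behind the split test in integrator_split_shadow_catcher. Assuming kernel_write_pass_float accumulates a float into the render buffer (with '+ 3' presumably selecting the transparency component of the matte pass), the write boils down to the following; all names in this sketch are placeholders:

/* Placeholder names throughout; assumes pass writes are simple accumulation. */
#include <cstdint>

static void write_pass_float_sketch(float *pass, float value)
{
  /* Stand-in for kernel_write_pass_float, presumably an accumulating/atomic add. */
  *pass += value;
}

static void shadow_catcher_bounce_sketch(float *render_buffer,
                                         uint32_t render_pixel_index,
                                         uint32_t pass_stride,
                                         uint32_t pass_sample_count,
                                         uint32_t pass_matte,
                                         float average_throughput)
{
  float *buffer = render_buffer + (uint64_t)render_pixel_index * pass_stride;
  write_pass_float_sketch(buffer + pass_sample_count, 1.0f);            /* sample counter */
  write_pass_float_sketch(buffer + pass_matte + 3, average_throughput); /* matte transparency */
}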