Merge branch 'blender-v3.0-release'
This commit is contained in:
commit
9e611c5616
|
@ -68,7 +68,8 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
|
|||
{
|
||||
/* Pick any kernel, all of them are supposed to have the same level of microarchitecture
|
||||
* optimization. */
|
||||
VLOG(1) << "Using " << kernels.integrator_init_from_camera.get_uarch_name() << " CPU kernels.";
|
||||
VLOG(1) << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name()
|
||||
<< " CPU kernels.";
|
||||
|
||||
if (info.cpu_threads == 0) {
|
||||
info.cpu_threads = TaskScheduler::num_threads();
|
||||
|
@ -296,11 +297,6 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
|
||||
/* Return the address of this device's `kernels` member (its CPU kernel function table).
 * The result points into the device object itself, so it is never null. */
const CPUKernels *CPUDevice::get_cpu_kernels() const
|
||||
{
|
||||
return &kernels;
|
||||
}
|
||||
|
||||
void CPUDevice::get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
|
||||
{
|
||||
|
|
|
@ -57,8 +57,6 @@ class CPUDevice : public Device {
|
|||
RTCDevice embree_device;
|
||||
#endif
|
||||
|
||||
CPUKernels kernels;
|
||||
|
||||
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
|
||||
~CPUDevice();
|
||||
|
||||
|
@ -90,7 +88,6 @@ class CPUDevice : public Device {
|
|||
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
||||
virtual const CPUKernels *get_cpu_kernels() const override;
|
||||
virtual void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
|
||||
virtual void *get_cpu_osl_memory() override;
|
||||
|
|
|
@ -26,6 +26,9 @@ CCL_NAMESPACE_BEGIN
|
|||
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
||||
|
||||
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
|
||||
/* Expands to the two constructor member initializers for one film-convert pass:
 * `film_convert_<name>` and `film_convert_half_rgba_<name>`, each constructed from the
 * KERNEL_FUNCTIONS() list of the kernel with the same name.
 * No trailing comma is emitted; the initializer list using this macro supplies the separator. */
#define REGISTER_KERNEL_FILM_CONVERT(name) \
|
||||
film_convert_##name(KERNEL_FUNCTIONS(film_convert_##name)), \
|
||||
film_convert_half_rgba_##name(KERNEL_FUNCTIONS(film_convert_half_rgba_##name))
|
||||
|
||||
CPUKernels::CPUKernels()
|
||||
: /* Integrator. */
|
||||
|
@ -50,11 +53,25 @@ CPUKernels::CPUKernels()
|
|||
REGISTER_KERNEL(adaptive_sampling_filter_x),
|
||||
REGISTER_KERNEL(adaptive_sampling_filter_y),
|
||||
/* Cryptomatte. */
|
||||
REGISTER_KERNEL(cryptomatte_postprocess)
|
||||
REGISTER_KERNEL(cryptomatte_postprocess),
|
||||
/* Film Convert. */
|
||||
REGISTER_KERNEL_FILM_CONVERT(depth),
|
||||
REGISTER_KERNEL_FILM_CONVERT(mist),
|
||||
REGISTER_KERNEL_FILM_CONVERT(sample_count),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float),
|
||||
REGISTER_KERNEL_FILM_CONVERT(light_path),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float3),
|
||||
REGISTER_KERNEL_FILM_CONVERT(motion),
|
||||
REGISTER_KERNEL_FILM_CONVERT(cryptomatte),
|
||||
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher),
|
||||
REGISTER_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow),
|
||||
REGISTER_KERNEL_FILM_CONVERT(combined),
|
||||
REGISTER_KERNEL_FILM_CONVERT(float4)
|
||||
{
|
||||
}
|
||||
|
||||
#undef REGISTER_KERNEL
|
||||
#undef REGISTER_KERNEL_FILM_CONVERT
|
||||
#undef KERNEL_FUNCTIONS
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -17,11 +17,13 @@
|
|||
#pragma once
|
||||
|
||||
#include "device/cpu/kernel_function.h"
|
||||
#include "util/half.h"
|
||||
#include "util/types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelGlobalsCPU;
|
||||
struct KernelFilmConvert;
|
||||
struct IntegratorStateCPU;
|
||||
struct TileInfo;
|
||||
|
||||
|
@ -102,6 +104,41 @@ class CPUKernels {
|
|||
|
||||
CryptomattePostprocessFunction cryptomatte_postprocess;
|
||||
|
||||
/* Film Convert. */
|
||||
using FilmConvertFunction = CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
|
||||
const float *buffer,
|
||||
float *pixel,
|
||||
const int width,
|
||||
const int buffer_stride,
|
||||
const int pixel_stride)>;
|
||||
using FilmConvertHalfRGBAFunction =
|
||||
CPUKernelFunction<void (*)(const KernelFilmConvert *kfilm_convert,
|
||||
const float *buffer,
|
||||
half4 *pixel,
|
||||
const int width,
|
||||
const int buffer_stride)>;
|
||||
|
||||
/* Declares the pair of kernel function members for one film-convert pass:
 * `film_convert_<name>` (FilmConvertFunction, writes `float` pixels) and
 * `film_convert_half_rgba_<name>` (FilmConvertHalfRGBAFunction, writes `half4` pixels). */
#define KERNEL_FILM_CONVERT_FUNCTION(name) \
|
||||
FilmConvertFunction film_convert_##name; \
|
||||
FilmConvertHalfRGBAFunction film_convert_half_rgba_##name;
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(depth)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(mist)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(combined)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float4)
|
||||
|
||||
#undef KERNEL_FILM_CONVERT_FUNCTION
|
||||
|
||||
CPUKernels();
|
||||
};
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "device/queue.h"
|
||||
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/cpu/kernel.h"
|
||||
#include "device/cuda/device.h"
|
||||
#include "device/dummy/device.h"
|
||||
#include "device/hip/device.h"
|
||||
|
@ -361,10 +362,11 @@ unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
const CPUKernels *Device::get_cpu_kernels() const
|
||||
const CPUKernels &Device::get_cpu_kernels()
|
||||
{
|
||||
LOG(FATAL) << "Device does not support CPU kernels.";
|
||||
return nullptr;
|
||||
/* Initialize CPU kernels once and reuse. */
|
||||
static CPUKernels kernels;
|
||||
return kernels;
|
||||
}
|
||||
|
||||
void Device::get_cpu_kernel_thread_globals(
|
||||
|
|
|
@ -178,7 +178,7 @@ class Device {
|
|||
* These may not be used on GPU or multi-devices. */
|
||||
|
||||
/* Get CPU kernel functions for native instruction set. */
|
||||
virtual const CPUKernels *get_cpu_kernels() const;
|
||||
static const CPUKernels &get_cpu_kernels();
|
||||
/* Get kernel globals to pass to kernels. */
|
||||
virtual void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
|
||||
|
|
|
@ -14,9 +14,12 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
|
||||
#include "session/buffers.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/tbb.h"
|
||||
|
||||
|
@ -33,70 +36,16 @@ CCL_NAMESPACE_BEGIN
|
|||
* Kernel processing.
|
||||
*/
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
{
|
||||
KernelFilmConvert kfilm_convert;
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
|
||||
|
||||
if (destination.pixels) {
|
||||
/* NOTE: No overlays are applied since they are not used for final renders.
|
||||
* Can be supported via some sort of specialization to avoid code duplication. */
|
||||
|
||||
run_get_pass_kernel_processor_float(
|
||||
&kfilm_convert, render_buffers, buffer_params, destination, processor);
|
||||
}
|
||||
|
||||
if (destination.pixels_half_rgba) {
|
||||
/* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. */
|
||||
|
||||
if (destination.num_components == 1) {
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
|
||||
render_buffers,
|
||||
buffer_params,
|
||||
destination,
|
||||
[&processor](const KernelFilmConvert *kfilm_convert,
|
||||
ccl_global const float *buffer,
|
||||
float *pixel_rgba) {
|
||||
float pixel;
|
||||
processor(kfilm_convert, buffer, &pixel);
|
||||
|
||||
pixel_rgba[0] = pixel;
|
||||
pixel_rgba[1] = pixel;
|
||||
pixel_rgba[2] = pixel;
|
||||
pixel_rgba[3] = 1.0f;
|
||||
});
|
||||
}
|
||||
else if (destination.num_components == 3) {
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
|
||||
render_buffers,
|
||||
buffer_params,
|
||||
destination,
|
||||
[&processor](const KernelFilmConvert *kfilm_convert,
|
||||
ccl_global const float *buffer,
|
||||
float *pixel_rgba) {
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
pixel_rgba[3] = 1.0f;
|
||||
});
|
||||
}
|
||||
else if (destination.num_components == 4) {
|
||||
run_get_pass_kernel_processor_half_rgba(
|
||||
&kfilm_convert, render_buffers, buffer_params, destination, processor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
const CPUKernels::FilmConvertFunction func) const
|
||||
{
|
||||
/* NOTE: No overlays are applied since they are not used for final renders.
|
||||
* Can be supported via some sort of specialization to avoid code duplication. */
|
||||
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
|
@ -112,21 +61,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
|||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
float *pixel = destination.pixels +
|
||||
(y * buffer_params.width + destination.offset) * pixel_stride;
|
||||
|
||||
for (int64_t x = 0; x < buffer_params.window_width;
|
||||
++x, buffer += pass_stride, pixel += pixel_stride) {
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
}
|
||||
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Processor>
|
||||
inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
const CPUKernels::FilmConvertHalfRGBAFunction func) const
|
||||
{
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
|
@ -141,16 +85,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
|||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
half4 *pixel = dst_start + y * destination_stride;
|
||||
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
|
||||
|
||||
float pixel_rgba[4];
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
*pixel = float4_to_half4_display(
|
||||
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
|
||||
}
|
||||
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -163,8 +98,25 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
|||
const BufferParams &buffer_params, \
|
||||
const Destination &destination) const \
|
||||
{ \
|
||||
run_get_pass_kernel_processor( \
|
||||
render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \
|
||||
const CPUKernels &kernels = Device::get_cpu_kernels(); \
|
||||
KernelFilmConvert kfilm_convert; \
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \
|
||||
\
|
||||
if (destination.pixels) { \
|
||||
run_get_pass_kernel_processor_float(&kfilm_convert, \
|
||||
render_buffers, \
|
||||
buffer_params, \
|
||||
destination, \
|
||||
kernels.film_convert_##pass); \
|
||||
} \
|
||||
\
|
||||
if (destination.pixels_half_rgba) { \
|
||||
run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \
|
||||
render_buffers, \
|
||||
buffer_params, \
|
||||
destination, \
|
||||
kernels.film_convert_half_rgba_##pass); \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Float (scalar) passes. */
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "device/cpu/kernel.h"
|
||||
|
||||
#include "integrator/pass_accessor.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
@ -28,25 +30,19 @@ class PassAccessorCPU : public PassAccessor {
|
|||
using PassAccessor::PassAccessor;
|
||||
|
||||
protected:
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
inline void run_get_pass_kernel_processor_float(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertFunction func) const;
|
||||
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
|
||||
template<typename Processor>
|
||||
inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const Processor &processor) const;
|
||||
inline void run_get_pass_kernel_processor_half_rgba(
|
||||
const KernelFilmConvert *kfilm_convert,
|
||||
const RenderBuffers *render_buffers,
|
||||
const BufferParams &buffer_params,
|
||||
const Destination &destination,
|
||||
const CPUKernels::FilmConvertHalfRGBAFunction func) const;
|
||||
|
||||
#define DECLARE_PASS_ACCESSOR(pass) \
|
||||
virtual void get_pass_##pass(const RenderBuffers *render_buffers, \
|
||||
|
|
|
@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
|
|||
DeviceScene *device_scene,
|
||||
bool *cancel_requested_flag)
|
||||
: PathTraceWork(device, film, device_scene, cancel_requested_flag),
|
||||
kernels_(*(device->get_cpu_kernels()))
|
||||
kernels_(Device::get_cpu_kernels())
|
||||
{
|
||||
DCHECK_EQ(device->info.type, DEVICE_CPU);
|
||||
}
|
||||
|
|
|
@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device,
|
|||
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
|
||||
|
||||
/* Find required kernel function. */
|
||||
const CPUKernels &kernels = *(device->get_cpu_kernels());
|
||||
const CPUKernels &kernels = Device::get_cpu_kernels();
|
||||
|
||||
/* Simple parallel_for over all work items. */
|
||||
KernelShaderEvalInput *input_data = input.data();
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
/* CPU Kernel Interface */
|
||||
|
||||
#include "util/half.h"
|
||||
#include "util/types.h"
|
||||
|
||||
#include "kernel/types.h"
|
||||
|
|
|
@ -52,6 +52,37 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
|
|||
#undef KERNEL_INTEGRATOR_INIT_FUNCTION
|
||||
#undef KERNEL_INTEGRATOR_SHADE_FUNCTION
|
||||
|
||||
/* Declares the two film-convert entry points for pass `name`:
 * - `film_convert_<name>`: writes `float` pixels, advancing the destination by `pixel_stride`.
 * - `film_convert_half_rgba_<name>`: writes `half4` pixels, one per converted pixel.
 * In both, `width` is the number of pixels converted by a single call and `buffer_stride` is the
 * per-pixel increment applied to the `buffer` pointer. */
#define KERNEL_FILM_CONVERT_FUNCTION(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
|
||||
const float *buffer, \
|
||||
float *pixel, \
|
||||
const int width, \
|
||||
const int buffer_stride, \
|
||||
const int pixel_stride); \
|
||||
void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
|
||||
const KernelFilmConvert *kfilm_convert, \
|
||||
const float *buffer, \
|
||||
half4 *pixel, \
|
||||
const int width, \
|
||||
const int buffer_stride);
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(depth)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(mist)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(sample_count)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(combined)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float4)
|
||||
|
||||
#undef KERNEL_FILM_CONVERT_FUNCTION
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Shader evaluation.
|
||||
*/
|
||||
|
|
|
@ -47,8 +47,8 @@
|
|||
# include "kernel/integrator/megakernel.h"
|
||||
|
||||
# include "kernel/film/adaptive_sampling.h"
|
||||
# include "kernel/film/read.h"
|
||||
# include "kernel/film/id_passes.h"
|
||||
# include "kernel/film/read.h"
|
||||
|
||||
# include "kernel/bake/bake.h"
|
||||
|
||||
|
@ -232,6 +232,85 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
|
|||
#endif
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Film Convert.
|
||||
*/
|
||||
|
||||
#ifdef KERNEL_STUB
|
||||
|
||||
/* Stub variant of the film-convert kernels, used when KERNEL_STUB is defined.
 *
 * Defines `film_convert_<name>` and `film_convert_half_rgba_<name>` with the same signatures as
 * the real kernels, but with bodies that only invoke STUB_ASSERT (defined elsewhere; presumably
 * it reports that a stub was called for KERNEL_ARCH).
 *
 * Each stub passes its own kernel name to STUB_ASSERT, so the diagnostic identifies which of the
 * two entry points was actually called.
 *
 * `is_float` is unused here; it is accepted so that both variants of this macro can be invoked
 * with the same argument list. */
# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_half_rgba_##name); \
    }
|
||||
|
||||
#else
|
||||
|
||||
/* Film-convert kernels for pass `name`, converting `width` consecutive pixels per call.
 *
 * `film_convert_<name>` runs `film_get_pass_pixel_<name>` straight into the float destination,
 * stepping the source by `buffer_stride` and the destination by `pixel_stride`.
 *
 * `film_convert_half_rgba_<name>` evaluates the pass into a temporary RGBA quadruple which starts
 * as (0, 0, 0, 1), replicates the first channel into G and B when `is_float` is true, applies the
 * pass overlays and stores the result converted to half4. */
# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      const float *src = buffer; \
      float *dst = pixel; \
      for (int x = 0; x < width; ++x) { \
        film_get_pass_pixel_##name(kfilm_convert, src, dst); \
        src += buffer_stride; \
        dst += pixel_stride; \
      } \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      const float *src = buffer; \
      half4 *dst = pixel; \
      for (int x = 0; x < width; ++x) { \
        float rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
        film_get_pass_pixel_##name(kfilm_convert, src, rgba); \
        if (is_float) { \
          /* Scalar pass: show the value as a gray color. */ \
          rgba[1] = rgba[0]; \
          rgba[2] = rgba[0]; \
        } \
        film_apply_pass_pixel_overlays_rgba(kfilm_convert, src, rgba); \
        *dst = float4_to_half4_display(make_float4(rgba[0], rgba[1], rgba[2], rgba[3])); \
        src += buffer_stride; \
        ++dst; \
      } \
    }
|
||||
|
||||
#endif
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(depth, true)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(mist, true)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(sample_count, true)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float, true)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(light_path, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float3, false)
|
||||
|
||||
KERNEL_FILM_CONVERT_FUNCTION(motion, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(combined, false)
|
||||
KERNEL_FILM_CONVERT_FUNCTION(float4, false)
|
||||
|
||||
#undef KERNEL_FILM_CONVERT_FUNCTION
|
||||
|
||||
#undef KERNEL_INVOKE
|
||||
#undef DEFINE_INTEGRATOR_KERNEL
|
||||
#undef DEFINE_INTEGRATOR_SHADE_KERNEL
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const int shader_flags)
|
||||
|
@ -86,36 +85,75 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Note that current_kernel is a template value since making this a variable
|
||||
* leads to poor performance with CUDA atomics. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_shader_next_kernel(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
const int shader,
|
||||
const int shader_flags)
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Split path if a shadow catcher was hit. */
|
||||
ccl_device_forceinline void integrator_split_shadow_catcher(
|
||||
KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
|
||||
{
|
||||
/* Note on scheduling.
|
||||
*
|
||||
* When there is no shadow catcher split the scheduling is simple: schedule surface shading with
|
||||
* or without raytrace support, depending on the shader used.
|
||||
*
|
||||
* When there is a shadow catcher split the general idea is to have the following configuration:
|
||||
*
|
||||
* - Schedule surface shading kernel (with corresponding raytrace support) for the ray which
|
||||
* will trace shadow catcher object.
|
||||
*
|
||||
* - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for
|
||||
* the matte ray.
|
||||
*
|
||||
* - Otherwise schedule background shading kernel, so that we have a background to alpha-over
|
||||
* on. The background kernel will then schedule surface shading for the matte ray.
|
||||
/* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
|
||||
* paths from here. */
|
||||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Mark state as having done a shadow catcher split so that it stops contributing to
|
||||
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
|
||||
|
||||
/* Copy current state to new state. */
|
||||
state = integrator_state_shadow_catcher_split(kg, state);
|
||||
|
||||
/* Initialize new state.
|
||||
*
|
||||
* Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for
|
||||
* the matte path. */
|
||||
|
||||
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
|
||||
/* Mark current state so that it will only track contribution of shadow catcher objects ignoring
|
||||
* non-catcher objects. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS;
|
||||
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
|
||||
/* If using background pass, schedule background shading kernel so that we have a background
|
||||
* to alpha-over on. The background kernel will then continue the path afterwards. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Continue with shading shadow catcher surface. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
/* Continue with shading shadow catcher surface. Same as integrator_split_shadow_catcher, but
|
||||
* using NEXT instead of INIT. */
|
||||
Intersection isect ccl_optional_struct_init;
|
||||
integrator_state_read_isect(kg, state, &isect);
|
||||
|
||||
const int shader = intersection_get_shader(kg, &isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
|
@ -124,23 +162,132 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
|
|||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
if (kernel_shadow_catcher_split(kg, state, object_flags)) {
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
|
||||
KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
/* Same logic as integrator_split_shadow_catcher, but using NEXT instead of INIT. */
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
|
||||
* objects from it, and then continue shading volume and shadow catcher surface after. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||
return;
|
||||
}
|
||||
|
||||
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
else if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
/* Continue with shading shadow catcher surface. */
|
||||
integrator_intersect_next_kernel_after_shadow_catcher_volume<current_kernel>(kg, state);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Schedule next kernel to be executed after intersect closest.
|
||||
*
|
||||
* Note that current_kernel is a template value since making this a variable
|
||||
* leads to poor performance with CUDA atomics. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel(
|
||||
KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
ccl_private const Intersection *ccl_restrict isect,
|
||||
const bool hit)
|
||||
{
|
||||
/* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
|
||||
#ifdef __VOLUME__
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP);
|
||||
const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE;
|
||||
const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
|
||||
|
||||
if (!integrator_intersect_terminate(kg, state, flags)) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (hit) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
if (!integrator_intersect_terminate(kg, state, flags)) {
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(current_kernel);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule next kernel to be executed after shade volume.
|
||||
*
|
||||
* The logic here matches integrator_intersect_next_kernel, except that
|
||||
* volume shading and termination testing have already been done. */
|
||||
template<uint32_t current_kernel>
|
||||
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
|
||||
KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
|
||||
{
|
||||
if (isect->prim != PRIM_NONE) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect->type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(
|
||||
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
/* Handle shadow catcher. */
|
||||
integrator_split_shadow_catcher(kg, state, isect);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
|
||||
|
@ -192,56 +339,9 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s
|
|||
/* Write intersection result into global integrator state memory. */
|
||||
integrator_state_write_isect(kg, state, &isect);
|
||||
|
||||
#ifdef __VOLUME__
|
||||
if (!integrator_state_volume_stack_is_empty(kg, state)) {
|
||||
const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP);
|
||||
const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE;
|
||||
const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
|
||||
|
||||
if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
|
||||
kg, state, flags)) {
|
||||
/* Continue with volume kernel if we are inside a volume, regardless
|
||||
* of whether we hit anything. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (hit) {
|
||||
/* Hit a surface, continue with light or surface kernel. */
|
||||
if (isect.type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, &isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
|
||||
kg, state, flags)) {
|
||||
integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
|
||||
kg, state, &isect, shader, flags);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Nothing hit, continue with background kernel. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
/* Set up the next kernel to be executed. */
|
||||
integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
|
||||
kg, state, &isect, hit);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -42,10 +42,13 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
|||
/* Store to avoid global fetches on every intersection step. */
|
||||
const uint volume_stack_size = kernel_data.volume_stack_size;
|
||||
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_ALL_VISIBILITY);
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY);
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
if (num_hits > 0) {
|
||||
Intersection *isect = hits;
|
||||
|
||||
|
@ -60,7 +63,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
|||
Intersection isect;
|
||||
int step = 0;
|
||||
while (step < 2 * volume_stack_size &&
|
||||
scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
|
||||
scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
|
||||
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
|
||||
volume_stack_enter_exit(kg, state, stack_sd);
|
||||
|
||||
|
@ -74,7 +77,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
|
|||
#endif
|
||||
}
|
||||
|
||||
ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
|
||||
ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
|
||||
|
||||
|
@ -89,14 +92,20 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
|
|||
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
|
||||
volume_ray.t = FLT_MAX;
|
||||
|
||||
const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY);
|
||||
int stack_index = 0, enclosed_index = 0;
|
||||
|
||||
/* Write background shader. */
|
||||
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
|
||||
const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_CAMERA);
|
||||
|
||||
/* Initialize volume stack with background volume. For shadow catcher the
|
||||
* background volume is always assumed to be CG. */
|
||||
if (kernel_data.background.volume_shader != SHADER_NONE) {
|
||||
const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader};
|
||||
integrator_state_write_volume_stack(state, stack_index, new_entry);
|
||||
stack_index++;
|
||||
if (!(path_flag & PATH_RAY_SHADOW_CATCHER_PASS)) {
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, stack_index, object) = OBJECT_NONE;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(
|
||||
state, volume_stack, stack_index, shader) = kernel_data.background.volume_shader;
|
||||
stack_index++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store to avoid global fetches on every intersection step. */
|
||||
|
@ -202,9 +211,22 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
|
|||
/* Write terminator. */
|
||||
const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE};
|
||||
integrator_state_write_volume_stack(state, stack_index, new_entry);
|
||||
}
|
||||
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
/* Initialize the volume stack for `state`, then choose the next kernel:
 * paths flagged PATH_RAY_SHADOW_CATCHER_PASS continue through
 * integrator_intersect_next_kernel_after_shadow_catcher_volume(), all other
 * paths are scheduled for DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST. */
ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
|
||||
{
|
||||
/* Runs unconditionally, before either branch below. */
integrator_volume_stack_init(kg, state);
|
||||
|
||||
/* The path flag is read after the init call above. */
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_PASS) {
|
||||
/* Volume stack re-init for shadow catcher, continue with shading of hit. */
|
||||
integrator_intersect_next_kernel_after_shadow_catcher_volume<
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK>(kg, state);
|
||||
}
|
||||
else {
|
||||
/* Volume stack init for camera rays, continue with intersection of camera ray. */
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
|
||||
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -192,23 +192,11 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
|
|||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
|
||||
/* Special case for shadow catcher where we want to fill the background pass
|
||||
* behind the shadow catcher but also continue tracing the path. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
|
||||
const int isect_prim = INTEGRATOR_STATE(state, isect, prim);
|
||||
const int isect_type = INTEGRATOR_STATE(state, isect, type);
|
||||
const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type);
|
||||
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
if (shader_flags & SD_HAS_RAYTRACE) {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
|
||||
shader);
|
||||
}
|
||||
else {
|
||||
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
|
||||
shader);
|
||||
}
|
||||
integrator_intersect_next_kernel_after_shadow_catcher_background<
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND>(kg, state);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1023,25 +1023,9 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
|
|||
}
|
||||
else {
|
||||
/* Continue to background, light or surface. */
|
||||
if (isect.prim == PRIM_NONE) {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
|
||||
return;
|
||||
}
|
||||
else if (isect.type & PRIMITIVE_LAMP) {
|
||||
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
/* Hit a surface, continue with surface kernel unless terminated. */
|
||||
const int shader = intersection_get_shader(kg, &isect);
|
||||
const int flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
|
||||
kg, state, &isect, shader, flags);
|
||||
return;
|
||||
}
|
||||
integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
|
||||
kg, state, &isect);
|
||||
return;
|
||||
}
|
||||
#endif /* __VOLUME__ */
|
||||
}
|
||||
|
|
|
@ -76,33 +76,6 @@ ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
|
|||
return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0;
|
||||
}
|
||||
|
||||
/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
|
||||
* after this function. */
|
||||
/* Split a shadow catcher path off `state` when this bounce qualifies.
 *
 * Returns true when the split was performed and false otherwise; always false
 * when __SHADOW_CATCHER__ is not defined. `object_flags` is only forwarded to
 * kernel_shadow_catcher_is_path_split_bounce(). */
ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg,
|
||||
IntegratorState state,
|
||||
const int object_flags)
|
||||
{
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
|
||||
/* Nothing to do unless this bounce is a split bounce. */
if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The split is to be done. Mark the current state as such, so that it stops contributing to the
|
||||
* shadow catcher matte pass, but keeps contributing to the combined pass. */
|
||||
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
|
||||
|
||||
/* Split new state from the current one. This new state will only track contribution of shadow
|
||||
* catcher objects ignoring non-catcher objects. */
|
||||
integrator_state_shadow_catcher_split(kg, state);
|
||||
|
||||
return true;
|
||||
#else
|
||||
/* Mark the parameter as intentionally unused in this configuration. */
(void)object_flags;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
|
||||
ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag)
|
||||
|
|
|
@ -173,10 +173,10 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
|
|||
|
||||
/* Array access on GPU with Structure-of-Arrays. */
|
||||
|
||||
typedef const int IntegratorState;
|
||||
typedef const int ConstIntegratorState;
|
||||
typedef const int IntegratorShadowState;
|
||||
typedef const int ConstIntegratorShadowState;
|
||||
typedef int IntegratorState;
|
||||
typedef int ConstIntegratorState;
|
||||
typedef int IntegratorShadowState;
|
||||
typedef int ConstIntegratorShadowState;
|
||||
|
||||
# define INTEGRATOR_STATE_NULL -1
|
||||
|
||||
|
|
|
@ -326,8 +326,8 @@ ccl_device_inline void integrator_shadow_state_move(KernelGlobals kg,
|
|||
|
||||
/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
|
||||
* after this function. */
|
||||
ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
|
||||
IntegratorState state)
|
||||
ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGlobals kg,
|
||||
IntegratorState state)
|
||||
{
|
||||
#if defined(__KERNEL_GPU__)
|
||||
ConstIntegratorState to_state = atomic_fetch_and_add_uint32(
|
||||
|
@ -337,14 +337,14 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
|
|||
#else
|
||||
IntegratorStateCPU *ccl_restrict to_state = state + 1;
|
||||
|
||||
/* Only copy the required subset, since shadow intersections are big and irrelevant here. */
|
||||
/* Only copy the required subset for performance. */
|
||||
to_state->path = state->path;
|
||||
to_state->ray = state->ray;
|
||||
to_state->isect = state->isect;
|
||||
integrator_state_copy_volume_stack(kg, to_state, state);
|
||||
#endif
|
||||
|
||||
INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS;
|
||||
return to_state;
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
|
|
|
@ -1503,7 +1503,8 @@ static void icon_draw_rect(float x,
|
|||
int draw_w = w;
|
||||
int draw_h = h;
|
||||
int draw_x = x;
|
||||
int draw_y = y;
|
||||
/* We need to round y, to avoid the icon jittering in some cases. */
|
||||
int draw_y = round_fl_to_int(y);
|
||||
|
||||
/* sanity check */
|
||||
if (w <= 0 || h <= 0 || w > 2000 || h > 2000) {
|
||||
|
|
|
@ -1407,8 +1407,8 @@ static void widget_draw_icon(
|
|||
|
||||
/* Force positions to integers for zoom levels near 1, so that icons are drawn crisp. */
|
||||
if (aspect > 0.95f && aspect < 1.05f) {
|
||||
xs = (int)(xs + 0.1f);
|
||||
ys = (int)(ys + 0.1f);
|
||||
xs = roundf(xs);
|
||||
ys = roundf(ys);
|
||||
}
|
||||
|
||||
/* Get theme color. */
|
||||
|
|
Loading…
Reference in New Issue