Cleanup: Move OptiX denoiser code from device into denoiser class
Cycles already treats denoising fairly separately in its code, with a dedicated `Denoiser` base class used to describe denoising behavior. That class has been fully implemented for OIDN (`denoiser_oidn.cpp`), but for OptiX it was mostly empty (`denoiser_optix.cpp`) and denoising was instead implemented in the OptiX device. That meant denoising code was split over various files and directories, making it a bit awkward to work with. This patch moves the OptiX denoising implementation into the existing `OptiXDenoiser` class, so that everything is in one place. There are no functional changes; the code has mostly been moved as-is. To retain support for potential other denoiser implementations based on a GPU device in the future, the `DeviceDenoiser` base class was kept and slightly extended (and its file renamed to `denoiser_gpu.cpp` to follow the same naming convention as `path_trace_work_*.cpp`). Differential Revision: https://developer.blender.org/D16502
This commit is contained in:
parent
a94c3aafe5
commit
a859837cde
|
@ -253,6 +253,33 @@ if(WITH_CYCLES_OSL)
|
|||
)
|
||||
endif()
|
||||
|
||||
# CUDA is needed by both the CUDA and the OptiX device.
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
  add_definitions(-DWITH_CUDA)

  if(WITH_CUDA_DYNLOAD)
    # Dynamic loading: use the bundled cuew headers.
    include_directories(../../extern/cuew/include)
    add_definitions(-DWITH_CUDA_DYNLOAD)
  else()
    # Otherwise use the headers of the CUDA toolkit as system includes.
    include_directories(SYSTEM ${CUDA_TOOLKIT_INCLUDE})
  endif()
endif()
|
||||
|
||||
# HIP device support, optionally loaded dynamically through the bundled hipew headers.
if(WITH_CYCLES_DEVICE_HIP)
  add_definitions(-DWITH_HIP)

  if(WITH_HIP_DYNLOAD)
    include_directories(../../extern/hipew/include)
    add_definitions(-DWITH_HIP_DYNLOAD)
  endif()
endif()
|
||||
|
||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
||||
find_package(OptiX 7.3.0)
|
||||
|
||||
|
@ -261,12 +288,16 @@ if(WITH_CYCLES_DEVICE_OPTIX)
|
|||
include_directories(
|
||||
SYSTEM
|
||||
${OPTIX_INCLUDE_DIR}
|
||||
)
|
||||
)
|
||||
else()
|
||||
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Compile definition for the Metal device.
if(WITH_CYCLES_DEVICE_METAL)
  add_definitions(-DWITH_METAL)
endif()

# Compile definition for the oneAPI device.
if(WITH_CYCLES_DEVICE_ONEAPI)
  add_definitions(-DWITH_ONEAPI)
endif()
|
||||
|
|
|
@ -8,28 +8,13 @@ set(INC
|
|||
set(INC_SYS )
|
||||
|
||||
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
|
||||
if(WITH_CUDA_DYNLOAD)
|
||||
list(APPEND INC
|
||||
../../../extern/cuew/include
|
||||
)
|
||||
add_definitions(-DWITH_CUDA_DYNLOAD)
|
||||
else()
|
||||
list(APPEND INC_SYS
|
||||
${CUDA_TOOLKIT_INCLUDE}
|
||||
)
|
||||
if(NOT WITH_CUDA_DYNLOAD)
|
||||
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
|
||||
endif()
|
||||
|
||||
# With dynamic loading of HIP the bundled hipew headers are used.
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
  list(APPEND INC ../../../extern/hipew/include)
  add_definitions(-DWITH_HIP_DYNLOAD)
endif()
|
||||
|
||||
set(SRC_BASE
|
||||
device.cpp
|
||||
denoise.cpp
|
||||
|
@ -168,24 +153,15 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
|||
)
|
||||
endif()
|
||||
|
||||
# One compile definition per enabled device backend.
if(WITH_CYCLES_DEVICE_CUDA)
  add_definitions(-DWITH_CUDA)
endif()

if(WITH_CYCLES_DEVICE_HIP)
  add_definitions(-DWITH_HIP)
endif()

if(WITH_CYCLES_DEVICE_OPTIX)
  add_definitions(-DWITH_OPTIX)
endif()

# Metal additionally links its library and compiles its own sources.
if(WITH_CYCLES_DEVICE_METAL)
  list(APPEND LIB ${METAL_LIBRARY})
  add_definitions(-DWITH_METAL)
  list(APPEND SRC ${SRC_METAL})
endif()
|
||||
|
||||
if (WITH_CYCLES_DEVICE_ONEAPI)
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES)
|
||||
set(cycles_kernel_oneapi_lib_suffix "_aot")
|
||||
|
@ -203,7 +179,6 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
|
|||
else()
|
||||
list(APPEND LIB ${SYCL_LIBRARY})
|
||||
endif()
|
||||
add_definitions(-DWITH_ONEAPI)
|
||||
list(APPEND SRC
|
||||
${SRC_ONEAPI}
|
||||
)
|
||||
|
|
|
@ -78,24 +78,4 @@ class DenoiseParams : public Node {
|
|||
}
|
||||
};
|
||||
|
||||
/* All the parameters needed to perform buffer denoising on a device.
|
||||
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
|
||||
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
|
||||
* single place where they are all listed, so that it's not required to modify all device methods
|
||||
* when these parameters do change. */
|
||||
class DeviceDenoiseTask {
|
||||
public:
|
||||
DenoiseParams params;
|
||||
|
||||
int num_samples;
|
||||
|
||||
RenderBuffers *render_buffers;
|
||||
BufferParams buffer_params;
|
||||
|
||||
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
|
||||
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
|
||||
* tracer) point of view. */
|
||||
bool allow_inplace_modification;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -233,21 +233,6 @@ class Device {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/* Buffer denoising. */
|
||||
|
||||
/* Returns true if task is fully handled. */
|
||||
virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
|
||||
{
|
||||
LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Queue to be used for denoising.
 *
 * This default implementation is for devices which do not support denoising: it only logs an
 * error and returns no queue. */
virtual DeviceQueue *get_denoise_queue()
{
  LOG(ERROR) << "Request denoising queue from a device which does not support it.";

  return nullptr;
}
|
||||
|
||||
/* Sub-devices */
|
||||
|
||||
/* Run given callback for every individual device which will be handling rendering.
|
||||
|
|
|
@ -1,16 +1,15 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2019, NVIDIA Corporation.
|
||||
* Copyright 2019-2022 Blender Foundation. */
|
||||
* Copyright 2019, NVIDIA Corporation
|
||||
* Copyright 2019-2022 Blender Foundation */
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
|
||||
# include "device/optix/device_impl.h"
|
||||
# include "device/optix/queue.h"
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
# include "bvh/optix.h"
|
||||
|
||||
# include "integrator/pass_accessor_gpu.h"
|
||||
|
||||
# include "scene/hair.h"
|
||||
# include "scene/mesh.h"
|
||||
# include "scene/object.h"
|
||||
|
@ -29,197 +28,8 @@
|
|||
# define __KERNEL_OPTIX__
|
||||
# include "kernel/device/optix/globals.h"
|
||||
|
||||
# include <optix_denoiser_tiling.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
|
||||
// issues without bumping SDK or driver requirement.
|
||||
//
|
||||
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
||||
namespace {
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 60
|
||||
using ::optixUtilDenoiserInvokeTiled;
|
||||
# else
|
||||
/* Split an image into tiles and append one descriptor per tile to `tiles`.
 *
 * The image is walked row by row, tile by tile. The input window of every tile is at most
 * (tileWidth + 2 * overlap) x (tileHeight + 2 * overlap) pixels, clamped to the image size. The
 * window starts `inputOffsetX/Y` pixels before the position the tile output is written to, and
 * those offsets are stored in the tile. The output window covers the pixels written by the tile.
 *
 * All byte offsets are calculated in `size_t` to avoid integer overflow on big images.
 *
 * Returns OPTIX_ERROR_INVALID_VALUE if one of the tile dimensions is zero, OPTIX_SUCCESS
 * otherwise. */
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
                                               const OptixImage2D &output,
                                               unsigned int overlapWindowSizeInPixels,
                                               unsigned int tileWidth,
                                               unsigned int tileHeight,
                                               std::vector<OptixUtilDenoiserImageTile> &tiles)
{
  if (tileWidth == 0 || tileHeight == 0) {
    return OPTIX_ERROR_INVALID_VALUE;
  }

  const unsigned int in_pixel_stride = optixUtilGetPixelStride(input);
  const unsigned int out_pixel_stride = optixUtilGetPixelStride(output);

  const int overlap = static_cast<int>(overlapWindowSizeInPixels);

  /* Size of the input window of a tile, which is the same for all tiles. */
  const int window_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
  const int window_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);

  int row = 0;
  int rows_done = 0;

  do {
    /* The first row of tiles has no overlap above it, so it outputs the overlap rows as well. */
    int offset_y = 0;
    int num_rows;
    if (row == 0) {
      num_rows = std::min(input.height, tileHeight + overlapWindowSizeInPixels);
    }
    else {
      offset_y = std::max(overlap, window_h - (static_cast<int>(input.height) - row));
      num_rows = std::min(tileHeight, input.height - rows_done);
    }

    int col = 0;
    int cols_done = 0;

    do {
      /* Same as above, but for the first column of tiles. */
      int offset_x = 0;
      int num_cols;
      if (col == 0) {
        num_cols = std::min(input.width, tileWidth + overlapWindowSizeInPixels);
      }
      else {
        offset_x = std::max(overlap, window_w - (static_cast<int>(input.width) - col));
        num_cols = std::min(tileWidth, input.width - cols_done);
      }

      OptixUtilDenoiserImageTile tile;

      tile.input.data = input.data +
                        static_cast<size_t>(row - offset_y) * input.rowStrideInBytes +
                        static_cast<size_t>(col - offset_x) * in_pixel_stride;
      tile.input.width = window_w;
      tile.input.height = window_h;
      tile.input.rowStrideInBytes = input.rowStrideInBytes;
      tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
      tile.input.format = input.format;

      tile.output.data = output.data + static_cast<size_t>(row) * output.rowStrideInBytes +
                         static_cast<size_t>(col) * out_pixel_stride;
      tile.output.width = num_cols;
      tile.output.height = num_rows;
      tile.output.rowStrideInBytes = output.rowStrideInBytes;
      tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
      tile.output.format = output.format;

      tile.inputOffsetX = offset_x;
      tile.inputOffsetY = offset_y;

      tiles.push_back(tile);

      if (col == 0) {
        col += tileWidth + overlapWindowSizeInPixels;
      }
      else {
        col += tileWidth;
      }
      cols_done += num_cols;
    } while (col < static_cast<int>(input.width));

    if (row == 0) {
      row += tileHeight + overlapWindowSizeInPixels;
    }
    else {
      row += tileHeight;
    }
    rows_done += num_rows;
  } while (row < static_cast<int>(input.height));

  return OPTIX_SUCCESS;
}
|
||||
|
||||
/* Run the denoiser tile by tile.
 *
 * All the layers and the guide layers which have data are split into tiles with
 * `optixUtilDenoiserSplitImage()`, after which `optixDenoiserInvoke()` is called once per tile.
 * The tiles of the first layer define the number of tiles and the input offsets passed to the
 * denoiser.
 *
 * Returns OPTIX_ERROR_INVALID_VALUE when `guideLayer` or `layers` is null or when there are no
 * layers, the first error reported by splitting or by the denoiser invocation, or OPTIX_SUCCESS
 * when all the tiles have been submitted. */
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
                                                CUstream stream,
                                                const OptixDenoiserParams *params,
                                                CUdeviceptr denoiserState,
                                                size_t denoiserStateSizeInBytes,
                                                const OptixDenoiserGuideLayer *guideLayer,
                                                const OptixDenoiserLayer *layers,
                                                unsigned int numLayers,
                                                CUdeviceptr scratch,
                                                size_t scratchSizeInBytes,
                                                unsigned int overlapWindowSizeInPixels,
                                                unsigned int tileWidth,
                                                unsigned int tileHeight)
{
  /* Without layers there is nothing to denoise, and the code below would access the first
   * element of empty vectors. */
  if (!guideLayer || !layers || numLayers == 0) {
    return OPTIX_ERROR_INVALID_VALUE;
  }

  /* Split an image of which only the input windows of the tiles are used. The image itself is
   * passed as a dummy output. Images without data are left without tiles. */
  const auto split_input_only = [&](const OptixImage2D &image,
                                    std::vector<OptixUtilDenoiserImageTile> &image_tiles)
      -> OptixResult {
    if (!image.data) {
      return OPTIX_SUCCESS;
    }
    const OptixImage2D dummyOutput = image;
    return ccl::optixUtilDenoiserSplitImage(
        image, dummyOutput, overlapWindowSizeInPixels, tileWidth, tileHeight, image_tiles);
  };

  std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
  std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
  for (unsigned int l = 0; l < numLayers; l++) {
    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
                                                                 layers[l].output,
                                                                 overlapWindowSizeInPixels,
                                                                 tileWidth,
                                                                 tileHeight,
                                                                 tiles[l]))
      return res;

    if (const OptixResult res = split_input_only(layers[l].previousOutput, prevTiles[l]))
      return res;
  }

  std::vector<OptixUtilDenoiserImageTile> albedoTiles;
  if (const OptixResult res = split_input_only(guideLayer->albedo, albedoTiles))
    return res;

  std::vector<OptixUtilDenoiserImageTile> normalTiles;
  if (const OptixResult res = split_input_only(guideLayer->normal, normalTiles))
    return res;

  std::vector<OptixUtilDenoiserImageTile> flowTiles;
  if (const OptixResult res = split_input_only(guideLayer->flow, flowTiles))
    return res;

  std::vector<OptixDenoiserLayer> tlayers;
  tlayers.reserve(numLayers);

  for (size_t t = 0; t < tiles[0].size(); t++) {
    /* Layers of the current tile. */
    tlayers.clear();
    for (unsigned int l = 0; l < numLayers; l++) {
      OptixDenoiserLayer layer = {};
      layer.input = (tiles[l])[t].input;
      layer.output = (tiles[l])[t].output;
      if (layers[l].previousOutput.data)
        layer.previousOutput = (prevTiles[l])[t].input;
      tlayers.push_back(layer);
    }

    /* Guide layers of the current tile. */
    OptixDenoiserGuideLayer gl = {};
    if (guideLayer->albedo.data)
      gl.albedo = albedoTiles[t].input;

    if (guideLayer->normal.data)
      gl.normal = normalTiles[t].input;

    if (guideLayer->flow.data)
      gl.flow = flowTiles[t].input;

    if (const OptixResult res = optixDenoiserInvoke(denoiser,
                                                    stream,
                                                    params,
                                                    denoiserState,
                                                    denoiserStateSizeInBytes,
                                                    &gl,
                                                    tlayers.data(),
                                                    numLayers,
                                                    (tiles[0])[t].inputOffsetX,
                                                    (tiles[0])[t].inputOffsetY,
                                                    scratch,
                                                    scratchSizeInBytes))
      return res;
  }
  return OPTIX_SUCCESS;
}
|
||||
# endif
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
|
||||
{
|
||||
|
@ -239,18 +49,10 @@ static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &fail
|
|||
}
|
||||
# endif
|
||||
|
||||
} // namespace
|
||||
|
||||
/* Denoiser state of the OptiX device.
 *
 * Both the queue and the `__denoiser_state` memory are constructed on the given device. No
 * other work is done here. */
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
    : device(device),
      queue(device),
      state(device, "__denoiser_state", true)
{
}
|
||||
|
||||
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: CUDADevice(info, stats, profiler),
|
||||
sbt_data(this, "__sbt", MEM_READ_ONLY),
|
||||
launch_params(this, "kernel_params", false),
|
||||
denoiser_(this)
|
||||
launch_params(this, "kernel_params", false)
|
||||
{
|
||||
/* Make the CUDA context current. */
|
||||
if (!cuContext) {
|
||||
|
@ -341,11 +143,6 @@ OptiXDevice::~OptiXDevice()
|
|||
}
|
||||
# endif
|
||||
|
||||
/* Make sure denoiser is destroyed before device context! */
|
||||
if (denoiser_.optix_denoiser != nullptr) {
|
||||
optixDenoiserDestroy(denoiser_.optix_denoiser);
|
||||
}
|
||||
|
||||
optixDeviceContextDestroy(context);
|
||||
}
|
||||
|
||||
|
@ -1120,571 +917,6 @@ void *OptiXDevice::get_cpu_osl_memory()
|
|||
# endif
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Buffer denoising.
|
||||
*/
|
||||
|
||||
/* State shared by all the steps of denoising the buffer of a single `DeviceDenoiseTask`.
 *
 * The constructor figures out which guiding passes are used, looks up the offsets of the passes
 * in the render buffer, and decides where the guiding passes are stored: either in-place in the
 * render buffer, or in a dedicated device buffer which is allocated here. */
class OptiXDevice::DenoiseContext {
 public:
  explicit DenoiseContext(OptiXDevice *device, const DeviceDenoiseTask &task)
      : denoise_params(task.params),
        render_buffers(task.render_buffers),
        buffer_params(task.buffer_params),
        guiding_buffer(device, "denoiser guiding passes buffer", true),
        num_samples(task.num_samples)
  {
    /* The color pass is always an input. */
    num_input_passes = 1;

    /* The normal pass is only used together with the albedo pass. */
    if (denoise_params.use_pass_albedo) {
      num_input_passes += 1;
      use_pass_albedo = true;
      pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
      if (denoise_params.use_pass_normal) {
        num_input_passes += 1;
        use_pass_normal = true;
        pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
      }
    }

    /* Temporally stable denoising uses the previous output and the motion pass. */
    if (denoise_params.temporally_stable) {
      prev_output.device_pointer = render_buffers->buffer.device_pointer;

      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);

      prev_output.stride = buffer_params.stride;
      prev_output.pass_stride = buffer_params.pass_stride;

      num_input_passes += 1;
      use_pass_flow = true;
      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
    }

    use_guiding_passes = (num_input_passes - 1) > 0;

    if (use_guiding_passes) {
      if (task.allow_inplace_modification) {
        /* Guiding passes are the passes of the render buffer itself. */
        guiding_params.device_pointer = render_buffers->buffer.device_pointer;

        guiding_params.pass_albedo = pass_denoising_albedo;
        guiding_params.pass_normal = pass_denoising_normal;
        guiding_params.pass_flow = pass_motion;

        guiding_params.stride = buffer_params.stride;
        guiding_params.pass_stride = buffer_params.pass_stride;
      }
      else {
        /* Guiding passes are stored in a dedicated buffer, with the used passes tightly packed
         * in every pixel. */
        guiding_params.pass_stride = 0;
        if (use_pass_albedo) {
          guiding_params.pass_albedo = guiding_params.pass_stride;
          guiding_params.pass_stride += 3;
        }
        if (use_pass_normal) {
          guiding_params.pass_normal = guiding_params.pass_stride;
          guiding_params.pass_stride += 3;
        }
        if (use_pass_flow) {
          guiding_params.pass_flow = guiding_params.pass_stride;
          guiding_params.pass_stride += 2;
        }

        guiding_params.stride = buffer_params.width;

        /* Calculate the number of elements in `size_t`: with `int` arithmetic the product
         * overflows for big buffers. */
        const size_t num_guiding_floats = static_cast<size_t>(buffer_params.width) *
                                          buffer_params.height * guiding_params.pass_stride;
        guiding_buffer.alloc_to_device(num_guiding_floats);
        guiding_params.device_pointer = guiding_buffer.device_pointer;
      }
    }

    pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
  }

  /* NOTE: Both the denoise parameters and the buffer parameters are references to the members
   * of the task, so the task needs to stay alive for as long as the context is used. */
  const DenoiseParams &denoise_params;

  RenderBuffers *render_buffers = nullptr;
  const BufferParams &buffer_params;

  /* Previous output. */
  struct {
    device_ptr device_pointer = 0;

    int offset = PASS_UNUSED;

    int stride = -1;
    int pass_stride = -1;
  } prev_output;

  /* Device-side storage of the guiding passes. */
  device_only_memory<float> guiding_buffer;

  struct {
    device_ptr device_pointer = 0;

    /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
    int pass_albedo = PASS_UNUSED;
    int pass_normal = PASS_UNUSED;
    int pass_flow = PASS_UNUSED;

    int stride = -1;
    int pass_stride = -1;
  } guiding_params;

  /* Number of input passes. Including the color and extra auxiliary passes. */
  int num_input_passes = 0;
  bool use_guiding_passes = false;
  bool use_pass_albedo = false;
  bool use_pass_normal = false;
  bool use_pass_flow = false;

  int num_samples = 0;

  int pass_sample_count = PASS_UNUSED;

  /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
  int pass_denoising_albedo = PASS_UNUSED;
  int pass_denoising_normal = PASS_UNUSED;
  int pass_motion = PASS_UNUSED;

  /* For passes which don't need albedo channel for denoising we replace the actual albedo with
   * the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
   * the fake values and denoising of passes which do need albedo can no longer happen. */
  bool albedo_replaced_with_fake = false;
};
|
||||
|
||||
/* Description of a single pass which is to be denoised: the offsets of its noisy and denoised
 * versions as reported by the buffer parameters, and the properties taken from the pass info of
 * its type. */
class OptiXDevice::DenoisePass {
 public:
  DenoisePass(const PassType type, const BufferParams &buffer_params)
      : type(type),
        noisy_offset(buffer_params.get_pass_offset(type, PassMode::NOISY)),
        denoised_offset(buffer_params.get_pass_offset(type, PassMode::DENOISED))
  {
    const PassInfo info = Pass::get_info(type);

    num_components = info.num_components;
    use_compositing = info.use_compositing;
    use_denoising_albedo = info.use_denoising_albedo;
  }

  PassType type;

  /* Offsets of the noisy (input) and the denoised (output) versions of the pass. */
  int noisy_offset;
  int denoised_offset;

  /* Copied from the pass info. */
  int num_components;
  bool use_compositing;
  bool use_denoising_albedo;
};
|
||||
|
||||
bool OptiXDevice::denoise_buffer(const DeviceDenoiseTask &task)
|
||||
{
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
DenoiseContext context(this, task);
|
||||
|
||||
if (!denoise_ensure(context)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!denoise_filter_guiding_preprocess(context)) {
|
||||
LOG(ERROR) << "Error preprocessing guiding passes.";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Passes which will use real albedo when it is available. */
|
||||
denoise_pass(context, PASS_COMBINED);
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
|
||||
|
||||
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The queue owned by the denoiser state of this device. */
DeviceQueue *OptiXDevice::get_denoise_queue()
{
  DeviceQueue *denoise_queue = &denoiser_.queue;
  return denoise_queue;
}
|
||||
|
||||
/* Enqueue the kernel which preprocesses the guiding passes, with one work item per pixel of the
 * buffer.
 *
 * Returns the result of enqueueing the kernel on the denoiser queue. */
bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
{
  const BufferParams &params = context.buffer_params;
  auto &guiding = context.guiding_params;

  /* The arguments are passed by pointer, so all of them need to stay alive until the kernel is
   * enqueued. */
  DeviceKernelArguments args(&guiding.device_pointer,
                             &guiding.pass_stride,
                             &guiding.pass_albedo,
                             &guiding.pass_normal,
                             &guiding.pass_flow,
                             &context.render_buffers->buffer.device_pointer,
                             &params.offset,
                             &params.stride,
                             &params.pass_stride,
                             &context.pass_sample_count,
                             &context.pass_denoising_albedo,
                             &context.pass_denoising_normal,
                             &context.pass_motion,
                             &params.full_x,
                             &params.full_y,
                             &params.width,
                             &params.height,
                             &context.num_samples);

  const int num_pixels = params.width * params.height;

  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, num_pixels, args);
}
|
||||
|
||||
/* Enqueue the kernel which sets fake albedo in the guiding passes, with one work item per pixel
 * of the buffer.
 *
 * Returns the result of enqueueing the kernel on the denoiser queue. */
bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context)
{
  const BufferParams &params = context.buffer_params;
  auto &guiding = context.guiding_params;

  DeviceKernelArguments args(&guiding.device_pointer,
                             &guiding.pass_stride,
                             &guiding.pass_albedo,
                             &params.width,
                             &params.height);

  const int num_pixels = params.width * params.height;

  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, num_pixels, args);
}
|
||||
|
||||
/* Denoise a single pass of the given type.
 *
 * Nothing is done when the buffer has no noisy version of the pass. Otherwise the noisy pass is
 * read and preprocessed into the denoised pass, the OptiX denoiser is run on it, and the result
 * is postprocessed. Any failure is logged and aborts denoising of this pass.
 *
 * The first pass which does not use the denoising albedo replaces the albedo guiding pass with
 * the fake one, after which passes which do use the albedo can no longer be denoised. */
void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
{
  const BufferParams &buffer_params = context.buffer_params;

  const DenoisePass pass(pass_type, buffer_params);

  if (pass.noisy_offset == PASS_UNUSED) {
    return;
  }
  if (pass.denoised_offset == PASS_UNUSED) {
    LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
    return;
  }

  if (pass.use_denoising_albedo) {
    if (context.albedo_replaced_with_fake) {
      LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
      return;
    }
  }
  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
    context.albedo_replaced_with_fake = true;
    if (!denoise_filter_guiding_set_fake_albedo(context)) {
      LOG(ERROR) << "Error replacing real albedo with the fake one.";
      return;
    }
  }

  /* Read and preprocess noisy color input pass. */
  denoise_color_read(context, pass);
  if (!denoise_filter_color_preprocess(context, pass)) {
    LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
    return;
  }

  if (!denoise_run(context, pass)) {
    LOG(ERROR) << "Error running OptiX denoiser.";
    return;
  }

  /* Store result in the combined pass of the render buffer.
   *
   * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
  if (!denoise_filter_color_postprocess(context, pass)) {
    LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
    return;
  }

  /* Wait for all the work enqueued for this pass. */
  denoiser_.queue.synchronize();
}
|
||||
|
||||
/* Read the noisy version of the pass into the memory of its denoised version.
 *
 * The pixels are read with a GPU pass accessor running on the denoiser queue. They are written
 * as 3 components per pixel, with the pass stride of the render buffer as the pixel stride. */
void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass &pass)
{
  PassAccessor::PassAccessInfo access_info;
  access_info.type = pass.type;
  access_info.mode = PassMode::NOISY;
  access_info.offset = pass.noisy_offset;

  /* Denoiser operates on passes which are used to calculate the approximation, and is never used
   * on the approximation. The latter is not even possible because OptiX does not support
   * denoising of semi-transparent pixels. */
  access_info.use_approximate_shadow_catcher = false;
  access_info.use_approximate_shadow_catcher_background = false;
  access_info.show_active_pixels = false;

  /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
   */
  const PassAccessorGPU accessor(&denoiser_.queue, access_info, 1.0f, context.num_samples);

  /* The destination is the denoised pass inside of the render buffer. */
  PassAccessor::Destination dest(access_info.type);
  dest.d_pixels = context.render_buffers->buffer.device_pointer +
                  pass.denoised_offset * sizeof(float);
  dest.num_components = 3;
  dest.pixel_stride = context.buffer_params.pass_stride;

  /* Use a copy of the buffer parameters in which the window covers the entire buffer. */
  BufferParams full_params = context.buffer_params;
  full_params.window_x = 0;
  full_params.window_y = 0;
  full_params.window_width = full_params.width;
  full_params.window_height = full_params.height;

  accessor.get_render_tile_pixels(context.render_buffers, full_params, dest);
}
|
||||
|
||||
/* Enqueue the kernel which preprocesses the color stored in the denoised pass, with one work
 * item per pixel of the buffer.
 *
 * Returns the result of enqueueing the kernel on the denoiser queue. */
bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
{
  const BufferParams &params = context.buffer_params;

  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
                             &params.full_x,
                             &params.full_y,
                             &params.width,
                             &params.height,
                             &params.offset,
                             &params.stride,
                             &params.pass_stride,
                             &pass.denoised_offset);

  const int num_pixels = params.width * params.height;

  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, num_pixels, args);
}
|
||||
|
||||
/* Enqueue the kernel which postprocesses the result of the denoiser, with one work item per
 * pixel of the buffer.
 *
 * Returns the result of enqueueing the kernel on the denoiser queue. */
bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
                                                   const DenoisePass &pass)
{
  const BufferParams &params = context.buffer_params;

  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
                             &params.full_x,
                             &params.full_y,
                             &params.width,
                             &params.height,
                             &params.offset,
                             &params.stride,
                             &params.pass_stride,
                             &context.num_samples,
                             &pass.noisy_offset,
                             &pass.denoised_offset,
                             &context.pass_sample_count,
                             &pass.num_components,
                             &pass.use_compositing);

  const int num_pixels = params.width * params.height;

  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, num_pixels, args);
}
|
||||
|
||||
/* Make sure the OptiX denoiser exists and is configured for the given context.
 *
 * Returns false, after logging an error, as soon as one of the two steps fails. Configuration
 * is only attempted after successful creation. */
bool OptiXDevice::denoise_ensure(DenoiseContext &context)
{
  const bool is_created = denoise_create_if_needed(context);
  if (!is_created) {
    LOG(ERROR) << "OptiX denoiser creation has failed.";
    return false;
  }

  const bool is_configured = denoise_configure_if_needed(context);
  if (!is_configured) {
    LOG(ERROR) << "OptiX denoiser configuration has failed.";
    return false;
  }

  return true;
}
|
||||
|
||||
/* Create the OptiX denoiser handle when there is none yet, or re-create it when the set of used
 * guiding passes differs from the one the existing handle was created with.
 *
 * A newly created denoiser is marked as not configured. Returns false, after setting the device
 * error, when the creation has failed. */
bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
{
  const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
                                 (denoiser_.use_pass_albedo != context.use_pass_albedo) ||
                                 (denoiser_.use_pass_normal != context.use_pass_normal) ||
                                 (denoiser_.use_pass_flow != context.use_pass_flow);
  if (!recreate_denoiser) {
    return true;
  }

  /* Destroy existing handle before creating new one. */
  if (denoiser_.optix_denoiser) {
    optixDenoiserDestroy(denoiser_.optix_denoiser);

    /* Forget the handle right away: if the creation below fails the destroyed handle is not to
     * be used or destroyed again later on. */
    denoiser_.optix_denoiser = nullptr;
  }

  /* Create OptiX denoiser handle on demand when it is first used. */
  OptixDenoiserOptions denoiser_options = {};
  denoiser_options.guideAlbedo = context.use_pass_albedo;
  denoiser_options.guideNormal = context.use_pass_normal;

  /* The flow pass is only supported by the temporal model. */
  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
  if (context.use_pass_flow) {
    model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
  }

  /* NOTE: `this->context` is the OptiX context of the device, as opposite of the `context`
   * argument which is the denoising context. */
  const OptixResult result = optixDenoiserCreate(
      this->context, model, &denoiser_options, &denoiser_.optix_denoiser);

  if (result != OPTIX_SUCCESS) {
    set_error("Failed to create OptiX denoiser");
    return false;
  }

  /* OptiX denoiser handle was created with the requested number of input passes. */
  denoiser_.use_pass_albedo = context.use_pass_albedo;
  denoiser_.use_pass_normal = context.use_pass_normal;
  denoiser_.use_pass_flow = context.use_pass_flow;

  /* OptiX denoiser has been created, but it needs configuration. */
  denoiser_.is_configured = false;

  return true;
}
|
||||
|
||||
/* Set the denoiser up for the tile size of the given context, unless it is already configured
 * for that exact size.
 *
 * The state and the scratch memory are stored in a single device allocation: the state comes
 * first and the scratch follows it. Returns false, after setting the device error, when the
 * setup has failed. */
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
{
  /* Limit maximum tile size denoiser can be invoked with. */
  const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
                                   min(context.buffer_params.height, 4096));

  /* The configured size is only looked at when the denoiser has been configured. */
  if (denoiser_.is_configured) {
    if (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)
    {
      return true;
    }
  }

  optix_assert(optixDenoiserComputeMemoryResources(
      denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));

  const auto state_size = denoiser_.sizes.stateSizeInBytes;
  const auto scratch_size = denoiser_.sizes.withOverlapScratchSizeInBytes;
  const auto overlap = denoiser_.sizes.overlapWindowSizeInPixels;

  /* Allocate denoiser state if tile size has changed since last setup. */
  denoiser_.state.alloc_to_device(state_size + scratch_size);

  /* Initialize denoiser state for the current tile size. */
  const OptixResult result = optixDenoiserSetup(
      denoiser_.optix_denoiser,
      0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
          * on a stream that is not the default stream. */
      tile_size.x + overlap * 2,
      tile_size.y + overlap * 2,
      denoiser_.state.device_pointer,
      state_size,
      denoiser_.state.device_pointer + state_size,
      scratch_size);
  if (result != OPTIX_SUCCESS) {
    set_error("Failed to set up OptiX denoiser");
    return false;
  }

  cuda_assert(cuCtxSynchronize());

  denoiser_.configured_size = tile_size;
  denoiser_.is_configured = true;

  return true;
}
|
||||
|
||||
/* Run the configured OptiX denoiser for a single pass.
 *
 * The color layer is located at the denoised offset of the pass within the render buffers and is
 * used both as the denoiser input and as its output, so denoising happens in-place. The previous
 * output and the guiding layers (albedo, normal, flow) are only filled in when the context
 * enables them; layers which stay zero-initialized are passed on with a null data pointer.
 *
 * Returns true once the tiled denoiser invocation has been issued.
 * NOTE(review): error handling is delegated to `optix_assert`, which is defined outside of this
 * function - confirm how it reports failures to the caller. */
bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
{
  const BufferParams &buffer_params = context.buffer_params;
  const int width = buffer_params.width;
  const int height = buffer_params.height;

  /* Set up input and output layer information. */
  OptixImage2D color_layer = {0};
  OptixImage2D albedo_layer = {0};
  OptixImage2D normal_layer = {0};
  OptixImage2D flow_layer = {0};

  OptixImage2D output_layer = {0};
  OptixImage2D prev_output_layer = {0};

  /* Color pass. */
  {
    const int pass_denoised = pass.denoised_offset;
    const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);

    color_layer.data = context.render_buffers->buffer.device_pointer +
                       pass_denoised * sizeof(float);
    color_layer.width = width;
    color_layer.height = height;
    color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
    color_layer.pixelStrideInBytes = pass_stride_in_bytes;
    color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
  }

  /* Previous output. */
  if (context.prev_output.offset != PASS_UNUSED) {
    const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);

    prev_output_layer.data = context.prev_output.device_pointer +
                             context.prev_output.offset * sizeof(float);
    prev_output_layer.width = width;
    prev_output_layer.height = height;
    prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
    prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
    prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
  }

  /* Optional albedo, normal and flow guiding passes. */
  if (context.num_input_passes > 1) {
    const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
    const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
    const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;

    if (context.use_pass_albedo) {
      albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
      albedo_layer.width = width;
      albedo_layer.height = height;
      albedo_layer.rowStrideInBytes = row_stride_in_bytes;
      albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
      albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
    }

    if (context.use_pass_normal) {
      normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
      normal_layer.width = width;
      normal_layer.height = height;
      normal_layer.rowStrideInBytes = row_stride_in_bytes;
      normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
      normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
    }

    if (context.use_pass_flow) {
      /* Unlike the other guiding layers, flow is a two-component image. */
      flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
      flow_layer.width = width;
      flow_layer.height = height;
      flow_layer.rowStrideInBytes = row_stride_in_bytes;
      flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
      flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
    }
  }

  /* Denoise in-place of the noisy input in the render buffers. */
  output_layer = color_layer;

  OptixDenoiserGuideLayer guide_layers = {};
  guide_layers.albedo = albedo_layer;
  guide_layers.normal = normal_layer;
  guide_layers.flow = flow_layer;

  OptixDenoiserLayer image_layers = {};
  image_layers.input = color_layer;
  image_layers.previousOutput = prev_output_layer;
  image_layers.output = output_layer;

  /* Finally run denoising. */
  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */

  /* The state buffer holds the denoiser state first, directly followed by the scratch memory,
   * hence the scratch pointer is the state pointer offset by the state size. */
  optix_assert(ccl::optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
                                                 denoiser_.queue.stream(),
                                                 &params,
                                                 denoiser_.state.device_pointer,
                                                 denoiser_.sizes.stateSizeInBytes,
                                                 &guide_layers,
                                                 &image_layers,
                                                 1,
                                                 denoiser_.state.device_pointer +
                                                     denoiser_.sizes.stateSizeInBytes,
                                                 denoiser_.sizes.withOverlapScratchSizeInBytes,
                                                 denoiser_.sizes.overlapWindowSizeInPixels,
                                                 denoiser_.configured_size.x,
                                                 denoiser_.configured_size.y));

  return true;
}
|
||||
|
||||
bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
||||
OptixBuildOperation operation,
|
||||
const OptixBuildInput &build_input,
|
||||
|
|
|
@ -1,17 +1,14 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2019, NVIDIA Corporation.
|
||||
* Copyright 2019-2022 Blender Foundation. */
|
||||
* Copyright 2019, NVIDIA Corporation
|
||||
* Copyright 2019-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
|
||||
# include "device/cuda/device_impl.h"
|
||||
# include "device/optix/queue.h"
|
||||
# include "device/optix/util.h"
|
||||
# include "kernel/osl/globals.h"
|
||||
# include "kernel/types.h"
|
||||
# include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -87,32 +84,6 @@ class OptiXDevice : public CUDADevice {
|
|||
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
||||
thread_mutex delayed_free_bvh_mutex;
|
||||
|
||||
class Denoiser {
|
||||
public:
|
||||
explicit Denoiser(OptiXDevice *device);
|
||||
|
||||
OptiXDevice *device;
|
||||
OptiXDeviceQueue queue;
|
||||
|
||||
OptixDenoiser optix_denoiser = nullptr;
|
||||
|
||||
/* Configuration size, as provided to `optixDenoiserSetup`.
|
||||
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
|
||||
* `is_configured` will be false. */
|
||||
bool is_configured = false;
|
||||
int2 configured_size = make_int2(0, 0);
|
||||
|
||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
||||
device_only_memory<unsigned char> state;
|
||||
OptixDenoiserSizes sizes = {};
|
||||
|
||||
bool use_pass_albedo = false;
|
||||
bool use_pass_normal = false;
|
||||
bool use_pass_flow = false;
|
||||
};
|
||||
Denoiser denoiser_;
|
||||
|
||||
public:
|
||||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
~OptiXDevice();
|
||||
|
@ -142,53 +113,6 @@ class OptiXDevice : public CUDADevice {
|
|||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
void *get_cpu_osl_memory() override;
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Denoising.
|
||||
*/
|
||||
|
||||
class DenoiseContext;
|
||||
class DenoisePass;
|
||||
|
||||
virtual bool denoise_buffer(const DeviceDenoiseTask &task) override;
|
||||
virtual DeviceQueue *get_denoise_queue() override;
|
||||
|
||||
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
|
||||
* OptiX and store in the guiding passes memory within the given context.
|
||||
*
|
||||
* Pre-processing of the guiding passes is to only happen once per context lifetime. Do not
|
||||
* preprocess them for every pass which is being denoised. */
|
||||
bool denoise_filter_guiding_preprocess(DenoiseContext &context);
|
||||
|
||||
/* Set fake albedo pixels in the albedo guiding pass storage.
|
||||
* After this point only passes which do not need albedo for denoising can be processed. */
|
||||
bool denoise_filter_guiding_set_fake_albedo(DenoiseContext &context);
|
||||
|
||||
void denoise_pass(DenoiseContext &context, PassType pass_type);
|
||||
|
||||
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
|
||||
* input within the given context. Pixels are scaled to the number of samples, but are not
|
||||
* preprocessed yet. */
|
||||
void denoise_color_read(DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
|
||||
* denoiser result to the render buffer. */
|
||||
bool denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass);
|
||||
bool denoise_filter_color_postprocess(DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Make sure the OptiX denoiser is created and configured. */
|
||||
bool denoise_ensure(DenoiseContext &context);
|
||||
|
||||
/* Create OptiX denoiser descriptor if needed.
|
||||
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
|
||||
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
|
||||
bool denoise_create_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
|
||||
bool denoise_configure_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Run configured denoiser. */
|
||||
bool denoise_run(DenoiseContext &context, const DenoisePass &pass);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -8,7 +8,7 @@ set(INC
|
|||
set(SRC
|
||||
adaptive_sampling.cpp
|
||||
denoiser.cpp
|
||||
denoiser_device.cpp
|
||||
denoiser_gpu.cpp
|
||||
denoiser_oidn.cpp
|
||||
denoiser_optix.cpp
|
||||
path_trace.cpp
|
||||
|
@ -30,7 +30,7 @@ set(SRC
|
|||
set(SRC_HEADERS
|
||||
adaptive_sampling.h
|
||||
denoiser.h
|
||||
denoiser_device.h
|
||||
denoiser_gpu.h
|
||||
denoiser_oidn.h
|
||||
denoiser_optix.h
|
||||
path_trace.h
|
||||
|
|
|
@ -16,9 +16,11 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
|
|||
{
|
||||
DCHECK(params.use);
|
||||
|
||||
#ifdef WITH_OPTIX
|
||||
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
|
||||
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Always fallback to OIDN. */
|
||||
DenoiseParams oidn_params = params;
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "integrator/denoiser.h"
|
||||
#include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Denoiser which uses device-specific denoising implementation, such as OptiX denoiser which are
|
||||
* implemented as a part of a driver of specific device.
|
||||
*
|
||||
* This implementation makes sure the to-be-denoised buffer is available on the denoising device
|
||||
* and invoke denoising kernel via device API. */
|
||||
class DeviceDenoiser : public Denoiser {
|
||||
public:
|
||||
DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||
~DeviceDenoiser();
|
||||
|
||||
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification) override;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "integrator/denoiser_device.h"
|
||||
#include "integrator/denoiser_gpu.h"
|
||||
|
||||
#include "device/denoise.h"
|
||||
#include "device/device.h"
|
||||
|
@ -13,17 +13,17 @@
|
|||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
DenoiserGPU::DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
: Denoiser(path_trace_device, params)
|
||||
{
|
||||
}
|
||||
|
||||
DeviceDenoiser::~DeviceDenoiser()
|
||||
DenoiserGPU::~DenoiserGPU()
|
||||
{
|
||||
/* Explicit implementation, to allow forward declaration of Device in the header. */
|
||||
}
|
||||
|
||||
bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
||||
bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification)
|
||||
|
@ -33,7 +33,7 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||
return false;
|
||||
}
|
||||
|
||||
DeviceDenoiseTask task;
|
||||
DenoiseTask task;
|
||||
task.params = params_;
|
||||
task.num_samples = num_samples;
|
||||
task.buffer_params = buffer_params;
|
||||
|
@ -50,8 +50,6 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||
else {
|
||||
VLOG_WORK << "Creating temporary buffer on denoiser device.";
|
||||
|
||||
DeviceQueue *queue = denoiser_device->get_denoise_queue();
|
||||
|
||||
/* Create buffer which is available by the device used by denoiser. */
|
||||
|
||||
/* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
|
||||
|
@ -70,13 +68,13 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||
render_buffers->buffer.data(),
|
||||
sizeof(float) * local_render_buffers.buffer.size());
|
||||
|
||||
queue->copy_to_device(local_render_buffers.buffer);
|
||||
denoiser_queue_->copy_to_device(local_render_buffers.buffer);
|
||||
|
||||
task.render_buffers = &local_render_buffers;
|
||||
task.allow_inplace_modification = true;
|
||||
}
|
||||
|
||||
const bool denoise_result = denoiser_device->denoise_buffer(task);
|
||||
const bool denoise_result = denoise_buffer(task);
|
||||
|
||||
if (local_buffer_used) {
|
||||
local_render_buffers.copy_from_device();
|
||||
|
@ -90,4 +88,21 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||
return denoise_result;
|
||||
}
|
||||
|
||||
/* Make sure both the denoiser device and the queue used to run denoising on it exist.
 *
 * Returns the denoiser device, or null when either the device or its queue is not available. */
Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
{
  Device *const device = Denoiser::ensure_denoiser_device(progress);
  if (device == nullptr) {
    return nullptr;
  }

  /* The queue is created on first use and is re-used afterwards. */
  if (denoiser_queue_ == nullptr) {
    denoiser_queue_ = device->gpu_queue_create();
  }

  return (denoiser_queue_ != nullptr) ? device : nullptr;
}
|
||||
|
||||
CCL_NAMESPACE_END
|
|
@ -0,0 +1,52 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "integrator/denoiser.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Implementation of Denoiser which uses a device-specific denoising implementation, running on a
|
||||
* GPU device queue. It makes sure the to-be-denoised buffer is available on the denoising device
|
||||
* and invokes denoising kernels via the device queue API. */
|
||||
class DenoiserGPU : public Denoiser {
|
||||
public:
|
||||
DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||
~DenoiserGPU();
|
||||
|
||||
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
||||
RenderBuffers *render_buffers,
|
||||
const int num_samples,
|
||||
bool allow_inplace_modification) override;
|
||||
|
||||
protected:
|
||||
/* All the parameters needed to perform buffer denoising on a device.
|
||||
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
|
||||
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
|
||||
* single place where they are all listed, so that it's not required to modify all device methods
|
||||
* when these parameters do change. */
|
||||
class DenoiseTask {
|
||||
public:
|
||||
DenoiseParams params;
|
||||
|
||||
int num_samples;
|
||||
|
||||
RenderBuffers *render_buffers;
|
||||
BufferParams buffer_params;
|
||||
|
||||
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
|
||||
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
|
||||
* tracer) point of view. */
|
||||
bool allow_inplace_modification;
|
||||
};
|
||||
|
||||
/* Returns true if task is fully handled. */
|
||||
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
|
||||
|
||||
virtual Device *ensure_denoiser_device(Progress *progress) override;
|
||||
|
||||
unique_ptr<DeviceQueue> denoiser_queue_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
|
@ -1,16 +1,216 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "integrator/denoiser_optix.h"
|
||||
#ifdef WITH_OPTIX
|
||||
|
||||
#include "device/denoise.h"
|
||||
#include "device/device.h"
|
||||
# include "integrator/denoiser_optix.h"
|
||||
# include "integrator/pass_accessor_gpu.h"
|
||||
|
||||
# include "device/optix/device_impl.h"
|
||||
# include "device/optix/queue.h"
|
||||
|
||||
# include <optix_denoiser_tiling.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||
: DeviceDenoiser(path_trace_device, params)
|
||||
# if OPTIX_ABI_VERSION >= 60
|
||||
using ::optixUtilDenoiserInvokeTiled;
|
||||
# else
|
||||
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
|
||||
// issues without bumping SDK or driver requirement.
|
||||
//
|
||||
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
||||
/* Split the given input/output image pair into tiles and append them to `tiles`.
 *
 * The input window of every tile is at most the tile size plus the overlap on both sides
 * (clamped to the image size), while the output windows cover the image without overlapping.
 * Tiles are appended row by row, left to right within a row.
 *
 * Returns OPTIX_ERROR_INVALID_VALUE when either tile dimension is zero, OPTIX_SUCCESS otherwise.
 *
 * NOTE: The mix of signed and unsigned arithmetic is kept exactly as in the SDK version of this
 * function; the byte offsets are calculated in `size_t` to avoid integer overflow. */
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
                                               const OptixImage2D &output,
                                               unsigned int overlapWindowSizeInPixels,
                                               unsigned int tileWidth,
                                               unsigned int tileHeight,
                                               std::vector<OptixUtilDenoiserImageTile> &tiles)
{
  if (tileWidth == 0 || tileHeight == 0) {
    return OPTIX_ERROR_INVALID_VALUE;
  }

  const unsigned int input_pixel_stride = optixUtilGetPixelStride(input);
  const unsigned int output_pixel_stride = optixUtilGetPixelStride(output);

  const int overlap = static_cast<int>(overlapWindowSizeInPixels);
  const int image_width = static_cast<int>(input.width);
  const int image_height = static_cast<int>(input.height);

  /* Size of the input window of a tile, including the overlap on both sides. */
  const int window_width = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
  const int window_height = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);

  int row = 0;
  int rows_copied = 0;
  do {
    const bool is_first_row = (row == 0);
    const int offset_y = is_first_row ?
                             0 :
                             std::max(overlap, window_height - (image_height - row));
    const int output_height = is_first_row ?
                                  std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
                                  std::min(tileHeight, input.height - rows_copied);

    int column = 0;
    int columns_copied = 0;
    do {
      const bool is_first_column = (column == 0);
      const int offset_x = is_first_column ?
                               0 :
                               std::max(overlap, window_width - (image_width - column));
      const int output_width = is_first_column ?
                                   std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
                                   std::min(tileWidth, input.width - columns_copied);

      OptixUtilDenoiserImageTile tile;

      /* Input window starts `offset` pixels before the output position of the tile. */
      tile.input.data = input.data +
                        static_cast<size_t>(row - offset_y) * input.rowStrideInBytes +
                        static_cast<size_t>(column - offset_x) * input_pixel_stride;
      tile.input.width = window_width;
      tile.input.height = window_height;
      tile.input.rowStrideInBytes = input.rowStrideInBytes;
      tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
      tile.input.format = input.format;

      tile.output.data = output.data + static_cast<size_t>(row) * output.rowStrideInBytes +
                         static_cast<size_t>(column) * output_pixel_stride;
      tile.output.width = output_width;
      tile.output.height = output_height;
      tile.output.rowStrideInBytes = output.rowStrideInBytes;
      tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
      tile.output.format = output.format;

      tile.inputOffsetX = offset_x;
      tile.inputOffsetY = offset_y;

      tiles.push_back(tile);

      /* The first tile of a row also covers the leading overlap region. */
      column += is_first_column ? tileWidth + overlapWindowSizeInPixels : tileWidth;
      columns_copied += output_width;
    } while (column < image_width);

    /* Same for the first row of tiles. */
    row += is_first_row ? tileHeight + overlapWindowSizeInPixels : tileHeight;
    rows_copied += output_height;
  } while (row < image_height);

  return OPTIX_SUCCESS;
}
|
||||
|
||||
/* Run the denoiser tile by tile.
 *
 * Every layer (input, output and the optional previous output) and every guide image which has
 * data is split into tiles of the requested size, after which `optixDenoiserInvoke()` is called
 * once per tile with the same denoiser state and scratch memory.
 *
 * Returns OPTIX_ERROR_INVALID_VALUE when `guideLayer` or `layers` is null or when `numLayers` is
 * zero (the tile count is taken from the first layer, so at least one layer is required).
 * Otherwise returns the first error reported by the image splitting or by the denoiser
 * invocation, or OPTIX_SUCCESS when all tiles have been processed.
 *
 * NOTE(review): all images are expected to split into the same number of tiles as the first
 * layer, which is not verified here. */
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
                                                CUstream stream,
                                                const OptixDenoiserParams *params,
                                                CUdeviceptr denoiserState,
                                                size_t denoiserStateSizeInBytes,
                                                const OptixDenoiserGuideLayer *guideLayer,
                                                const OptixDenoiserLayer *layers,
                                                unsigned int numLayers,
                                                CUdeviceptr scratch,
                                                size_t scratchSizeInBytes,
                                                unsigned int overlapWindowSizeInPixels,
                                                unsigned int tileWidth,
                                                unsigned int tileHeight)
{
  if (!guideLayer || !layers || numLayers == 0) {
    return OPTIX_ERROR_INVALID_VALUE;
  }

  /* Split an image which is only read by the denoiser. Images without data are skipped and
   * leave `image_tiles` empty. The image itself is passed as the (unused) output. */
  const auto split_input_only = [&](const OptixImage2D &image,
                                    std::vector<OptixUtilDenoiserImageTile> &image_tiles)
      -> OptixResult {
    if (!image.data) {
      return OPTIX_SUCCESS;
    }
    return ccl::optixUtilDenoiserSplitImage(
        image, image, overlapWindowSizeInPixels, tileWidth, tileHeight, image_tiles);
  };

  std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
  std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
  for (unsigned int l = 0; l < numLayers; l++) {
    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
                                                                 layers[l].output,
                                                                 overlapWindowSizeInPixels,
                                                                 tileWidth,
                                                                 tileHeight,
                                                                 tiles[l])) {
      return res;
    }

    if (const OptixResult res = split_input_only(layers[l].previousOutput, prevTiles[l])) {
      return res;
    }
  }

  std::vector<OptixUtilDenoiserImageTile> albedoTiles;
  if (const OptixResult res = split_input_only(guideLayer->albedo, albedoTiles)) {
    return res;
  }

  std::vector<OptixUtilDenoiserImageTile> normalTiles;
  if (const OptixResult res = split_input_only(guideLayer->normal, normalTiles)) {
    return res;
  }

  std::vector<OptixUtilDenoiserImageTile> flowTiles;
  if (const OptixResult res = split_input_only(guideLayer->flow, flowTiles)) {
    return res;
  }

  /* Re-used for every tile to avoid an allocation per tile. */
  std::vector<OptixDenoiserLayer> tlayers;
  tlayers.reserve(numLayers);

  for (size_t t = 0; t < tiles[0].size(); t++) {
    tlayers.clear();
    for (unsigned int l = 0; l < numLayers; l++) {
      OptixDenoiserLayer layer = {};
      layer.input = (tiles[l])[t].input;
      layer.output = (tiles[l])[t].output;
      if (layers[l].previousOutput.data) {
        layer.previousOutput = (prevTiles[l])[t].input;
      }
      tlayers.push_back(layer);
    }

    OptixDenoiserGuideLayer gl = {};
    if (guideLayer->albedo.data) {
      gl.albedo = albedoTiles[t].input;
    }
    if (guideLayer->normal.data) {
      gl.normal = normalTiles[t].input;
    }
    if (guideLayer->flow.data) {
      gl.flow = flowTiles[t].input;
    }

    if (const OptixResult res = optixDenoiserInvoke(denoiser,
                                                    stream,
                                                    params,
                                                    denoiserState,
                                                    denoiserStateSizeInBytes,
                                                    &gl,
                                                    tlayers.data(),
                                                    numLayers,
                                                    (tiles[0])[t].inputOffsetX,
                                                    (tiles[0])[t].inputOffsetY,
                                                    scratch,
                                                    scratchSizeInBytes)) {
      return res;
    }
  }

  return OPTIX_SUCCESS;
}
|
||||
# endif
|
||||
|
||||
/* Construct the denoiser. The memory for the denoiser state is only declared here (on the path
 * trace device, under the name "__denoiser_state"), no OptiX denoiser is created yet. */
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params)
    : DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
{
}
|
||||
|
||||
OptiXDenoiser::~OptiXDenoiser()
{
  if (optix_denoiser_ == nullptr) {
    /* No denoiser has been created, nothing to clean up. */
    return;
  }

  /* The OptixDenoiser handle is to be destroyed before the OptixDeviceContext handle. This is
   * guaranteed here, since the local denoising device which owns the OptiX device context is
   * only deleted afterwards, as part of the Denoiser class destructor call. */
  optixDenoiserDestroy(optix_denoiser_);
}
|
||||
|
||||
uint OptiXDenoiser::get_device_type_mask() const
|
||||
|
@ -18,4 +218,569 @@ uint OptiXDenoiser::get_device_type_mask() const
|
|||
return DEVICE_MASK_OPTIX;
|
||||
}
|
||||
|
||||
/* State shared by all the steps of denoising a single task: which guiding passes are used, where
 * they are read from in the render buffers, and where the denoiser is to find them. */
class OptiXDenoiser::DenoiseContext {
 public:
  explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
      : denoise_params(task.params),
        render_buffers(task.render_buffers),
        buffer_params(task.buffer_params),
        guiding_buffer(device, "denoiser guiding passes buffer", true),
        num_samples(task.num_samples)
  {
    /* Normal is only ever used together with albedo. */
    use_pass_albedo = denoise_params.use_pass_albedo;
    use_pass_normal = use_pass_albedo && denoise_params.use_pass_normal;
    /* Temporally stable denoising needs both the previous output and the motion pass. */
    use_pass_motion = denoise_params.temporally_stable;

    if (use_pass_albedo) {
      pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
    }
    if (use_pass_normal) {
      pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
    }
    if (use_pass_motion) {
      prev_output.device_pointer = render_buffers->buffer.device_pointer;
      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
      prev_output.stride = buffer_params.stride;
      prev_output.pass_stride = buffer_params.pass_stride;

      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
    }

    /* The color pass is always there, guiding passes come on top of it. */
    num_input_passes = 1 + static_cast<int>(use_pass_albedo) +
                       static_cast<int>(use_pass_normal) + static_cast<int>(use_pass_motion);
    use_guiding_passes = num_input_passes > 1;

    if (use_guiding_passes) {
      if (task.allow_inplace_modification) {
        /* Guiding passes are used directly from the render buffers. */
        guiding_params.device_pointer = render_buffers->buffer.device_pointer;

        guiding_params.pass_albedo = pass_denoising_albedo;
        guiding_params.pass_normal = pass_denoising_normal;
        guiding_params.pass_flow = pass_motion;

        guiding_params.stride = buffer_params.stride;
        guiding_params.pass_stride = buffer_params.pass_stride;
      }
      else {
        /* Guiding passes are tightly packed into a dedicated buffer: 3 floats for each of
         * albedo and normal, 2 floats for flow. */
        int num_floats_per_pixel = 0;
        if (use_pass_albedo) {
          guiding_params.pass_albedo = num_floats_per_pixel;
          num_floats_per_pixel += 3;
        }
        if (use_pass_normal) {
          guiding_params.pass_normal = num_floats_per_pixel;
          num_floats_per_pixel += 3;
        }
        if (use_pass_motion) {
          guiding_params.pass_flow = num_floats_per_pixel;
          num_floats_per_pixel += 2;
        }

        guiding_params.pass_stride = num_floats_per_pixel;
        guiding_params.stride = buffer_params.width;

        guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
                                       guiding_params.pass_stride);
        guiding_params.device_pointer = guiding_buffer.device_pointer;
      }
    }

    pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
  }

  /* NOTE: Both parameters are references to the task, which has to outlive the context. */
  const DenoiseParams &denoise_params;

  RenderBuffers *render_buffers = nullptr;
  const BufferParams &buffer_params;

  /* Location of the previous denoising result. Only set for temporally stable denoising. */
  struct {
    device_ptr device_pointer = 0;

    int offset = PASS_UNUSED;

    int stride = -1;
    int pass_stride = -1;
  } prev_output;

  /* Device-side storage of the guiding passes.
   * Only allocated when in-place modification of the render buffers is not allowed. */
  device_only_memory<float> guiding_buffer;

  /* Where the guiding passes are read from by the denoiser. */
  struct {
    device_ptr device_pointer = 0;

    /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
    int pass_albedo = PASS_UNUSED;
    int pass_normal = PASS_UNUSED;
    int pass_flow = PASS_UNUSED;

    int stride = -1;
    int pass_stride = -1;
  } guiding_params;

  /* Number of input passes: the color pass plus the enabled auxiliary passes. */
  int num_input_passes = 0;
  bool use_guiding_passes = false;
  bool use_pass_albedo = false;
  bool use_pass_normal = false;
  bool use_pass_motion = false;

  int num_samples = 0;

  int pass_sample_count = PASS_UNUSED;

  /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
  int pass_denoising_albedo = PASS_UNUSED;
  int pass_denoising_normal = PASS_UNUSED;
  int pass_motion = PASS_UNUSED;

  /* Passes which don't need the albedo channel for denoising get the actual albedo replaced with
   * (0.5, 0.5, 0.5). Once this flag is set the real albedo is gone, and passes which do need it
   * can no longer be denoised. */
  bool albedo_replaced_with_fake = false;
};
|
||||
|
||||
/* Description of a single pass to be denoised: offsets of its noisy and denoised versions in the
 * given buffer parameters, together with the static information about the pass type. */
class OptiXDenoiser::DenoisePass {
 public:
  DenoisePass(const PassType type, const BufferParams &buffer_params)
      : type(type),
        noisy_offset(buffer_params.get_pass_offset(type, PassMode::NOISY)),
        denoised_offset(buffer_params.get_pass_offset(type, PassMode::DENOISED))
  {
    const PassInfo info = Pass::get_info(type);
    num_components = info.num_components;
    use_compositing = info.use_compositing;
    use_denoising_albedo = info.use_denoising_albedo;
  }

  PassType type;

  /* Offsets of the pass as returned by `BufferParams::get_pass_offset()`. */
  int noisy_offset;
  int denoised_offset;

  /* Copied from the `PassInfo` of the pass type. */
  int num_components;
  bool use_compositing;
  bool use_denoising_albedo;
};
|
||||
|
||||
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
|
||||
{
|
||||
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
|
||||
|
||||
const CUDAContextScope scope(optix_device);
|
||||
|
||||
DenoiseContext context(optix_device, task);
|
||||
|
||||
if (!denoise_ensure(context)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!denoise_filter_guiding_preprocess(context)) {
|
||||
LOG(ERROR) << "Error preprocessing guiding passes.";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Passes which will use real albedo when it is available. */
|
||||
denoise_pass(context, PASS_COMBINED);
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
|
||||
|
||||
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
|
||||
denoise_pass(context, PASS_SHADOW_CATCHER);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&context.guiding_params.pass_normal,
|
||||
&context.guiding_params.pass_flow,
|
||||
&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&context.pass_sample_count,
|
||||
&context.pass_denoising_albedo,
|
||||
&context.pass_denoising_normal,
|
||||
&context.pass_motion,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&context.num_samples);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||
&context.guiding_params.pass_stride,
|
||||
&context.guiding_params.pass_albedo,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
||||
}
|
||||
|
||||
/* Denoise a single pass of the given type.
 *
 * Does nothing when the pass has no noisy version in the buffers. Any failure along the way is
 * logged and aborts the denoising of this pass. The denoiser queue is synchronized at the end of
 * a successful run. */
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
{
  const DenoisePass pass(pass_type, context.buffer_params);

  if (pass.noisy_offset == PASS_UNUSED) {
    return;
  }
  if (pass.denoised_offset == PASS_UNUSED) {
    LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
    return;
  }

  const bool needs_real_albedo = pass.use_denoising_albedo;

  /* Once the real albedo has been overwritten there is no way back. */
  if (needs_real_albedo && context.albedo_replaced_with_fake) {
    LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
    return;
  }

  /* First pass which does not need albedo: replace the real albedo with the fake one.
   * The flag is set prior to running the kernel, so the replacement is attempted only once. */
  if (!needs_real_albedo && context.use_guiding_passes && !context.albedo_replaced_with_fake) {
    context.albedo_replaced_with_fake = true;
    if (!denoise_filter_guiding_set_fake_albedo(context)) {
      LOG(ERROR) << "Error replacing real albedo with the fake one.";
      return;
    }
  }

  /* Read and preprocess noisy color input pass. */
  denoise_color_read(context, pass);
  if (!denoise_filter_color_preprocess(context, pass)) {
    LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
    return;
  }

  if (!denoise_run(context, pass)) {
    LOG(ERROR) << "Error running OptiX denoiser.";
    return;
  }

  /* Store the result in the denoised pass of the render buffer.
   *
   * This scales the denoiser result up to match the number of, possibly per-pixel, samples. */
  if (!denoise_filter_color_postprocess(context, pass)) {
    LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
    return;
  }

  denoiser_queue_->synchronize();
}
|
||||
|
||||
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
|
||||
{
|
||||
PassAccessor::PassAccessInfo pass_access_info;
|
||||
pass_access_info.type = pass.type;
|
||||
pass_access_info.mode = PassMode::NOISY;
|
||||
pass_access_info.offset = pass.noisy_offset;
|
||||
|
||||
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
|
||||
* on the approximation. The latter is not even possible because OptiX does not support
|
||||
* denoising of semi-transparent pixels. */
|
||||
pass_access_info.use_approximate_shadow_catcher = false;
|
||||
pass_access_info.use_approximate_shadow_catcher_background = false;
|
||||
pass_access_info.show_active_pixels = false;
|
||||
|
||||
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
|
||||
*/
|
||||
const PassAccessorGPU pass_accessor(
|
||||
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
||||
|
||||
PassAccessor::Destination destination(pass_access_info.type);
|
||||
destination.d_pixels = context.render_buffers->buffer.device_pointer +
|
||||
pass.denoised_offset * sizeof(float);
|
||||
destination.num_components = 3;
|
||||
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||
|
||||
BufferParams buffer_params = context.buffer_params;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
|
||||
const DenoisePass &pass)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&pass.denoised_offset);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
||||
}
|
||||
|
||||
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
|
||||
const DenoisePass &pass)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
|
||||
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||
&buffer_params.full_x,
|
||||
&buffer_params.full_y,
|
||||
&buffer_params.width,
|
||||
&buffer_params.height,
|
||||
&buffer_params.offset,
|
||||
&buffer_params.stride,
|
||||
&buffer_params.pass_stride,
|
||||
&context.num_samples,
|
||||
&pass.noisy_offset,
|
||||
&pass.denoised_offset,
|
||||
&context.pass_sample_count,
|
||||
&pass.num_components,
|
||||
&pass.use_compositing);
|
||||
|
||||
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
||||
}
|
||||
|
||||
/* Make sure the OptiX denoiser exists and is configured for the given context.
 *
 * Creation is attempted first; configuration is only attempted when creation succeeded.
 * Returns false (after logging an error) as soon as one of the steps fails. */
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
  const bool is_created = denoise_create_if_needed(context);
  if (!is_created) {
    LOG(ERROR) << "OptiX denoiser creation has failed.";
    return false;
  }

  const bool is_configured = denoise_configure_if_needed(context);
  if (!is_configured) {
    LOG(ERROR) << "OptiX denoiser configuration has failed.";
    return false;
  }

  return true;
}
|
||||
|
||||
/* Create the OptiX denoiser handle if there is none yet, or if the set of guiding passes it was
 * created for differs from the one requested by the context.
 *
 * A freshly created denoiser is left unconfigured (`is_configured_` is reset).
 *
 * Returns true when a usable handle exists after the call. On failure an error is set on the
 * denoiser device, false is returned, and no handle is kept. */
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
{
  const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
                                 (use_pass_albedo_ != context.use_pass_albedo) ||
                                 (use_pass_normal_ != context.use_pass_normal) ||
                                 (use_pass_motion_ != context.use_pass_motion);
  if (!recreate_denoiser) {
    return true;
  }

  /* Destroy existing handle before creating new one.
   *
   * The handle is reset right away: if the creation below fails the member must not keep
   * pointing at the destroyed denoiser, otherwise the next call would destroy it a second
   * time. */
  if (optix_denoiser_) {
    optixDenoiserDestroy(optix_denoiser_);
    optix_denoiser_ = nullptr;
  }

  /* Create OptiX denoiser handle on demand when it is first used. */
  OptixDenoiserOptions denoiser_options = {};
  denoiser_options.guideAlbedo = context.use_pass_albedo;
  denoiser_options.guideNormal = context.use_pass_normal;

  /* The temporal model is used when the motion pass is available. */
  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
  if (context.use_pass_motion) {
    model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
  }

  const OptixResult result = optixDenoiserCreate(
      static_cast<OptiXDevice *>(denoiser_device_)->context,
      model,
      &denoiser_options,
      &optix_denoiser_);

  if (result != OPTIX_SUCCESS) {
    /* Do not rely on what a failed creation left in the output handle. */
    optix_denoiser_ = nullptr;
    denoiser_device_->set_error("Failed to create OptiX denoiser");
    return false;
  }

  /* OptiX denoiser handle was created with the requested number of input passes. */
  use_pass_albedo_ = context.use_pass_albedo;
  use_pass_normal_ = context.use_pass_normal;
  use_pass_motion_ = context.use_pass_motion;

  /* OptiX denoiser has been created, but it needs configuration. */
  is_configured_ = false;

  return true;
}
|
||||
|
||||
/* Configure the existing OptiX denoiser for the buffer size of the given context.
 *
 * Does nothing when the denoiser is already configured for the same (clamped) tile size.
 * Otherwise queries the memory requirements, allocates the combined state and scratch buffer,
 * and runs the denoiser setup.
 *
 * Returns false, with an error set on the denoiser device, when the setup call fails. */
bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
{
  /* Limit maximum tile size denoiser can be invoked with. */
  const int max_tile_dimension = 4096;
  const int2 tile_size = make_int2(min(context.buffer_params.width, max_tile_dimension),
                                   min(context.buffer_params.height, max_tile_dimension));

  const bool is_same_size = (configured_size_.x == tile_size.x) &&
                            (configured_size_.y == tile_size.y);
  if (is_configured_ && is_same_size) {
    return true;
  }

  optix_device_assert(
      denoiser_device_,
      optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));

  /* (Re-)allocate the buffer holding both the denoiser state and the scratch memory. */
  state_.device = denoiser_device_;
  state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);

  /* Initialize denoiser state for the current tile size.
   * The tile is padded with the overlap window on each side. The scratch memory follows the
   * state in the same buffer. */
  const OptixResult setup_result = optixDenoiserSetup(
      optix_denoiser_,
      0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
          * on a stream that is not the default stream. */
      tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
      tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
      state_.device_pointer,
      sizes_.stateSizeInBytes,
      state_.device_pointer + sizes_.stateSizeInBytes,
      sizes_.withOverlapScratchSizeInBytes);
  if (setup_result != OPTIX_SUCCESS) {
    denoiser_device_->set_error("Failed to set up OptiX denoiser");
    return false;
  }

  cuda_device_assert(denoiser_device_, cuCtxSynchronize());

  /* Remember what the denoiser was configured for. */
  configured_size_ = tile_size;
  is_configured_ = true;

  return true;
}
|
||||
|
||||
bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
|
||||
{
|
||||
const BufferParams &buffer_params = context.buffer_params;
|
||||
const int width = buffer_params.width;
|
||||
const int height = buffer_params.height;
|
||||
|
||||
/* Set up input and output layer information. */
|
||||
OptixImage2D color_layer = {0};
|
||||
OptixImage2D albedo_layer = {0};
|
||||
OptixImage2D normal_layer = {0};
|
||||
OptixImage2D flow_layer = {0};
|
||||
|
||||
OptixImage2D output_layer = {0};
|
||||
OptixImage2D prev_output_layer = {0};
|
||||
|
||||
/* Color pass. */
|
||||
{
|
||||
const int pass_denoised = pass.denoised_offset;
|
||||
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
|
||||
|
||||
color_layer.data = context.render_buffers->buffer.device_pointer +
|
||||
pass_denoised * sizeof(float);
|
||||
color_layer.width = width;
|
||||
color_layer.height = height;
|
||||
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
|
||||
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
||||
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
}
|
||||
|
||||
/* Previous output. */
|
||||
if (context.prev_output.offset != PASS_UNUSED) {
|
||||
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
|
||||
|
||||
prev_output_layer.data = context.prev_output.device_pointer +
|
||||
context.prev_output.offset * sizeof(float);
|
||||
prev_output_layer.width = width;
|
||||
prev_output_layer.height = height;
|
||||
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
|
||||
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
||||
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
}
|
||||
|
||||
/* Optional albedo and color passes. */
|
||||
if (context.num_input_passes > 1) {
|
||||
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
|
||||
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
|
||||
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
|
||||
|
||||
if (context.use_pass_albedo) {
|
||||
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
|
||||
albedo_layer.width = width;
|
||||
albedo_layer.height = height;
|
||||
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
}
|
||||
|
||||
if (context.use_pass_normal) {
|
||||
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
|
||||
normal_layer.width = width;
|
||||
normal_layer.height = height;
|
||||
normal_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||
}
|
||||
|
||||
if (context.use_pass_motion) {
|
||||
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
|
||||
flow_layer.width = width;
|
||||
flow_layer.height = height;
|
||||
flow_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Denoise in-place of the noisy input in the render buffers. */
|
||||
output_layer = color_layer;
|
||||
|
||||
OptixDenoiserGuideLayer guide_layers = {};
|
||||
guide_layers.albedo = albedo_layer;
|
||||
guide_layers.normal = normal_layer;
|
||||
guide_layers.flow = flow_layer;
|
||||
|
||||
OptixDenoiserLayer image_layers = {};
|
||||
image_layers.input = color_layer;
|
||||
image_layers.previousOutput = prev_output_layer;
|
||||
image_layers.output = output_layer;
|
||||
|
||||
/* Finally run denoising. */
|
||||
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
|
||||
|
||||
optix_device_assert(denoiser_device_,
|
||||
ccl::optixUtilDenoiserInvokeTiled(
|
||||
optix_denoiser_,
|
||||
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
|
||||
¶ms,
|
||||
state_.device_pointer,
|
||||
sizes_.stateSizeInBytes,
|
||||
&guide_layers,
|
||||
&image_layers,
|
||||
1,
|
||||
state_.device_pointer + sizes_.stateSizeInBytes,
|
||||
sizes_.withOverlapScratchSizeInBytes,
|
||||
sizes_.overlapWindowSizeInPixels,
|
||||
configured_size_.x,
|
||||
configured_size_.y));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3,16 +3,84 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "integrator/denoiser_device.h"
|
||||
#ifdef WITH_OPTIX
|
||||
|
||||
# include "integrator/denoiser_gpu.h"
|
||||
|
||||
# include "device/optix/util.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class OptiXDenoiser : public DeviceDenoiser {
|
||||
/* Implementation of denoising API which uses the OptiX denoiser. */
|
||||
class OptiXDenoiser : public DenoiserGPU {
|
||||
public:
|
||||
OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||
~OptiXDenoiser();
|
||||
|
||||
protected:
|
||||
virtual uint get_device_type_mask() const override;
|
||||
|
||||
private:
|
||||
class DenoiseContext;
|
||||
class DenoisePass;
|
||||
|
||||
virtual bool denoise_buffer(const DenoiseTask &task) override;
|
||||
|
||||
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
|
||||
* OptiX and store in the guiding passes memory within the given context.
|
||||
*
|
||||
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
|
||||
* preprocess them for every pass which is being denoised. */
|
||||
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
|
||||
|
||||
/* Set fake albedo pixels in the albedo guiding pass storage.
|
||||
* After this point only passes which do not need albedo for denoising can be processed. */
|
||||
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
|
||||
|
||||
void denoise_pass(DenoiseContext &context, PassType pass_type);
|
||||
|
||||
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
|
||||
* input within the given context. Pixels are scaled to the number of samples, but are not
|
||||
* preprocessed yet. */
|
||||
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
|
||||
* denoiser result to the render buffer. */
|
||||
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
|
||||
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
/* Make sure the OptiX denoiser is created and configured. */
|
||||
bool denoise_ensure(DenoiseContext &context);
|
||||
|
||||
/* Create OptiX denoiser descriptor if needed.
|
||||
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
|
||||
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
|
||||
bool denoise_create_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
|
||||
bool denoise_configure_if_needed(DenoiseContext &context);
|
||||
|
||||
/* Run configured denoiser. */
|
||||
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);
|
||||
|
||||
OptixDenoiser optix_denoiser_ = nullptr;
|
||||
|
||||
/* Configuration size, as provided to `optixDenoiserSetup`.
|
||||
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
|
||||
* `is_configured` will be false. */
|
||||
bool is_configured_ = false;
|
||||
int2 configured_size_ = make_int2(0, 0);
|
||||
|
||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
||||
device_only_memory<unsigned char> state_;
|
||||
OptixDenoiserSizes sizes_ = {};
|
||||
|
||||
bool use_pass_albedo_ = false;
|
||||
bool use_pass_normal_ = false;
|
||||
bool use_pass_motion_ = false;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue