Cycles: Add OptiX temporal denoising support

Enables the `bpy.ops.cycles.denoise_animation()` operator again and modifies it to support
temporal denoising with OptiX. This requires renders that were done with both the "Vector"
and "Denoising Data" passes.

Differential Revision: https://developer.blender.org/D11442
This commit is contained in:
Patrick Mours 2022-01-04 21:39:54 +01:00
parent 86141a75eb
commit 8393ccd076
Notes: blender-bot 2023-02-14 10:32:59 +01:00
Referenced by issue #94711, Blender crashes on Apple M1 when using Cycles with GPU Compute since rB8393ccd07634
12 changed files with 291 additions and 477 deletions

View File

@ -735,27 +735,20 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat
static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
{
#if 1
(void)args;
(void)keywords;
#else
static const char *keyword_list[] = {
"preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
"preferences", "scene", "view_layer", "input", "output", NULL};
PyObject *pypreferences, *pyscene, *pyviewlayer;
PyObject *pyinput, *pyoutput = NULL;
int tile_size = 0, samples = 0;
if (!PyArg_ParseTupleAndKeywords(args,
keywords,
"OOOO|Oii",
"OOOO|O",
(char **)keyword_list,
&pypreferences,
&pyscene,
&pyviewlayer,
&pyinput,
&pyoutput,
&tile_size,
&samples)) {
&pyoutput)) {
return NULL;
}
@ -777,14 +770,10 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
&RNA_ViewLayer,
PyLong_AsVoidPtr(pyviewlayer),
&viewlayerptr);
PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
BL::ViewLayer b_view_layer(viewlayerptr);
DenoiseParams params;
params.radius = get_int(cviewlayer, "denoising_radius");
params.strength = get_float(cviewlayer, "denoising_strength");
params.feature_strength = get_float(cviewlayer, "denoising_feature_strength");
params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true);
params.use = true;
/* Parse file paths list. */
vector<string> input, output;
@ -812,24 +801,15 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
}
/* Create denoiser. */
DenoiserPipeline denoiser(device);
denoiser.params = params;
DenoiserPipeline denoiser(device, params);
denoiser.input = input;
denoiser.output = output;
if (tile_size > 0) {
denoiser.tile_size = make_int2(tile_size, tile_size);
}
if (samples > 0) {
denoiser.samples_override = samples;
}
/* Run denoiser. */
if (!denoiser.run()) {
PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
return NULL;
}
#endif
Py_RETURN_NONE;
}

View File

@ -105,11 +105,11 @@ class BlenderSync {
static BufferParams get_buffer_params(
BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);
private:
static DenoiseParams get_denoise_params(BL::Scene &b_scene,
BL::ViewLayer &b_view_layer,
bool background);
private:
/* sync */
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);

View File

@ -76,6 +76,8 @@ NODE_DEFINE(DenoiseParams)
SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);
SOCKET_BOOLEAN(temporally_stable, "Temporally Stable", false);
SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);
return type;

View File

@ -72,6 +72,9 @@ class DenoiseParams : public Node {
bool use_pass_albedo = true;
bool use_pass_normal = true;
/* Configure the denoiser to use motion vectors, previous image and a temporally stable model. */
bool temporally_stable = false;
DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;
static const NodeEnum *get_type_enum();
@ -83,7 +86,8 @@ class DenoiseParams : public Node {
{
return !(use == other.use && type == other.type && start_sample == other.start_sample &&
use_pass_albedo == other.use_pass_albedo &&
use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
use_pass_normal == other.use_pass_normal &&
temporally_stable == other.temporally_stable && prefilter == other.prefilter);
}
};

View File

@ -566,6 +566,19 @@ class OptiXDevice::DenoiseContext {
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_flow = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
@ -574,6 +587,7 @@ class OptiXDevice::DenoiseContext {
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
@ -588,6 +602,10 @@ class OptiXDevice::DenoiseContext {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_flow) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
@ -605,6 +623,16 @@ class OptiXDevice::DenoiseContext {
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output.
 * Location of the previous frame's denoised result, used as extra input for temporally stable
 * denoising. Only filled in when `denoise_params.temporally_stable` is set; otherwise `offset`
 * stays at PASS_UNUSED, which is what the denoise invocation checks before using it. */
struct {
/* Base device address of the buffer that holds the pass. */
device_ptr device_pointer = 0;
/* Offset of the pass within a pixel, in floats (it is scaled by sizeof(float) when used). */
int offset = PASS_UNUSED;
/* Number of pixels per row of the buffer. */
int stride = -1;
/* Number of floats per pixel of the buffer. */
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
@ -614,6 +642,7 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
@ -624,6 +653,7 @@ class OptiXDevice::DenoiseContext {
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_flow = false;
int num_samples = 0;
@ -632,6 +662,7 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
@ -702,6 +733,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
@ -709,6 +741,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
@ -881,7 +914,8 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
(denoiser_.use_pass_albedo != context.use_pass_albedo) ||
(denoiser_.use_pass_normal != context.use_pass_normal);
(denoiser_.use_pass_normal != context.use_pass_normal) ||
(denoiser_.use_pass_flow != context.use_pass_flow);
if (!recreate_denoiser) {
return true;
}
@ -895,8 +929,14 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
if (context.use_pass_flow) {
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
}
const OptixResult result = optixDenoiserCreate(
this->context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser_.optix_denoiser);
this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
if (result != OPTIX_SUCCESS) {
set_error("Failed to create OptiX denoiser");
@ -906,6 +946,7 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
/* OptiX denoiser handle was created with the requested number of input passes. */
denoiser_.use_pass_albedo = context.use_pass_albedo;
denoiser_.use_pass_normal = context.use_pass_normal;
denoiser_.use_pass_flow = context.use_pass_flow;
/* OptiX denoiser has been created, but it needs configuration. */
denoiser_.is_configured = false;
@ -965,8 +1006,10 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
OptixImage2D flow_layer = {0};
OptixImage2D output_layer = {0};
OptixImage2D prev_output_layer = {0};
/* Color pass. */
{
@ -982,6 +1025,19 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Previous output. */
if (context.prev_output.offset != PASS_UNUSED) {
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
prev_output_layer.data = context.prev_output.device_pointer +
context.prev_output.offset * sizeof(float);
prev_output_layer.width = width;
prev_output_layer.height = height;
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Optional guiding passes: albedo, normal and flow. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@ -1005,21 +1061,32 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_flow) {
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
flow_layer.width = width;
flow_layer.height = height;
flow_layer.rowStrideInBytes = row_stride_in_bytes;
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
}
}
/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.output = output_layer;
OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
guide_layers.flow = flow_layer;
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
denoiser_.queue.stream(),

View File

@ -104,6 +104,7 @@ class OptiXDevice : public CUDADevice {
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_flow = false;
};
Denoiser denoiser_;

View File

@ -19,6 +19,7 @@
#include "device/kernel.h"
#include "device/graphics_interop.h"
#include "util/debug.h"
#include "util/log.h"
#include "util/map.h"
#include "util/string.h"
@ -42,7 +43,7 @@ struct DeviceKernelArguments {
KERNEL_FILM_CONVERT,
};
static const int MAX_ARGS = 16;
static const int MAX_ARGS = 18;
Type types[MAX_ARGS];
void *values[MAX_ARGS];
size_t sizes[MAX_ARGS];
@ -85,6 +86,8 @@ struct DeviceKernelArguments {
}
void add(const Type type, const void *value, size_t size)
{
assert(count < MAX_ARGS);
types[count] = type;
values[count] = (void *)value;
sizes[count] = size;

View File

@ -756,6 +756,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int guiding_pass_stride,
int guiding_pass_albedo,
int guiding_pass_normal,
int guiding_pass_flow,
ccl_global const float *render_buffer,
int render_offset,
int render_stride,
@ -763,6 +764,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int render_pass_sample_count,
int render_pass_denoising_albedo,
int render_pass_denoising_normal,
int render_pass_motion,
int full_x,
int full_y,
int width,
@ -814,6 +816,17 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
normal_out[1] = normal_in[1] * pixel_scale;
normal_out[2] = normal_in[2] * pixel_scale;
}
/* Flow pass. */
if (guiding_pass_flow != PASS_UNUSED) {
kernel_assert(render_pass_motion != PASS_UNUSED);
const float *motion_in = buffer + render_pass_motion;
float *flow_out = guiding_pixel + guiding_pass_flow;
flow_out[0] = -motion_in[0] * pixel_scale;
flow_out[1] = -motion_in[1] * pixel_scale;
}
}
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
@ -899,7 +912,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
else {
/* Assigning to zero since this is a default alpha value for 3-component passes, and it
* is an opaque pixel for 4 component passes. */
denoised_pixel[3] = 0;
}
}

View File

@ -388,6 +388,7 @@ typedef enum PassType {
PASS_DENOISING_NORMAL,
PASS_DENOISING_ALBEDO,
PASS_DENOISING_DEPTH,
PASS_DENOISING_PREVIOUS,
/* PASS_SHADOW_CATCHER accumulates contribution of shadow catcher object which is not affected by
* any other object. The pass accessor will divide the combined pass by the shadow catcher. The

View File

@ -101,6 +101,7 @@ const NodeEnum *Pass::get_type_enum()
pass_type_enum.insert("denoising_normal", PASS_DENOISING_NORMAL);
pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO);
pass_type_enum.insert("denoising_depth", PASS_DENOISING_DEPTH);
pass_type_enum.insert("denoising_previous", PASS_DENOISING_PREVIOUS);
pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER);
pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT);
@ -299,6 +300,10 @@ PassInfo Pass::get_info(const PassType type, const bool include_albedo)
case PASS_DENOISING_DEPTH:
pass_info.num_components = 1;
break;
case PASS_DENOISING_PREVIOUS:
pass_info.num_components = 3;
pass_info.use_exposure = true;
break;
case PASS_SHADOW_CATCHER:
pass_info.num_components = 3;

View File

@ -16,17 +16,12 @@
#include "session/denoising.h"
#if 0
#include "util/map.h"
#include "util/system.h"
#include "util/task.h"
#include "util/time.h"
# include "kernel/filter/filter_defines.h"
# include "util/util_foreach.h"
# include "util/util_map.h"
# include "util/util_system.h"
# include "util/util_task.h"
# include "util/util_time.h"
# include <OpenImageIO/filesystem.h>
#include <OpenImageIO/filesystem.h>
CCL_NAMESPACE_BEGIN
@ -125,24 +120,18 @@ static void fill_mapping(vector<ChannelMapping> &map, int pos, string name, stri
}
}
static const int INPUT_NUM_CHANNELS = 15;
static const int INPUT_DENOISING_DEPTH = 0;
static const int INPUT_DENOISING_NORMAL = 1;
static const int INPUT_DENOISING_SHADOWING = 4;
static const int INPUT_DENOISING_ALBEDO = 5;
static const int INPUT_NOISY_IMAGE = 8;
static const int INPUT_DENOISING_VARIANCE = 11;
static const int INPUT_DENOISING_INTENSITY = 14;
static const int INPUT_NUM_CHANNELS = 13;
static const int INPUT_NOISY_IMAGE = 0;
static const int INPUT_DENOISING_NORMAL = 3;
static const int INPUT_DENOISING_ALBEDO = 6;
static const int INPUT_MOTION = 9;
static vector<ChannelMapping> input_channels()
{
vector<ChannelMapping> map;
fill_mapping(map, INPUT_DENOISING_DEPTH, "Denoising Depth", "Z");
fill_mapping(map, INPUT_NOISY_IMAGE, "Combined", "RGB");
fill_mapping(map, INPUT_DENOISING_NORMAL, "Denoising Normal", "XYZ");
fill_mapping(map, INPUT_DENOISING_SHADOWING, "Denoising Shadowing", "X");
fill_mapping(map, INPUT_DENOISING_ALBEDO, "Denoising Albedo", "RGB");
fill_mapping(map, INPUT_NOISY_IMAGE, "Noisy Image", "RGB");
fill_mapping(map, INPUT_DENOISING_VARIANCE, "Denoising Variance", "RGB");
fill_mapping(map, INPUT_DENOISING_INTENSITY, "Denoising Intensity", "X");
fill_mapping(map, INPUT_MOTION, "Vector", "XYZW");
return map;
}
@ -162,7 +151,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
input_to_image_channel.clear();
input_to_image_channel.resize(INPUT_NUM_CHANNELS, -1);
foreach (const ChannelMapping &mapping, input_channels()) {
for (const ChannelMapping &mapping : input_channels()) {
vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
if (i == channels.end()) {
return false;
@ -177,7 +166,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
output_to_image_channel.clear();
output_to_image_channel.resize(OUTPUT_NUM_CHANNELS, -1);
foreach (const ChannelMapping &mapping, output_channels()) {
for (const ChannelMapping &mapping : output_channels()) {
vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
if (i == channels.end()) {
return false;
@ -199,18 +188,16 @@ bool DenoiseImageLayer::detect_denoising_channels()
return true;
}
bool DenoiseImageLayer::match_channels(int neighbor,
const std::vector<string> &channelnames,
bool DenoiseImageLayer::match_channels(const std::vector<string> &channelnames,
const std::vector<string> &neighbor_channelnames)
{
neighbor_input_to_image_channel.resize(neighbor + 1);
vector<int> &mapping = neighbor_input_to_image_channel[neighbor];
vector<int> &mapping = previous_output_to_image_channel;
assert(mapping.size() == 0);
mapping.resize(input_to_image_channel.size(), -1);
mapping.resize(output_to_image_channel.size(), -1);
for (int i = 0; i < input_to_image_channel.size(); i++) {
const string &channel = channelnames[input_to_image_channel[i]];
for (int i = 0; i < output_to_image_channel.size(); i++) {
const string &channel = channelnames[output_to_image_channel[i]];
std::vector<string>::const_iterator frame_channel = find(
neighbor_channelnames.begin(), neighbor_channelnames.end(), channel);
@ -226,19 +213,9 @@ bool DenoiseImageLayer::match_channels(int neighbor,
/* Denoise Task */
DenoiseTask::DenoiseTask(Device *device,
DenoiserPipeline *denoiser,
int frame,
const vector<int> &neighbor_frames)
: denoiser(denoiser),
device(device),
frame(frame),
neighbor_frames(neighbor_frames),
current_layer(0),
input_pixels(device, "filter input buffer", MEM_READ_ONLY),
num_tiles(0)
DenoiseTask::DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame)
: denoiser(denoiser), device(device), frame(frame), current_layer(0), buffers(device)
{
image.samples = denoiser->samples_override;
}
DenoiseTask::~DenoiseTask()
@ -246,284 +223,39 @@ DenoiseTask::~DenoiseTask()
free();
}
/* Device callbacks */
/* Hand out the next pending tile, if any.
 *
 * Pops the front of the tile queue into `tile`, maps it for `tile_device` and reports progress.
 * Returns false, leaving `tile` untouched, once the queue is empty. */
bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &tile)
{
  /* Held for the whole hand-out, including the progress report. */
  thread_scoped_lock tile_lock(tiles_mutex);

  const bool has_pending_tile = !tiles.empty();
  if (has_pending_tile) {
    tile = tiles.front();
    tiles.pop_front();

    device->map_tile(tile_device, tile);

    /* Progress is the number of tiles handed out so far, out of the total for this frame. */
    print_progress(num_tiles - tiles.size(), num_tiles, frame, denoiser->num_frames);
  }

  return has_pending_tile;
}
/* Mapping tiles is required for regular rendering since each tile has its separate memory
* which may be allocated on a different device.
* For standalone denoising, there is a single memory that is present on all devices, so the only
* thing that needs to be done here is to specify the surrounding tile geometry.
*
* However, since there is only one large memory, the denoised result has to be written to
* a different buffer to avoid having to copy an entire horizontal slice of the image.
*
* On return `neighbors.target` points at a newly allocated per-tile output buffer on
* `tile_device`, which is also recorded in `output_pixels` (keyed by tile index) so that
* `unmap_neighboring_tiles()` can find, read back and free it. */
void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
RenderTile &target_tile = neighbors.target;
/* Fill tile information. */
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
if (i == RenderTileNeighbors::CENTER) {
continue;
}
RenderTile &tile = neighbors.tiles[i];
/* Offset of this neighbor from the center tile in tile units (-1, 0 or +1 per axis).
 * NOTE(review): assumes the neighbors are laid out as a row-major 3x3 grid -- confirm against
 * RenderTileNeighbors. */
int dx = (i % 3) - 1;
int dy = (i / 3) - 1;
/* Both edges are clamped to the image, so a neighbor that lies entirely outside the image
 * ends up with zero width and/or height. */
tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
/* All neighbors read from the same buffer as the center tile. */
tile.buffer = center_tile.buffer;
tile.offset = center_tile.offset;
tile.stride = image.width;
}
/* Allocate output buffer. */
device_vector<float> *output_mem = new device_vector<float>(
tile_device, "denoising_output", MEM_READ_WRITE);
output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
/* Fill output buffer with noisy image, assumed by kernel_filter_finalize
* when skipping denoising of some pixels. */
float *result = output_mem->data();
/* First pixel of the center tile inside the full, interleaved image. */
float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *input_to_image_channel = layer.input_to_image_channel.data();
for (int y = 0; y < center_tile.h; y++) {
for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
}
}
/* Advance to the same column on the next image row. */
in += image.num_channels * image.width;
}
output_mem->copy_to_device();
/* Fill output tile info. */
target_tile = center_tile;
target_tile.buffer = output_mem->device_pointer;
/* The output buffer is tightly packed: one row is exactly one tile width. */
target_tile.stride = target_tile.w;
/* Presumably compensates for the tile's absolute (x, y), so that addressing with
 * offset + x + y * stride starts at the beginning of the tile-sized buffer -- confirm against
 * the kernels that consume the target tile. */
target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
/* Register the buffer so it can be picked up again when the tile is unmapped. */
thread_scoped_lock output_lock(output_mutex);
assert(output_pixels.count(center_tile.tile_index) == 0);
output_pixels[target_tile.tile_index] = output_mem;
}
/* Counterpart of map_neighboring_tiles(): takes the per-tile output buffer registered for this
 * tile out of `output_pixels`, copies the denoised pixels back from the device into the
 * corresponding channels of `image.pixels`, and frees the buffer. */
void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
{
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
RenderTile &target_tile = neighbors.target;
/* Take ownership of the output buffer; the lock only guards the map access. */
thread_scoped_lock output_lock(output_mutex);
assert(output_pixels.count(center_tile.tile_index) == 1);
device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
output_pixels.erase(center_tile.tile_index);
/* Released before the device copy and the scatter loop below. */
output_lock.unlock();
/* Copy denoised pixels from device. */
output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
float *result = output_mem->data();
/* First pixel of the tile inside the full, interleaved image. */
float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *output_to_image_channel = layer.output_to_image_channel.data();
/* Scatter the tightly packed result into the image channels of the current layer. */
for (int y = 0; y < target_tile.h; y++) {
for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
out[image.num_channels * x + output_to_image_channel[i]] = result[i];
}
}
/* Advance to the same column on the next image row. */
out += image.num_channels * image.width;
}
/* Free device buffer. */
output_mem->free();
delete output_mem;
}
/* Tile release callback; bound as `task.release_tile` in create_task().
 * Intentionally does nothing. */
void DenoiseTask::release_tile()
{
}
/* Cancel query callback; bound as `task.get_cancel` in create_task().
 * Always returns false, so the task never reports a cancel request. */
bool DenoiseTask::get_cancel()
{
return false;
}
/* Configure `task` for denoising the current layer of the loaded frame.
 *
 * Binds the tile callbacks of this object, copies the denoising parameters from the pipeline
 * (forcing the NLM denoiser), describes the input/output buffer layout and rebuilds the queue
 * of tiles that acquire_tile() hands out. */
void DenoiseTask::create_task(DeviceTask &task)
{
/* Callback functions. */
task.acquire_tile = function_bind(&DenoiseTask::acquire_tile, this, device, _1, _2);
task.map_neighbor_tiles = function_bind(&DenoiseTask::map_neighboring_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&DenoiseTask::unmap_neighboring_tiles, this, _1);
task.release_tile = function_bind(&DenoiseTask::release_tile, this);
task.get_cancel = function_bind(&DenoiseTask::get_cancel, this);
/* Denoising parameters. */
task.denoising = denoiser->params;
task.denoising.type = DENOISER_NLM;
task.denoising.use = true;
task.denoising_from_render = false;
/* Neighbor frames are passed on relative to the frame being denoised. */
task.denoising_frames.resize(neighbor_frames.size());
for (int i = 0; i < neighbor_frames.size(); i++) {
task.denoising_frames[i] = neighbor_frames[i] - frame;
}
/* Buffer parameters. */
task.pass_stride = INPUT_NUM_CHANNELS;
task.target_pass_stride = OUTPUT_NUM_CHANNELS;
task.pass_denoising_data = 0;
task.pass_denoising_clean = -1;
/* Size in floats of one frame in the input buffer. */
task.frame_stride = image.width * image.height * INPUT_NUM_CHANNELS;
/* Create tiles. */
/* Both the tile queue and the output buffer map are reset while holding their locks. */
thread_scoped_lock tile_lock(tiles_mutex);
thread_scoped_lock output_lock(output_mutex);
tiles.clear();
assert(output_pixels.empty());
output_pixels.clear();
/* Number of tiles per axis; divide_up presumably rounds up so partial edge tiles are
 * included -- the min() below then trims them to the image. */
int tiles_x = divide_up(image.width, denoiser->tile_size.x);
int tiles_y = divide_up(image.height, denoiser->tile_size.y);
for (int ty = 0; ty < tiles_y; ty++) {
for (int tx = 0; tx < tiles_x; tx++) {
RenderTile tile;
tile.x = tx * denoiser->tile_size.x;
tile.y = ty * denoiser->tile_size.y;
/* Tiles on the right/bottom edge are cropped to the image size. */
tile.w = min(image.width - tile.x, denoiser->tile_size.x);
tile.h = min(image.height - tile.y, denoiser->tile_size.y);
tile.start_sample = 0;
tile.num_samples = image.layers[current_layer].samples;
tile.sample = 0;
tile.offset = 0;
tile.stride = image.width;
/* Row-major index; also used as the key into `output_pixels`. */
tile.tile_index = ty * tiles_x + tx;
tile.task = RenderTile::DENOISE;
tile.buffers = NULL;
/* Every tile reads from the single shared input buffer. */
tile.buffer = input_pixels.device_pointer;
tiles.push_back(tile);
}
}
num_tiles = tiles.size();
}
/* Denoiser Operations */
bool DenoiseTask::load_input_pixels(int layer)
{
int w = image.width;
int h = image.height;
int num_pixels = image.width * image.height;
int frame_stride = num_pixels * INPUT_NUM_CHANNELS;
/* Load center image */
DenoiseImageLayer &image_layer = image.layers[layer];
float *buffer_data = input_pixels.data();
image.read_pixels(image_layer, buffer_data);
buffer_data += frame_stride;
float *buffer_data = buffers.buffer.data();
image.read_pixels(image_layer, buffers.params, buffer_data);
/* Load neighbor images */
for (int i = 0; i < image.in_neighbors.size(); i++) {
if (!image.read_neighbor_pixels(i, image_layer, buffer_data)) {
error = "Failed to read neighbor frame pixels";
return false;
}
buffer_data += frame_stride;
}
/* Preprocess */
buffer_data = input_pixels.data();
for (int neighbor = 0; neighbor < image.in_neighbors.size() + 1; neighbor++) {
/* Clamp */
if (denoiser->params.clamp_input) {
for (int i = 0; i < num_pixels * INPUT_NUM_CHANNELS; i++) {
buffer_data[i] = clamp(buffer_data[i], -1e8f, 1e8f);
}
}
/* Box blur */
int r = 5 * denoiser->params.radius;
float *data = buffer_data + 14;
array<float> temp(num_pixels);
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int n = 0;
float sum = 0.0f;
for (int dx = max(x - r, 0); dx < min(x + r + 1, w); dx++, n++) {
sum += data[INPUT_NUM_CHANNELS * (y * w + dx)];
}
temp[y * w + x] = sum / n;
}
}
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int n = 0;
float sum = 0.0f;
for (int dy = max(y - r, 0); dy < min(y + r + 1, h); dy++, n++) {
sum += temp[dy * w + x];
}
data[INPUT_NUM_CHANNELS * (y * w + x)] = sum / n;
}
}
/* Highlight compression */
data = buffer_data + 8;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int idx = INPUT_NUM_CHANNELS * (y * w + x);
float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]);
color = color_highlight_compress(color, NULL);
data[idx] = color.x;
data[idx + 1] = color.y;
data[idx + 2] = color.z;
}
}
buffer_data += frame_stride;
/* Load previous image */
if (frame > 0 && !image.read_previous_pixels(image_layer, buffers.params, buffer_data)) {
error = "Failed to read neighbor frame pixels";
return false;
}
/* Copy to device */
input_pixels.copy_to_device();
buffers.buffer.copy_to_device();
return true;
}
/* Task stages */
/* Append a newly allocated pass with the given type and mode to `passes`.
 *
 * The pass is created with `new` and only its raw pointer is stored, so whoever owns `passes`
 * is responsible for deleting every entry once the passes are no longer needed. */
static void add_pass(vector<Pass *> &passes, PassType type, PassMode mode = PassMode::NOISY)
{
  Pass *const new_pass = new Pass();

  new_pass->set_type(type);
  new_pass->set_mode(mode);

  passes.push_back(new_pass);
}
bool DenoiseTask::load()
{
string center_filepath = denoiser->input[frame];
@ -531,7 +263,8 @@ bool DenoiseTask::load()
return false;
}
if (!image.load_neighbors(denoiser->input, neighbor_frames, error)) {
/* Use previous frame output as input for subsequent frames. */
if (frame > 0 && !image.load_previous(denoiser->output[frame - 1], error)) {
return false;
}
@ -540,10 +273,35 @@ bool DenoiseTask::load()
return false;
}
/* Enable temporal denoising for frames after the first (which will use the output from the
* previous frames). */
DenoiseParams params = denoiser->denoiser->get_params();
params.temporally_stable = frame > 0;
denoiser->denoiser->set_params(params);
/* Allocate device buffer. */
int num_frames = image.in_neighbors.size() + 1;
input_pixels.alloc(image.width * INPUT_NUM_CHANNELS, image.height * num_frames);
input_pixels.zero_to_device();
vector<Pass *> passes;
add_pass(passes, PassType::PASS_COMBINED);
add_pass(passes, PassType::PASS_DENOISING_ALBEDO);
add_pass(passes, PassType::PASS_DENOISING_NORMAL);
add_pass(passes, PassType::PASS_MOTION);
add_pass(passes, PassType::PASS_DENOISING_PREVIOUS);
add_pass(passes, PassType::PASS_COMBINED, PassMode::DENOISED);
BufferParams buffer_params;
buffer_params.width = image.width;
buffer_params.height = image.height;
buffer_params.full_x = 0;
buffer_params.full_y = 0;
buffer_params.full_width = image.width;
buffer_params.full_height = image.height;
buffer_params.update_passes(passes);
for (Pass *pass : passes) {
delete pass;
}
buffers.reset(buffer_params);
/* Read pixels for first layer. */
current_layer = 0;
@ -565,10 +323,26 @@ bool DenoiseTask::exec()
}
/* Run task on device. */
DeviceTask task(DeviceTask::RENDER);
create_task(task);
device->task_add(task);
device->task_wait();
denoiser->denoiser->denoise_buffer(buffers.params, &buffers, 1, true);
/* Copy denoised pixels from device. */
buffers.buffer.copy_from_device();
float *result = buffers.buffer.data(), *out = image.pixels.data();
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *output_to_image_channel = layer.output_to_image_channel.data();
for (int y = 0; y < image.height; y++) {
for (int x = 0; x < image.width; x++, result += buffers.params.pass_stride) {
for (int j = 0; j < OUTPUT_NUM_CHANNELS; j++) {
int offset = buffers.params.get_pass_offset(PASS_COMBINED, PassMode::DENOISED);
int image_channel = output_to_image_channel[j];
out[image.num_channels * x + image_channel] = result[offset + j];
}
}
out += image.num_channels * image.width;
}
printf("\n");
}
@ -586,8 +360,7 @@ bool DenoiseTask::save()
void DenoiseTask::free()
{
image.free();
input_pixels.free();
assert(output_pixels.empty());
buffers.buffer.free();
}
/* Denoise Image Storage */
@ -607,7 +380,7 @@ DenoiseImage::~DenoiseImage()
void DenoiseImage::close_input()
{
in_neighbors.clear();
in_previous.reset();
}
void DenoiseImage::free()
@ -677,39 +450,61 @@ bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error)
return true;
}
/* Copy the channels needed for denoising from the already loaded `pixels` of this image into
 * `input_pixels`. Each source channel is looked up through layer.input_to_image_channel, and
 * each destination is addressed by pixel index times params.pass_stride plus the offset of the
 * corresponding pass in `params`. */
void DenoiseImage::read_pixels(const DenoiseImageLayer &layer, float *input_pixels)
void DenoiseImage::read_pixels(const DenoiseImageLayer &layer,
const BufferParams &params,
float *input_pixels)
{
/* Pixels from center file have already been loaded into pixels.
* We copy a subset into the device input buffer with channels reshuffled. */
const int *input_to_image_channel = layer.input_to_image_channel.data();
for (int i = 0; i < width * height; i++) {
for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
int image_channel = input_to_image_channel[j];
input_pixels[i * INPUT_NUM_CHANNELS + j] =
/* Noisy image (3 channels) is written to the combined pass. */
for (int j = 0; j < 3; ++j) {
int offset = params.get_pass_offset(PASS_COMBINED);
int image_channel = input_to_image_channel[INPUT_NOISY_IMAGE + j];
input_pixels[i * params.pass_stride + offset + j] =
pixels[((size_t)i) * num_channels + image_channel];
}
/* Denoising normal (3 channels). */
for (int j = 0; j < 3; ++j) {
int offset = params.get_pass_offset(PASS_DENOISING_NORMAL);
int image_channel = input_to_image_channel[INPUT_DENOISING_NORMAL + j];
input_pixels[i * params.pass_stride + offset + j] =
pixels[((size_t)i) * num_channels + image_channel];
}
/* Denoising albedo (3 channels). */
for (int j = 0; j < 3; ++j) {
int offset = params.get_pass_offset(PASS_DENOISING_ALBEDO);
int image_channel = input_to_image_channel[INPUT_DENOISING_ALBEDO + j];
input_pixels[i * params.pass_stride + offset + j] =
pixels[((size_t)i) * num_channels + image_channel];
}
/* Motion pass (4 channels). */
for (int j = 0; j < 4; ++j) {
int offset = params.get_pass_offset(PASS_MOTION);
int image_channel = input_to_image_channel[INPUT_MOTION + j];
input_pixels[i * params.pass_stride + offset + j] =
pixels[((size_t)i) * num_channels + image_channel];
}
}
}
bool DenoiseImage::read_neighbor_pixels(int neighbor,
const DenoiseImageLayer &layer,
bool DenoiseImage::read_previous_pixels(const DenoiseImageLayer &layer,
const BufferParams &params,
float *input_pixels)
{
/* Load pixels from neighboring frames, and copy them into device buffer
* with channels reshuffled. */
size_t num_pixels = (size_t)width * (size_t)height;
array<float> neighbor_pixels(num_pixels * num_channels);
if (!in_neighbors[neighbor]->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
if (!in_previous->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
return false;
}
const int *input_to_image_channel = layer.neighbor_input_to_image_channel[neighbor].data();
const int *output_to_image_channel = layer.previous_output_to_image_channel.data();
for (int i = 0; i < width * height; i++) {
for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
int image_channel = input_to_image_channel[j];
input_pixels[i * INPUT_NUM_CHANNELS + j] =
for (int j = 0; j < 3; ++j) {
int offset = params.get_pass_offset(PASS_DENOISING_PREVIOUS);
int image_channel = output_to_image_channel[j];
input_pixels[i * params.pass_stride + offset + j] =
neighbor_pixels[((size_t)i) * num_channels + image_channel];
}
}
@ -739,8 +534,8 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
return false;
}
if (layers.size() == 0) {
error = "Could not find a render layer containing denoising info";
if (layers.empty()) {
error = "Could not find a render layer containing denoising data and motion vector passes";
return false;
}
@ -757,46 +552,34 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
return true;
}
/* Open an additional input frame and validate it against this image.
 *
 * The file must be a regular file, must be openable as an image, must have the same width and
 * height as this image, and must pass match_channels() for every layer. On success the opened
 * handle is stored and true is returned. On any failure `error` is set to a message that
 * includes the file path and false is returned. */
bool DenoiseImage::load_neighbors(const vector<string> &filepaths,
const vector<int> &frames,
string &error)
bool DenoiseImage::load_previous(const string &filepath, string &error)
{
if (frames.size() > DENOISE_MAX_FRAMES - 1) {
error = string_printf("Maximum number of neighbors (%d) exceeded\n", DENOISE_MAX_FRAMES - 1);
/* Check that the path refers to an existing regular file before trying to open it. */
if (!Filesystem::is_regular(filepath)) {
error = "Couldn't find neighbor frame: " + filepath;
return false;
}
for (int neighbor = 0; neighbor < frames.size(); neighbor++) {
int frame = frames[neighbor];
const string &filepath = filepaths[frame];
if (!Filesystem::is_regular(filepath)) {
error = "Couldn't find neighbor frame: " + filepath;
return false;
}
unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
if (!in_neighbor) {
error = "Couldn't open neighbor frame: " + filepath;
return false;
}
const ImageSpec &neighbor_spec = in_neighbor->spec();
if (neighbor_spec.width != width || neighbor_spec.height != height) {
error = "Neighbor frame has different dimensions: " + filepath;
return false;
}
foreach (DenoiseImageLayer &layer, layers) {
if (!layer.match_channels(neighbor, in_spec.channelnames, neighbor_spec.channelnames)) {
error = "Neighbor frame misses denoising data passes: " + filepath;
return false;
}
}
in_neighbors.push_back(std::move(in_neighbor));
unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
if (!in_neighbor) {
error = "Couldn't open neighbor frame: " + filepath;
return false;
}
/* The frame must have the same dimensions as this image. */
const ImageSpec &neighbor_spec = in_neighbor->spec();
if (neighbor_spec.width != width || neighbor_spec.height != height) {
error = "Neighbor frame has different dimensions: " + filepath;
return false;
}
/* Every layer must be able to match its channels against the channel names of the frame. */
for (DenoiseImageLayer &layer : layers) {
if (!layer.match_channels(in_spec.channelnames, neighbor_spec.channelnames)) {
error = "Neighbor frame misses denoising data passes: " + filepath;
return false;
}
}
/* Keep the opened file handle; ownership moves into the member. */
in_previous = std::move(in_neighbor);
return true;
}
@ -864,24 +647,22 @@ bool DenoiseImage::save_output(const string &out_filepath, string &error)
/* File pattern handling and outer loop over frames */
/* Set up the task scheduler, the device described by `device_info`, and the denoiser
 * configured by `params`. */
DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info)
DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params)
{
samples_override = 0;
tile_size = make_int2(64, 64);
num_frames = 0;
/* Initialize task scheduler. */
TaskScheduler::init();
/* Initialize device. */
device = Device::create(device_info, stats, profiler, true);
device = Device::create(device_info, stats, profiler);
device->load_kernels(KERNEL_FEATURE_DENOISING);
/* Create the denoiser on top of the device and load its kernels. */
denoiser = Denoiser::create(device, params);
denoiser->load_kernels(nullptr);
}
/* Tear down in the reverse order of construction: release the denoiser before deleting the
 * device it was created with, then shut down the task scheduler. */
DenoiserPipeline::~DenoiserPipeline()
{
denoiser.reset();
delete device;
TaskScheduler::exit();
}
@ -890,7 +671,7 @@ bool DenoiserPipeline::run()
{
assert(input.size() == output.size());
num_frames = output.size();
int num_frames = output.size();
for (int frame = 0; frame < num_frames; frame++) {
/* Skip empty output paths. */
@ -898,16 +679,8 @@ bool DenoiserPipeline::run()
continue;
}
/* Determine neighbor frame numbers that should be used for filtering. */
vector<int> neighbor_frames;
for (int f = frame - params.neighbor_frames; f <= frame + params.neighbor_frames; f++) {
if (f >= 0 && f < num_frames && f != frame) {
neighbor_frames.push_back(f);
}
}
/* Execute task. */
DenoiseTask task(device, this, frame, neighbor_frames);
DenoiseTask task(device, this, frame);
if (!task.load()) {
error = task.error;
return false;
@ -930,5 +703,3 @@ bool DenoiserPipeline::run()
}
CCL_NAMESPACE_END
#endif

View File

@ -17,20 +17,17 @@
#ifndef __DENOISING_H__
#define __DENOISING_H__
#if 0
/* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or
* parameters. Currently it is an annoying mixture of terms used interchangeably. */
# include "device/device.h"
#include "device/device.h"
#include "integrator/denoiser.h"
# include "render/buffers.h"
#include "util/string.h"
#include "util/unique_ptr.h"
#include "util/vector.h"
# include "util/util_string.h"
# include "util/util_unique_ptr.h"
# include "util/util_vector.h"
# include <OpenImageIO/imageio.h>
#include <OpenImageIO/imageio.h>
OIIO_NAMESPACE_USING
@ -40,7 +37,7 @@ CCL_NAMESPACE_BEGIN
class DenoiserPipeline {
public:
DenoiserPipeline(DeviceInfo &device_info);
DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params);
~DenoiserPipeline();
bool run();
@ -55,22 +52,13 @@ class DenoiserPipeline {
* taking into account all input frames. */
vector<string> output;
/* Sample number override, takes precedence over values from input frames. */
int samples_override;
/* Tile size for processing on device. */
int2 tile_size;
/* Equivalent to the settings in the regular denoiser. */
DenoiseParams params;
protected:
friend class DenoiseTask;
Stats stats;
Profiler profiler;
Device *device;
int num_frames;
std::unique_ptr<Denoiser> denoiser;
};
/* Denoise Image Layer */
@ -88,13 +76,13 @@ struct DenoiseImageLayer {
/* Device input channel will be copied from image channel input_to_image_channel[i]. */
vector<int> input_to_image_channel;
/* input_to_image_channel of the secondary frames, if any are used. */
vector<vector<int>> neighbor_input_to_image_channel;
/* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the
* file. */
vector<int> output_to_image_channel;
/* output_to_image_channel of the previous frame, if used. */
vector<int> previous_output_to_image_channel;
/* Detect whether this layer contains a full set of channels and set up the offsets accordingly.
*/
bool detect_denoising_channels();
@ -102,8 +90,7 @@ struct DenoiseImageLayer {
/* Map the channels of a secondary frame to the channels that are required for processing,
* fill neighbor_input_to_image_channel if all are present or return false if any channel is
* missing. */
bool match_channels(int neighbor,
const std::vector<string> &channelnames,
bool match_channels(const std::vector<string> &channelnames,
const std::vector<string> &neighbor_channelnames);
};
@ -125,7 +112,7 @@ class DenoiseImage {
/* Image file handles */
ImageSpec in_spec;
vector<unique_ptr<ImageInput>> in_neighbors;
unique_ptr<ImageInput> in_previous;
/* Render layers */
vector<DenoiseImageLayer> layers;
@ -137,12 +124,16 @@ class DenoiseImage {
bool load(const string &in_filepath, string &error);
/* Load neighboring frames. */
bool load_neighbors(const vector<string> &filepaths, const vector<int> &frames, string &error);
bool load_previous(const string &in_filepath, string &error);
/* Load subset of pixels from file buffer into input buffer, as needed for denoising
* on the device. Channels are reshuffled following the provided mapping. */
void read_pixels(const DenoiseImageLayer &layer, float *input_pixels);
bool read_neighbor_pixels(int neighbor, const DenoiseImageLayer &layer, float *input_pixels);
void read_pixels(const DenoiseImageLayer &layer,
const BufferParams &params,
float *input_pixels);
bool read_previous_pixels(const DenoiseImageLayer &layer,
const BufferParams &params,
float *input_pixels);
bool save_output(const string &out_filepath, string &error);
@ -159,10 +150,7 @@ class DenoiseImage {
class DenoiseTask {
public:
DenoiseTask(Device *device,
DenoiserPipeline *denoiser,
int frame,
const vector<int> &neighbor_frames);
DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame);
~DenoiseTask();
/* Task stages */
@ -180,37 +168,17 @@ class DenoiseTask {
/* Frame number to be denoised */
int frame;
vector<int> neighbor_frames;
/* Image file data */
DenoiseImage image;
int current_layer;
/* Device input buffer */
device_vector<float> input_pixels;
/* Tiles */
thread_mutex tiles_mutex;
list<RenderTile> tiles;
int num_tiles;
thread_mutex output_mutex;
map<int, device_vector<float> *> output_pixels;
RenderBuffers buffers;
/* Task handling */
bool load_input_pixels(int layer);
void create_task(DeviceTask &task);
/* Device task callbacks */
bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
void release_tile();
bool get_cancel();
};
CCL_NAMESPACE_END
#endif
#endif /* __DENOISING_H__ */