Cycles: Add support for denoising in the viewport

The OptiX denoiser can be a great help when rendering in the viewport, since it is really fast
and needs few samples to produce convincing results. This patch therefore adds support for
using any Cycles denoiser in the viewport as well (although only the OptiX one is selectable,
because the NLM one is currently too slow to be usable). It also adds support for denoising on a
different device than the one used for rendering (so one can e.g. render with the CPU but denoise with OptiX).

Reviewed By: #cycles, brecht

Differential Revision: https://developer.blender.org/D6554
This commit is contained in:
Patrick Mours 2020-02-11 16:30:01 +01:00
parent 35490c3ead
commit 38589de10c
25 changed files with 727 additions and 255 deletions

View File

@ -197,7 +197,12 @@ enum_aov_types = (
('COLOR', "Color", "Write a Color pass", 1),
)
enum_denoising_optix_input_passes= (
enum_viewport_denoising = (
('NONE', "None", "Disable viewport denoising", 0),
('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1),
)
enum_denoising_optix_input_passes = (
('RGB', "Color", "Use only color as input", 1),
('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
@ -229,6 +234,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PATH',
)
preview_pause: BoolProperty(
name="Pause Preview",
description="Pause all viewport preview renders",
default=False,
)
preview_denoising: EnumProperty(
name="Viewport Denoising",
description="Denoise the image after each preview update with the selected denoiser engine",
items=enum_viewport_denoising,
default='NONE',
)
use_square_samples: BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
@ -247,11 +264,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=(1 << 24),
default=32,
)
preview_pause: BoolProperty(
name="Pause Preview",
description="Pause all viewport preview renders",
default=False,
)
aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render for each pixel",
@ -264,6 +276,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=2097151,
default=32,
)
diffuse_samples: IntProperty(
name="Diffuse Samples",
description="Number of diffuse bounce samples to render for each AA sample",
@ -294,14 +307,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=1, max=1024,
default=1,
)
subsurface_samples: IntProperty(
name="Subsurface Samples",
description="Number of subsurface scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
volume_samples: IntProperty(
name="Volume Samples",
description="Number of volume scattering samples to render for each AA sample",
@ -1305,12 +1316,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
use_optix_denoising: BoolProperty(
name="Use OptiX AI Denoising",
description="Denoise the rendered image with the OptiX AI denoiser",
default=False,
update=update_render_passes,
)
denoising_diffuse_direct: BoolProperty(
name="Diffuse Direct",
description="Denoise the direct diffuse lighting",
@ -1387,11 +1392,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
min=0, max=7,
default=0,
)
use_optix_denoising: BoolProperty(
name="OptiX AI-Accelerated",
description="Use the OptiX denoiser to denoise the rendered image",
default=False,
update=update_render_passes,
)
denoising_optix_input_passes: EnumProperty(
name="Input Passes",
description="Controls which passes the OptiX AI denoiser should use as input, which can have different effects on the denoised image",
description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)",
items=enum_denoising_optix_input_passes,
default='RGB',
default='RGB_ALBEDO',
)
use_pass_crypto_object: BoolProperty(

View File

@ -112,6 +112,10 @@ def show_device_active(context):
return True
return context.preferences.addons[__package__].preferences.has_active_device()
def show_optix_denoising(context):
# The OptiX AI denoiser can be offered when at least one device supports OptiX,
# i.e. when the add-on preferences return a non-empty result from
# get_devices_for_type('OPTIX'). The result is coerced to a plain bool.
return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'))
def draw_samples_info(layout, context):
cscene = context.scene.cycles
@ -177,17 +181,23 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
if not use_optix(context):
layout.prop(cscene, "progressive")
if cscene.progressive == 'PATH' or use_branched_path(context) is False:
if not use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "samples", text="Render")
col.prop(cscene, "preview_samples", text="Viewport")
draw_samples_info(layout, context)
else:
col = layout.column(align=True)
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
# Viewport denoising is currently only supported with OptiX
if show_optix_denoising(context):
col = layout.column()
col.prop(cscene, "preview_denoising")
if not use_branched_path(context):
draw_samples_info(layout, context)
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
@ -195,9 +205,7 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
scene = context.scene
cscene = scene.cycles
return cscene.progressive != 'PATH' and use_branched_path(context)
return use_branched_path(context)
def draw(self, context):
layout = self.layout
@ -635,9 +643,6 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = not rd.use_save_buffers
for view_layer in scene.view_layers:
if view_layer.cycles.use_denoising:
sub.active = False
sub.prop(cscene, "use_progressive_refine")
@ -981,15 +986,14 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
col = split.column(align=True)
if use_optix(context):
col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising")
if show_optix_denoising(context):
col.prop(cycles_view_layer, "use_optix_denoising")
col.separator(factor=2.0)
if cycles_view_layer.use_optix_denoising:
col.prop(cycles_view_layer, "denoising_optix_input_passes")
return
col.separator(factor=2.0)
col.prop(cycles_view_layer, "denoising_radius", text="Radius")
col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
@ -2192,8 +2196,6 @@ def draw_device(self, context):
col = layout.column()
col.prop(cscene, "feature_set")
scene = context.scene
col = layout.column()
col.active = show_device_active(context)
col.prop(cscene, "device")

View File

@ -863,7 +863,8 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
}
}
BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene,
BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
@ -899,7 +900,11 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
params.height = height;
}
update_viewport_display_passes(b_v3d, params.passes);
PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
/* Can only denoise the combined image pass */
params.denoising_data_pass = display_pass == PASS_COMBINED &&
update_viewport_display_denoising(b_v3d, b_scene);
return params;
}

View File

@ -19,6 +19,22 @@
CCL_NAMESPACE_BEGIN
/* Denoiser selected for the viewport. The numeric values correspond to the
 * identifiers of the `preview_denoising` enum property items
 * ('NONE' = 0, 'OPTIX' = 1). */
enum DenoiserType {
  /* Viewport denoising is disabled. */
  DENOISER_NONE = 0,
  /* Denoise with the OptiX denoiser. */
  DENOISER_OPTIX = 1,

  /* Number of denoiser types; must stay the last entry. */
  DENOISER_NUM = DENOISER_OPTIX + 1
};
/* Compute backend as read from the `compute_device_type` preference.
 * NOTE(review): the values presumably have to match the identifiers of that
 * preference's enum items - confirm against the add-on preferences. */
enum ComputeDevice {
  COMPUTE_DEVICE_CPU = 0,
  COMPUTE_DEVICE_CUDA = 1,
  COMPUTE_DEVICE_OPENCL = 2,
  COMPUTE_DEVICE_OPTIX = 3,

  /* Number of compute device types; must stay the last entry. */
  COMPUTE_DEVICE_NUM = COMPUTE_DEVICE_OPTIX + 1
};
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
@ -40,7 +56,7 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
device = devices.front();
return devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
@ -57,14 +73,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
/* Test if we are using GPU devices. */
enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPENCL = 2,
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_NUM = 4,
};
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
@ -106,6 +114,33 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
}
/* Ensure there is an OptiX device when using the OptiX denoiser. */
bool use_optix_denoising = DENOISER_OPTIX ==
get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE);
BL::Scene::view_layers_iterator b_view_layer;
for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
++b_view_layer) {
PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
if (get_boolean(crl, "use_optix_denoising")) {
use_optix_denoising = true;
}
}
if (use_optix_denoising && device.type != DEVICE_OPTIX) {
vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
if (!optix_devices.empty()) {
/* Convert to a special multi device with separate denoising devices. */
if (device.multi_devices.empty()) {
device.multi_devices.push_back(device);
}
/* Simply use the first available OptiX device. */
const DeviceInfo optix_device = optix_devices.front();
device.id += optix_device.id; /* Uniquely identify this special multi device. */
device.denoising_devices.push_back(optix_device);
}
}
return device;
}

View File

@ -166,7 +166,7 @@ void BlenderSession::create_session()
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@ -244,7 +244,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@ -460,7 +460,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
/* render each layer */
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@ -706,7 +706,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
@ -851,7 +851,6 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
return;
}
/* increase samples, but never decrease */
@ -886,10 +885,28 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
else
sync->sync_camera(b_render, b_camera_override, width, height, "");
/* get buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
if (session_params.device.type != DEVICE_OPTIX &&
session_params.device.denoising_devices.empty()) {
/* cannot use OptiX denoising when it is not supported by the device. */
buffer_params.denoising_data_pass = false;
}
else {
session->set_denoising(buffer_params.denoising_data_pass, true);
}
if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) {
scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
/* Force a scene and session reset below. */
scene->film->tag_update(scene);
}
/* reset if needed */
if (scene->need_reset()) {
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
/* After session reset, so device is not accessing image data anymore. */
@ -956,7 +973,7 @@ bool BlenderSession::draw(int w, int h)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@ -974,7 +991,7 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {

View File

@ -846,20 +846,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* progressive refine */
BL::RenderSettings b_r = b_scene.render();
params.progressive_refine = (b_engine.is_preview() ||
get_boolean(cscene, "use_progressive_refine")) &&
!b_r.use_save_buffers();
if (params.progressive_refine) {
BL::Scene::view_layers_iterator b_view_layer;
for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
++b_view_layer) {
PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
if (get_boolean(crl, "use_denoising")) {
params.progressive_refine = false;
}
}
}
params.progressive_refine = b_engine.is_preview() ||
get_boolean(cscene, "use_progressive_refine");
if (b_r.use_save_buffers())
params.progressive_refine = false;
if (background) {
if (params.progressive_refine)

View File

@ -95,7 +95,8 @@ class BlenderSync {
BL::Scene &b_scene,
bool background);
static bool get_session_pause(BL::Scene &b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings &b_render,
static BufferParams get_buffer_params(BL::Scene &b_scene,
BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,

View File

@ -61,6 +61,17 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const
return !(use_scene_world && use_scene_lights);
}
/* Returns whether viewport denoising is requested: true only when a 3D viewport
 * is given and the scene's Cycles `preview_denoising` setting is non-zero. */
bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d,
                                                               BL::Scene &b_scene)
{
  /* No viewport given, so viewport denoising does not apply. */
  if (!b_v3d) {
    return false;
  }

  /* Any value other than zero selects a denoiser. */
  PointerRNA cycles_settings = RNA_pointer_get(&b_scene.ptr, "cycles");
  return get_enum(cycles_settings, "preview_denoising") != 0;
}
PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d)
{
PassType display_pass = PASS_NONE;
@ -72,6 +83,11 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi
return display_pass;
}
/* Free-function wrapper that forwards to
 * BlenderViewportParameters::get_viewport_display_denoising(). */
bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene)
{
  const bool use_denoising = BlenderViewportParameters::get_viewport_display_denoising(b_v3d,
                                                                                       b_scene);
  return use_denoising;
}
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
{
if (b_v3d) {

View File

@ -44,11 +44,15 @@ class BlenderViewportParameters {
friend class BlenderSync;
public:
/* Get whether to enable denoising data pass in viewport. */
static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
/* Retrieve the render pass that needs to be displayed on the given `SpaceView3D`
* When the `b_v3d` parameter is not given `PASS_NONE` will be returned. */
static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d);
};
bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
CCL_NAMESPACE_END

View File

@ -366,6 +366,15 @@ void Device::draw_pixels(device_memory &rgba,
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
#ifdef WITH_MULTI
if (!info.multi_devices.empty()) {
/* Always create a multi device when info contains multiple devices.
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
* that it is a homogeneous collection of devices, which simplifies checks. */
return device_multi_create(info, stats, profiler, background);
}
#endif
Device *device;
switch (info.type) {
@ -388,11 +397,6 @@ Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
device = NULL;
break;
#endif
#ifdef WITH_MULTI
case DEVICE_MULTI:
device = device_multi_create(info, stats, profiler, background);
break;
#endif
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, stats, profiler, "127.0.0.1");
@ -586,7 +590,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
DeviceInfo info;
info.type = DEVICE_MULTI;
info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
@ -624,6 +628,14 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.multi_devices.push_back(device);
}
/* Create unique ID for this combination of devices. */
info.id += device.id;
/* Set device type to MULTI if subdevices are not of a common type. */
if (device.type != info.type) {
info.type = DEVICE_MULTI;
}
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;

View File

@ -83,6 +83,7 @@ class DeviceInfo {
bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
vector<DeviceInfo> denoising_devices;
DeviceInfo()
{

View File

@ -508,13 +508,14 @@ class CPUDevice : public Device {
void thread_run(DeviceTask *task)
{
if (task->type == DeviceTask::RENDER) {
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE)
thread_render(*task);
}
else if (task->type == DeviceTask::FILM_CONVERT)
thread_film_convert(*task);
else if (task->type == DeviceTask::SHADER)
thread_shader(*task);
else if (task->type == DeviceTask::FILM_CONVERT)
thread_film_convert(*task);
else if (task->type == DeviceTask::DENOISE_BUFFER)
thread_denoise(*task);
}
class CPUDeviceTask : public DeviceTask {
@ -954,6 +955,33 @@ class CPUDevice : public Device {
delete split_kernel;
}
/* Denoise the buffer region described by the task as one single tile. */
void thread_denoise(DeviceTask &task)
{
  /* Build a tile that covers exactly the region and buffer of the task. */
  RenderTile whole_tile;
  whole_tile.buffers = task.buffers;
  whole_tile.buffer = task.buffer;
  whole_tile.offset = task.offset;
  whole_tile.stride = task.stride;
  whole_tile.x = task.x;
  whole_tile.y = task.y;
  whole_tile.w = task.w;
  whole_tile.h = task.h;
  whole_tile.start_sample = task.sample;
  whole_tile.num_samples = task.num_samples;
  whole_tile.sample = task.sample + task.num_samples;

  DenoisingTask denoising(this, task);

  /* Register a profiling state for the duration of the denoising call. */
  ProfilingState profiling_state;
  profiler.add_state(&profiling_state);
  denoising.profiler = &profiling_state;

  denoise(denoising, whole_tile);
  /* Report the full tile area as progress. */
  task.update_progress(&whole_tile, whole_tile.w * whole_tile.h);

  profiler.remove_state(&profiling_state);
}
void thread_film_convert(DeviceTask &task)
{
float sample_scale = 1.0f / (task.sample + 1);

View File

@ -994,16 +994,16 @@ class CUDADevice : public Device {
else if (mem.type == MEM_TEXTURE) {
assert(!"mem_copy_from not supported for textures.");
}
else {
CUDAContextScope scope(this);
size_t offset = elem * y * w;
size_t size = elem * w * h;
else if (mem.host_pointer) {
const size_t size = elem * w * h;
const size_t offset = elem * y * w;
if (mem.host_pointer && mem.device_pointer) {
if (mem.device_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemcpyDtoH(
(uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
(char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
}
else if (mem.host_pointer) {
else {
memset((char *)mem.host_pointer + offset, 0, size);
}
}
@ -1014,20 +1014,19 @@ class CUDADevice : public Device {
if (!mem.device_pointer) {
mem_alloc(mem);
}
if (mem.host_pointer) {
memset(mem.host_pointer, 0, mem.memory_size());
if (!mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, mem.device_pointer currently
* refers to device memory regardless of mem.host_pointer and
* mem.shared_pointer. */
if (mem.device_pointer &&
(cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
CUDAContextScope scope(this);
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(this);
cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
}
else if (mem.host_pointer) {
memset(mem.host_pointer, 0, mem.memory_size());
}
}
void mem_free(device_memory &mem)
@ -2240,7 +2239,7 @@ class CUDADevice : public Device {
{
CUDAContextScope scope(this);
if (task->type == DeviceTask::RENDER) {
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
DeviceRequestedFeatures requested_features;
if (use_split_kernel()) {
if (split_kernel == NULL) {
@ -2288,6 +2287,24 @@ class CUDADevice : public Device {
cuda_assert(cuCtxSynchronize());
}
else if (task->type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
tile.x = task->x;
tile.y = task->y;
tile.w = task->w;
tile.h = task->h;
tile.buffer = task->buffer;
tile.sample = task->sample + task->num_samples;
tile.num_samples = task->num_samples;
tile.start_sample = task->sample;
tile.offset = task->offset;
tile.stride = task->stride;
tile.buffers = task->buffers;
DenoisingTask denoising(this, *task);
denoise(tile, denoising);
task->update_progress(&tile, tile.w * tile.h);
}
}
class CUDADeviceTask : public DeviceTask {

View File

@ -427,6 +427,11 @@ template<typename T> class device_vector : public device_memory {
device_copy_to();
}
/* Copy the complete buffer (all `data_height` rows of `data_width` elements)
 * back from the device, starting at the first row. */
void copy_from_device()
{
  const int first_row = 0;
  device_copy_from(first_row, data_width, data_height, sizeof(T));
}
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));

View File

@ -42,7 +42,7 @@ class MultiDevice : public Device {
map<device_ptr, device_ptr> ptr_map;
};
list<SubDevice> devices;
list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
@ -61,6 +61,12 @@ class MultiDevice : public Device {
}
}
foreach (DeviceInfo &subinfo, info.denoising_devices) {
Device *device = Device::create(subinfo, sub_stats_, profiler, background);
denoising_devices.push_back(SubDevice(device));
}
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
@ -80,17 +86,18 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
delete sub.device;
foreach (SubDevice &sub, denoising_devices)
delete sub.device;
}
const string &error_message()
{
foreach (SubDevice &sub, devices) {
if (sub.device->error_message() != "") {
if (error_msg == "")
error_msg = sub.device->error_message();
break;
}
}
error_msg.clear();
foreach (SubDevice &sub, devices)
error_msg += sub.device->error_message();
foreach (SubDevice &sub, denoising_devices)
error_msg += sub.device->error_message();
return error_msg;
}
@ -118,6 +125,12 @@ class MultiDevice : public Device {
if (!sub.device->load_kernels(requested_features))
return false;
if (requested_features.use_denoising) {
foreach (SubDevice &sub, denoising_devices)
if (!sub.device->load_kernels(requested_features))
return false;
}
return true;
}
@ -127,6 +140,12 @@ class MultiDevice : public Device {
if (!sub.device->wait_for_availability(requested_features))
return false;
if (requested_features.use_denoising) {
foreach (SubDevice &sub, denoising_devices)
if (!sub.device->wait_for_availability(requested_features))
return false;
}
return true;
}
@ -150,16 +169,17 @@ class MultiDevice : public Device {
break;
}
}
return result;
}
bool build_optix_bvh(BVH *bvh)
{
// Broadcast acceleration structure build to all devices
foreach (SubDevice &sub, devices) {
// Broadcast acceleration structure build to all render devices
foreach (SubDevice &sub, devices)
if (!sub.device->build_optix_bvh(bvh))
return false;
}
return true;
}
@ -236,6 +256,17 @@ class MultiDevice : public Device {
sub.ptr_map[key] = mem.device_pointer;
}
if (strcmp(mem.name, "RenderBuffers") == 0) {
foreach (SubDevice &sub, denoising_devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
}
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
@ -255,6 +286,17 @@ class MultiDevice : public Device {
sub.ptr_map.erase(sub.ptr_map.find(key));
}
if (strcmp(mem.name, "RenderBuffers") == 0) {
foreach (SubDevice &sub, denoising_devices) {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key));
}
}
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
@ -302,10 +344,21 @@ class MultiDevice : public Device {
void map_tile(Device *sub_device, RenderTile &tile)
{
if (!tile.buffer) {
return;
}
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
if (tile.buffer)
tile.buffer = sub.ptr_map[tile.buffer];
tile.buffer = sub.ptr_map[tile.buffer];
return;
}
}
foreach (SubDevice &sub, denoising_devices) {
if (sub.device == sub_device) {
tile.buffer = sub.ptr_map[tile.buffer];
return;
}
}
}
@ -320,6 +373,12 @@ class MultiDevice : public Device {
i++;
}
foreach (SubDevice &sub, denoising_devices) {
if (sub.device == sub_device)
return i;
i++;
}
return -1;
}
@ -330,11 +389,20 @@ class MultiDevice : public Device {
continue;
}
device_vector<float> &mem = tiles[i].buffers->buffer;
tiles[i].buffer = mem.device_pointer;
if (mem.device == this && denoising_devices.empty()) {
/* Skip unnecessary copies in viewport mode (buffer covers the
* whole image), but still need to fix up the tile device pointer. */
map_tile(sub_device, tiles[i]);
continue;
}
/* If the tile was rendered on another device, copy its memory to
* the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
@ -342,12 +410,20 @@ class MultiDevice : public Device {
* overwriting the buffer being denoised by the CPU thread. */
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
mem.copy_from_device(0, mem.data_size, 1);
mem.copy_from_device();
}
mem.swap_device(sub_device, 0, 0);
if (mem.device == this) {
/* Can re-use memory if tile is already allocated on the sub device. */
map_tile(sub_device, tiles[i]);
mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
}
else {
mem.swap_device(sub_device, 0, 0);
}
mem.copy_to_device();
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
@ -358,11 +434,17 @@ class MultiDevice : public Device {
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
/* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
if (mem.device == this && denoising_devices.empty()) {
return;
}
/* Copy denoised result back to the host. */
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.copy_from_device(0, mem.data_size, 1);
mem.copy_from_device();
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
@ -372,7 +454,9 @@ class MultiDevice : public Device {
}
device_vector<float> &mem = tiles[i].buffers->buffer;
if (mem.device != sub_device) {
if (mem.device != sub_device && mem.device != this) {
/* Free up memory again if it was allocated for the copy above. */
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
@ -398,10 +482,16 @@ class MultiDevice : public Device {
void task_add(DeviceTask &task)
{
list<DeviceTask> tasks;
task.split(tasks, devices.size());
list<SubDevice> &task_devices = denoising_devices.empty() ||
(task.type != DeviceTask::DENOISE &&
task.type != DeviceTask::DENOISE_BUFFER) ?
devices :
denoising_devices;
foreach (SubDevice &sub, devices) {
list<DeviceTask> tasks;
task.split(tasks, task_devices.size());
foreach (SubDevice &sub, task_devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
@ -426,12 +516,16 @@ class MultiDevice : public Device {
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
foreach (SubDevice &sub, denoising_devices)
sub.device->task_wait();
}
void task_cancel()
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
foreach (SubDevice &sub, denoising_devices)
sub.device->task_cancel();
}
protected:

View File

@ -213,6 +213,7 @@ class OptiXDevice : public Device {
OptixDenoiser denoiser = NULL;
vector<pair<int2, CUdeviceptr>> denoiser_state;
int denoiser_input_passes = 0;
public:
OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
@ -632,7 +633,7 @@ class OptiXDevice : public Device {
if (have_error())
return; // Abort early if there was an error previously
if (task.type == DeviceTask::RENDER) {
if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) {
RenderTile tile;
while (task.acquire_tile(this, tile)) {
if (tile.task == RenderTile::PATH_TRACE)
@ -652,6 +653,22 @@ class OptiXDevice : public Device {
else if (task.type == DeviceTask::FILM_CONVERT) {
launch_film_convert(task, thread_index);
}
else if (task.type == DeviceTask::DENOISE_BUFFER) {
// Set up a single tile that covers the whole task and denoise it
RenderTile tile;
tile.x = task.x;
tile.y = task.y;
tile.w = task.w;
tile.h = task.h;
tile.buffer = task.buffer;
tile.num_samples = task.num_samples;
tile.start_sample = task.sample;
tile.offset = task.offset;
tile.stride = task.stride;
tile.buffers = task.buffers;
launch_denoise(task, tile, thread_index);
}
}
void launch_render(DeviceTask &task, RenderTile &rtile, int thread_index)
@ -740,6 +757,7 @@ class OptiXDevice : public Device {
RenderTile rtiles[10];
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
rtile = rtiles[4]; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
int4 rect = make_int4(
@ -846,7 +864,14 @@ class OptiXDevice : public Device {
}
# endif
if (denoiser == NULL) {
const bool recreate_denoiser = (denoiser == NULL) ||
(task.denoising.optix_input_passes != denoiser_input_passes);
if (recreate_denoiser) {
// Destroy existing handle before creating new one
if (denoiser != NULL) {
optixDenoiserDestroy(denoiser);
}
// Create OptiX denoiser handle on demand when it is first used
OptixDenoiserOptions denoiser_options;
assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
@ -856,6 +881,9 @@ class OptiXDevice : public Device {
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
// OptiX denoiser handle was created with the requested number of input passes
denoiser_input_passes = task.denoising.optix_input_passes;
}
OptixDenoiserSizes sizes = {};
@ -868,13 +896,16 @@ class OptiXDevice : public Device {
const size_t scratch_offset = sizes.stateSizeInBytes;
// Allocate denoiser state if tile size has changed since last setup
if (state_size.x != rect_size.x || state_size.y != rect_size.y) {
if (state_size.x != rect_size.x || state_size.y != rect_size.y || recreate_denoiser) {
// Free existing state before allocating new one
if (state) {
cuMemFree(state);
state = 0;
}
check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size));
// Initialize denoiser state for the current tile size
check_result_optix_ret(optixDenoiserSetup(denoiser,
cuda_stream[thread_index],
rect_size.x,
@ -1972,17 +2003,17 @@ class OptiXDevice : public Device {
else if (mem.type == MEM_TEXTURE) {
assert(!"mem_copy_from not supported for textures.");
}
else {
else if (mem.host_pointer) {
// Calculate linear memory offset and size
const size_t size = elem * w * h;
const size_t offset = elem * y * w;
if (mem.host_pointer && mem.device_pointer) {
if (mem.device_pointer) {
const CUDAContextScope scope(cuda_context);
check_result_cuda(cuMemcpyDtoH(
(char *)mem.host_pointer + offset, (CUdeviceptr)mem.device_pointer + offset, size));
}
else if (mem.host_pointer) {
else {
memset((char *)mem.host_pointer + offset, 0, size);
}
}
@ -1990,21 +2021,22 @@ class OptiXDevice : public Device {
void mem_zero(device_memory &mem) override
{
if (mem.host_pointer)
memset(mem.host_pointer, 0, mem.memory_size());
if (!mem.device_pointer)
if (!mem.device_pointer) {
mem_alloc(mem); // Need to allocate memory first if it does not exist yet
}
if (!mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, mem.device_pointer currently
* refers to device memory regardless of mem.host_pointer and
* mem.shared_pointer. */
if (mem.device_pointer &&
(cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
/* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
* regardless of mem.host_pointer and mem.shared_pointer. */
if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
const CUDAContextScope scope(cuda_context);
check_result_cuda(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
}
else if (mem.host_pointer) {
memset(mem.host_pointer, 0, mem.memory_size());
}
}
void mem_free(device_memory &mem) override

View File

@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
if (type == SHADER) {
num = min(shader_w, num);
}
else if (type == RENDER) {
else if (type == RENDER || type == DENOISE) {
}
else {
num = min(h, num);
@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
tasks.push_back(task);
}
}
else if (type == RENDER) {
else if (type == RENDER || type == DENOISE) {
for (int i = 0; i < num; i++)
tasks.push_back(*this);
}
@ -115,7 +115,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if ((type != RENDER) && (type != SHADER))
if (type == FILM_CONVERT)
return;
if (update_progress_sample) {

View File

@ -47,7 +47,7 @@ class DenoiseParams {
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
/* Controls which passes the OptiX AI denoiser should use as input. */
/* Passes handed over to the OptiX denoiser (default to color + albedo). */
int optix_input_passes;
DenoiseParams()
@ -58,13 +58,13 @@ class DenoiseParams {
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
optix_input_passes = 1;
optix_input_passes = 2;
}
};
class DeviceTask : public Task {
public:
typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type;
Type type;
int x, y, w, h;
@ -81,7 +81,7 @@ class DeviceTask : public Task {
int shader_filter;
int shader_x, shader_w;
int passes_size;
RenderBuffers *buffers;
explicit DeviceTask(Type type = RENDER);
@ -114,7 +114,6 @@ class DeviceTask : public Task {
bool need_finish_queue;
bool integrator_branched;
int2 requested_tile_size;
protected:
double last_update_time;

View File

@ -1308,13 +1308,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
{
flush_texture_buffers();
if (task->type == DeviceTask::FILM_CONVERT) {
film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
}
else if (task->type == DeviceTask::SHADER) {
shader(*task);
}
else if (task->type == DeviceTask::RENDER) {
if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
RenderTile tile;
DenoisingTask denoising(this, *task);
@ -1352,6 +1346,30 @@ void OpenCLDevice::thread_run(DeviceTask *task)
kgbuffer.free();
}
else if (task->type == DeviceTask::SHADER) {
shader(*task);
}
else if (task->type == DeviceTask::FILM_CONVERT) {
film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
}
else if (task->type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
tile.x = task->x;
tile.y = task->y;
tile.w = task->w;
tile.h = task->h;
tile.buffer = task->buffer;
tile.sample = task->sample + task->num_samples;
tile.num_samples = task->num_samples;
tile.start_sample = task->sample;
tile.offset = task->offset;
tile.stride = task->stride;
tile.buffers = task->buffers;
DenoisingTask denoising(this, *task);
denoise(tile, denoising);
task->update_progress(&tile, tile.w * tile.h);
}
}
void OpenCLDevice::film_convert(DeviceTask &task,

View File

@ -57,9 +57,9 @@ kernel_cuda_filter_convert_to_rgb(float *rgb, float *buf, int sw, int sh, int st
if (num_inputs > 0) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.x) / sizeof(float);
float *out = rgb + (x + y * sw) * 3;
out[0] = in[0];
out[1] = in[1];
out[2] = in[2];
out[0] = clamp(in[0], 0.0f, 10000.0f);
out[1] = clamp(in[1], 0.0f, 10000.0f);
out[2] = clamp(in[2], 0.0f, 10000.0f);
}
if (num_inputs > 1) {
float *in = buf + x * pass_stride + (y * stride + pass_offset.y) / sizeof(float);

View File

@ -146,7 +146,7 @@ void RenderBuffers::reset(BufferParams &params_)
params = params_;
/* re-allocate buffer */
buffer.alloc(params.width * params.height * params.get_passes_size());
buffer.alloc(params.width * params.get_passes_size(), params.height);
buffer.zero_to_device();
}

View File

@ -183,7 +183,8 @@ bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (gpu_draw_ready) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
if (!buffer_params.modified(display->params)) {
if (buffer_params.width == display->params.width &&
buffer_params.height == display->params.height) {
/* for CUDA we need to do tone-mapping still, since we can
* only access GL buffers from the main thread. */
if (gpu_need_display_buffer_update) {
@ -211,6 +212,7 @@ void Session::run_gpu()
reset_time = time_dt();
last_update_time = time_dt();
last_display_time = last_update_time;
progress.set_render_start_time();
@ -291,12 +293,21 @@ void Session::run_gpu()
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers_mutex);
/* avoid excessive denoising in viewport after reaching a certain amount of samples */
bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
(time_dt() - last_display_time) >= params.progressive_update_timeout;
/* update status and timing */
update_status_time();
/* render */
render();
/* denoise */
if (need_denoise) {
denoise();
}
device->task_wait();
if (!device->error_message().empty())
@ -305,7 +316,7 @@ void Session::run_gpu()
/* update status and timing */
update_status_time();
gpu_need_display_buffer_update = true;
gpu_need_display_buffer_update = need_denoise || !params.run_denoising;
gpu_draw_ready = true;
progress.set_update();
@ -359,7 +370,8 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
if (display->draw_ready()) {
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
if (!buffer_params.modified(display->params)) {
if (buffer_params.width == display->params.width &&
buffer_params.height == display->params.height) {
display->draw(device, draw_params);
if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
@ -372,7 +384,7 @@ bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_param
return false;
}
bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
bool Session::acquire_tile(Device *tile_device, RenderTile &rtile, RenderTile::Task task)
{
if (progress.get_cancel()) {
if (params.progressive_refine == false) {
@ -387,8 +399,14 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
Tile *tile;
int device_num = device->device_number(tile_device);
if (!tile_manager.next_tile(tile, device_num))
while (!tile_manager.next_tile(tile, device_num, task == RenderTile::DENOISE)) {
/* Wait for denoising tiles to become available */
if (task == RenderTile::DENOISE && !progress.get_cancel() && tile_manager.has_tiles()) {
denoising_cond.wait(tile_lock);
continue;
}
return false;
}
/* fill render tile */
rtile.x = tile_manager.state.buffer.full_x + tile->x;
@ -399,7 +417,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
rtile.task = task;
tile_lock.unlock();
@ -413,6 +431,9 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
device->map_tile(tile_device, rtile);
/* Reset copy state, since buffer contents change after the tile was acquired */
buffers->map_neighbor_copied = false;
return true;
}
@ -429,6 +450,8 @@ bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
tile->buffers->reset(buffer_params);
}
tile->buffers->map_neighbor_copied = false;
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
rtile.buffer = tile->buffers->buffer.device_pointer;
@ -484,45 +507,75 @@ void Session::release_tile(RenderTile &rtile)
}
update_status_time();
/* Notify denoising thread that a tile was finished. */
denoising_cond.notify_all();
}
void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
int center_idx = tiles[4].tile_index;
assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
BufferParams buffer_params = tile_manager.params;
int4 image_region = make_int4(buffer_params.full_x,
buffer_params.full_y,
buffer_params.full_x + buffer_params.width,
buffer_params.full_y + buffer_params.height);
const int4 image_region = make_int4(
tile_manager.state.buffer.full_x,
tile_manager.state.buffer.full_y,
tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
for (int dy = -1, i = 0; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++, i++) {
int px = tiles[4].x + dx * params.tile_size.x;
int py = tiles[4].y + dy * params.tile_size.y;
if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
py < image_region.w) {
int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
Tile *tile = &tile_manager.state.tiles[tile_index];
assert(tile->buffers);
if (!tile_manager.schedule_denoising) {
/* Fix up tile slices with overlap. */
if (tile_manager.slice_overlap != 0) {
int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y);
tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y;
tiles[4].y = y;
}
tiles[i].buffer = tile->buffers->buffer.device_pointer;
tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
tiles[i].w = tile->w;
tiles[i].h = tile->h;
tiles[i].buffers = tile->buffers;
/* Tiles are not being denoised individually, which means the entire image is processed. */
tiles[3].x = tiles[4].x;
tiles[1].y = tiles[4].y;
tiles[5].x = tiles[4].x + tiles[4].w;
tiles[7].y = tiles[4].y + tiles[4].h;
}
else {
int center_idx = tiles[4].tile_index;
assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
}
else {
tiles[i].buffer = (device_ptr)NULL;
tiles[i].buffers = NULL;
tiles[i].x = clamp(px, image_region.x, image_region.z);
tiles[i].y = clamp(py, image_region.y, image_region.w);
tiles[i].w = tiles[i].h = 0;
for (int dy = -1, i = 0; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++, i++) {
int nindex = tile_manager.get_neighbor_index(center_idx, i);
if (nindex >= 0) {
Tile *tile = &tile_manager.state.tiles[nindex];
tiles[i].x = image_region.x + tile->x;
tiles[i].y = image_region.y + tile->y;
tiles[i].w = tile->w;
tiles[i].h = tile->h;
if (buffers) {
tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride);
tiles[i].buffer = buffers->buffer.device_pointer;
tiles[i].buffers = buffers;
}
else {
assert(tile->buffers);
tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
tiles[i].buffer = tile->buffers->buffer.device_pointer;
tiles[i].buffers = tile->buffers;
}
}
else {
int px = tiles[4].x + dx * params.tile_size.x;
int py = tiles[4].y + dy * params.tile_size.y;
tiles[i].x = clamp(px, image_region.x, image_region.z);
tiles[i].y = clamp(py, image_region.y, image_region.w);
tiles[i].w = tiles[i].h = 0;
tiles[i].buffer = (device_ptr)NULL;
tiles[i].buffers = NULL;
}
}
}
}
@ -545,6 +598,7 @@ void Session::run_cpu()
bool tiles_written = false;
last_update_time = time_dt();
last_display_time = last_update_time;
{
/* reset once to start */
@ -620,11 +674,6 @@ void Session::run_cpu()
}
if (!no_tiles) {
/* buffers mutex is locked entirely while rendering each
* sample, and released/reacquired on each iteration to allow
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers_mutex);
/* update scene */
scoped_timer update_timer;
if (update_scene()) {
@ -638,17 +687,31 @@ void Session::run_cpu()
if (progress.get_cancel())
break;
/* buffers mutex is locked entirely while rendering each
* sample, and released/reacquired on each iteration to allow
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers_mutex);
/* avoid excessive denoising in viewport after reaching a certain amount of samples */
bool need_denoise = tile_manager.schedule_denoising || tile_manager.state.sample < 20 ||
(time_dt() - last_display_time) >= params.progressive_update_timeout;
/* update status and timing */
update_status_time();
/* render */
render();
/* denoise */
if (need_denoise) {
denoise();
}
/* update status and timing */
update_status_time();
if (!params.background)
need_copy_to_display_buffer = true;
need_copy_to_display_buffer = need_denoise || !params.run_denoising;
if (!device->error_message().empty())
progress.set_error(device->error_message());
@ -869,6 +932,20 @@ void Session::set_pause(bool pause_)
pause_cond.notify_all();
}
void Session::set_denoising(bool denoising, bool optix_denoising)
{
  /* Hold the buffers mutex for the whole update, so that no denoising operation can be
   * triggered while the settings below are being changed. */
  thread_scoped_lock buffers_lock(buffers_mutex);

  /* `full_denoising` is always the inverse of `optix_denoising`. */
  params.optix_denoising = optix_denoising;
  params.full_denoising = !optix_denoising;
  params.run_denoising = denoising;

  /* Tile slices only get an overlap when denoising is enabled and `params.background` is
   * not set; otherwise the overlap is reset to zero. */
  // TODO(pmours): Query the required overlap value for denoising from the device?
  int overlap = 0;
  if (denoising && !params.background) {
    overlap = 64;
  }
  tile_manager.slice_overlap = overlap;

  /* Per-tile denoising is only scheduled when denoising is enabled and there is no `buffers`
   * object. */
  bool schedule_per_tile = false;
  if (denoising) {
    schedule_per_tile = !buffers;
  }
  tile_manager.schedule_denoising = schedule_per_tile;
}
void Session::wait()
{
if (session_thread) {
@ -1016,33 +1093,74 @@ void Session::render()
/* Add path trace task. */
DeviceTask task(DeviceTask::RENDER);
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::PATH_TRACE);
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
task.requested_tile_size = params.tile_size;
task.passes_size = tile_manager.params.get_passes_size();
if (params.run_denoising) {
task.denoising = params.denoising;
device->task_add(task);
}
assert(!scene->film->need_update);
task.pass_stride = scene->film->pass_stride;
task.target_pass_stride = task.pass_stride;
task.pass_denoising_data = scene->film->denoising_data_offset;
task.pass_denoising_clean = scene->film->denoising_clean_offset;
task.denoising_from_render = true;
task.denoising_do_filter = params.full_denoising;
task.denoising_use_optix = params.optix_denoising;
task.denoising_write_passes = params.write_denoising_passes;
void Session::denoise()
{
  /* Nothing to do when denoising is disabled. Also bail out while the film still needs an
   * update or has no denoising data offset yet: denoising can already be enabled before the
   * scene has caught up with that change. */
  if (!params.run_denoising || scene->film->need_update ||
      !scene->film->denoising_data_offset) {
    return;
  }

  /* Denoising is submitted as its own device task. */
  DeviceTask task(DeviceTask::DENOISE);

  if (!tile_manager.schedule_denoising) {
    /* Whole-image path: requires the `buffers` object. */
    assert(buffers);

    /* Make sure previously queued device work has finished before reading buffer state. */
    device->task_wait();

    /* Switch the task over to denoising the complete buffer in a single go. */
    task.type = DeviceTask::DENOISE_BUFFER;

    task.buffers = buffers;
    task.buffer = buffers->buffer.device_pointer;

    task.x = tile_manager.state.buffer.full_x;
    task.y = tile_manager.state.buffer.full_y;
    task.w = tile_manager.state.buffer.width;
    task.h = tile_manager.state.buffer.height;
    tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);

    task.sample = tile_manager.state.sample;
    task.num_samples = tile_manager.state.num_samples;
  }
  else {
    /* Per-tile path: the device acquires denoising tiles through the session callbacks. */
    task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2, RenderTile::DENOISE);
    task.release_tile = function_bind(&Session::release_tile, this, _1);
    task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
    task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
  }

  /* Callbacks shared by both paths. */
  task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
  task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
  task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
  task.need_finish_queue = params.progressive_refine;

  /* Denoising parameters taken from the session settings. */
  task.denoising = params.denoising;
  task.denoising_from_render = true;
  task.denoising_do_filter = params.full_denoising;
  task.denoising_use_optix = params.optix_denoising;
  task.denoising_write_passes = params.write_denoising_passes;

  /* Pass layout information taken from the film. */
  task.pass_stride = scene->film->pass_stride;
  task.target_pass_stride = task.pass_stride;
  task.pass_denoising_data = scene->film->denoising_data_offset;
  task.pass_denoising_clean = scene->film->denoising_clean_offset;

  device->task_add(task);
}
@ -1067,6 +1185,8 @@ void Session::copy_to_display_buffer(int sample)
/* set display to new size */
display->draw_set(task.w, task.h);
last_display_time = time_dt();
}
display_outdated = false;

View File

@ -154,6 +154,7 @@ class Session {
void reset(BufferParams &params, int samples);
void set_samples(int samples);
void set_pause(bool pause);
void set_denoising(bool denoising, bool optix_denoising);
bool update_scene();
bool load_kernels(bool lock_scene = true);
@ -178,8 +179,10 @@ class Session {
void update_status_time(bool show_pause = false, bool show_done = false);
void copy_to_display_buffer(int sample);
void render();
void denoise();
void copy_to_display_buffer(int sample);
void reset_(BufferParams &params, int samples);
void run_cpu();
@ -190,7 +193,7 @@ class Session {
bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
void reset_gpu(BufferParams &params, int samples);
bool acquire_tile(Device *tile_device, RenderTile &tile);
bool acquire_tile(Device *tile_device, RenderTile &tile, RenderTile::Task task);
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile);
@ -213,14 +216,16 @@ class Session {
thread_mutex tile_mutex;
thread_mutex buffers_mutex;
thread_mutex display_mutex;
thread_condition_variable denoising_cond;
bool kernels_loaded;
DeviceRequestedFeatures loaded_kernel_features;
double reset_time;
double last_update_time;
double last_display_time;
/* progressive refine */
double last_update_time;
bool update_progressive_refine(bool cancel);
DeviceRequestedFeatures get_requested_device_features();

View File

@ -101,6 +101,7 @@ TileManager::TileManager(bool progressive_,
tile_order = tile_order_;
start_resolution = start_resolution_;
pixel_size = pixel_size_;
slice_overlap = 0;
num_samples = num_samples_;
num_devices = num_devices_;
preserve_tile_device = preserve_tile_device_;
@ -201,8 +202,7 @@ int TileManager::gen_tiles(bool sliced)
int image_h = max(1, params.height / resolution);
int2 center = make_int2(image_w / 2, image_h / 2);
int num_logical_devices = preserve_tile_device ? num_devices : 1;
int num = min(image_h, num_logical_devices);
int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1;
int slice_num = sliced ? num : 1;
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
@ -216,7 +216,7 @@ int TileManager::gen_tiles(bool sliced)
tile_list = state.render_tiles.begin();
if (tile_order == TILE_HILBERT_SPIRAL) {
assert(!sliced);
assert(!sliced && slice_overlap == 0);
int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
state.tiles.resize(tile_w * tile_h);
@ -319,6 +319,12 @@ int TileManager::gen_tiles(bool sliced)
int slice_h = (slice == slice_num - 1) ? image_h - slice * (image_h / slice_num) :
image_h / slice_num;
if (slice_overlap != 0) {
int slice_y_offset = max(slice_y - slice_overlap, 0);
slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset;
slice_y = slice_y_offset;
}
int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
int tiles_per_device = divide_up(tile_w * tile_h, num);
@ -363,6 +369,7 @@ void TileManager::gen_render_tiles()
{
/* Regenerate just the render tiles for progressive render. */
foreach (Tile &tile, state.tiles) {
tile.state = Tile::RENDER;
state.render_tiles[tile.device].push_back(tile.index);
}
}
@ -386,17 +393,29 @@ void TileManager::set_tiles()
int TileManager::get_neighbor_index(int index, int neighbor)
{
static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
/* Neighbor indices:
* 0 1 2
* 3 4 5
* 6 7 8
*/
static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1};
static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1};
int resolution = state.resolution_divider;
int image_w = max(1, params.width / resolution);
int image_h = max(1, params.height / resolution);
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y);
int nx = state.tiles[index].x / tile_size.x + dx[neighbor],
ny = state.tiles[index].y / tile_size.y + dy[neighbor];
if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h)
int num = min(image_h, num_devices);
int slice_num = !background ? num : 1;
int slice_h = image_h / slice_num;
int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x);
int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y);
/* Tiles in the state tile list are always indexed from left to right, top to bottom. */
int nx = (index % tile_w) + dx[neighbor];
int ny = (index / tile_w) + dy[neighbor];
if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num)
return -1;
return ny * state.tile_stride + nx;
@ -426,15 +445,11 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
{
delete_tile = false;
if (progressive) {
return true;
}
switch (state.tiles[index].state) {
case Tile::RENDER: {
if (!schedule_denoising) {
state.tiles[index].state = Tile::DONE;
delete_tile = true;
delete_tile = !progressive;
return true;
}
state.tiles[index].state = Tile::RENDERED;
@ -457,15 +472,18 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
int nindex = get_neighbor_index(index, neighbor);
if (check_neighbor_state(nindex, Tile::DENOISED)) {
state.tiles[nindex].state = Tile::DONE;
/* It can happen that the tile just finished denoising and already can be freed here.
* However, in that case it still has to be written before deleting, so we can't delete
* it yet. */
if (neighbor == 8) {
delete_tile = true;
}
else {
delete state.tiles[nindex].buffers;
state.tiles[nindex].buffers = NULL;
/* Do not delete finished tiles in progressive mode. */
if (!progressive) {
/* It can happen that the tile just finished denoising and already can be freed here.
* However, in that case it still has to be written before deleting, so we can't delete
* it yet. */
if (neighbor == 4) {
delete_tile = true;
}
else {
delete state.tiles[nindex].buffers;
state.tiles[nindex].buffers = NULL;
}
}
}
}
@ -477,27 +495,56 @@ bool TileManager::finish_tile(int index, bool &delete_tile)
}
}
bool TileManager::next_tile(Tile *&tile, int device)
bool TileManager::next_tile(Tile *&tile, int device, bool denoising)
{
int logical_device = preserve_tile_device ? device : 0;
/* Preserve device if requested, unless this is a separate denoising device that just wants to
* grab any available tile. */
const bool preserve_device = preserve_tile_device && device < num_devices;
if (logical_device >= state.render_tiles.size())
return false;
int tile_index = -1;
int logical_device = preserve_device ? device : 0;
if (!state.denoising_tiles[logical_device].empty()) {
int idx = state.denoising_tiles[logical_device].front();
state.denoising_tiles[logical_device].pop_front();
tile = &state.tiles[idx];
if (denoising) {
while (logical_device < state.denoising_tiles.size()) {
if (state.denoising_tiles[logical_device].empty()) {
if (preserve_device) {
return false;
}
else {
logical_device++;
continue;
}
}
tile_index = state.denoising_tiles[logical_device].front();
state.denoising_tiles[logical_device].pop_front();
break;
}
}
else {
while (logical_device < state.render_tiles.size()) {
if (state.render_tiles[logical_device].empty()) {
if (preserve_device) {
return false;
}
else {
logical_device++;
continue;
}
}
tile_index = state.render_tiles[logical_device].front();
state.render_tiles[logical_device].pop_front();
break;
}
}
if (tile_index >= 0) {
tile = &state.tiles[tile_index];
return true;
}
if (state.render_tiles[logical_device].empty())
return false;
int idx = state.render_tiles[logical_device].front();
state.render_tiles[logical_device].pop_front();
tile = &state.tiles[idx];
return true;
return false;
}
bool TileManager::done()
@ -508,6 +555,16 @@ bool TileManager::done()
(state.sample + state.num_samples >= end_sample);
}
bool TileManager::has_tiles()
{
foreach (Tile &tile, state.tiles) {
if (tile.state != Tile::DONE) {
return true;
}
}
return false;
}
bool TileManager::next()
{
if (done())

View File

@ -89,6 +89,7 @@ class TileManager {
} state;
int num_samples;
int slice_overlap;
TileManager(bool progressive,
int num_samples,
@ -105,15 +106,19 @@ class TileManager {
void reset(BufferParams &params, int num_samples);
void set_samples(int num_samples);
bool next();
bool next_tile(Tile *&tile, int device = 0);
bool next_tile(Tile *&tile, int device, bool denoising);
bool finish_tile(int index, bool &delete_tile);
bool done();
bool has_tiles();
void set_tile_order(TileOrder tile_order_)
{
tile_order = tile_order_;
}
int get_neighbor_index(int index, int neighbor);
bool check_neighbor_state(int index, Tile::State state);
/* ** Sample range rendering. ** */
/* Start sample in the range. */
@ -160,9 +165,6 @@ class TileManager {
/* Generate tile list, return number of tiles. */
int gen_tiles(bool sliced);
void gen_render_tiles();
int get_neighbor_index(int index, int neighbor);
bool check_neighbor_state(int index, Tile::State state);
};
CCL_NAMESPACE_END