Cycles: Add OptiX temporal denoising support

Enables the `bpy.ops.cycles.denoise_animation()` operator again and modifies it to support temporal denoising with OptiX. This requires renders that were done with both the "Vector" and "Denoising Data" passes. Differential Revision: https://developer.blender.org/D11442
Referenced by issue #94711, Blender crashes on Apple M1 when using Cycles with GPU Compute since rB8393ccd07634
2022-01-04 21:39:54 +01:00 · 2022-01-04 21:39:54 +01:00 · 8393ccd076 · 2023-02-14 10:32:59 +01:00
parent 86141a75eb
commit 8393ccd076
12 changed files with 291 additions and 477 deletions
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@ -735,27 +735,20 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat

 static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
 {
-#if 1
-  (void)args;
-  (void)keywords;
-#else
  static const char *keyword_list[] = {
-      "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
+      "preferences", "scene", "view_layer", "input", "output", NULL};
  PyObject *pypreferences, *pyscene, *pyviewlayer;
  PyObject *pyinput, *pyoutput = NULL;
-  int tile_size = 0, samples = 0;

  if (!PyArg_ParseTupleAndKeywords(args,
                                   keywords,
-                                   "OOOO|Oii",
+                                   "OOOO|O",
                                   (char **)keyword_list,
                                   &pypreferences,
                                   &pyscene,
                                   &pyviewlayer,
                                   &pyinput,
-                                   &pyoutput,
-                                   &tile_size,
-                                   &samples)) {
+                                   &pyoutput)) {
    return NULL;
  }

@ -777,14 +770,10 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
                     &RNA_ViewLayer,
                     PyLong_AsVoidPtr(pyviewlayer),
                     &viewlayerptr);
-  PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
+  BL::ViewLayer b_view_layer(viewlayerptr);

-  DenoiseParams params;
-  params.radius = get_int(cviewlayer, "denoising_radius");
-  params.strength = get_float(cviewlayer, "denoising_strength");
-  params.feature_strength = get_float(cviewlayer, "denoising_feature_strength");
-  params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
-  params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
+  DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true);
+  params.use = true;

  /* Parse file paths list. */
  vector<string> input, output;
@ -812,24 +801,15 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
  }

  /* Create denoiser. */
-  DenoiserPipeline denoiser(device);
-  denoiser.params = params;
+  DenoiserPipeline denoiser(device, params);
  denoiser.input = input;
  denoiser.output = output;

-  if (tile_size > 0) {
-    denoiser.tile_size = make_int2(tile_size, tile_size);
-  }
-  if (samples > 0) {
-    denoiser.samples_override = samples;
-  }
-
  /* Run denoiser. */
  if (!denoiser.run()) {
    PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
    return NULL;
  }
-#endif

  Py_RETURN_NONE;
 }
--- a/intern/cycles/blender/sync.h
+++ b/intern/cycles/blender/sync.h
@ -105,11 +105,11 @@ class BlenderSync {
  static BufferParams get_buffer_params(
      BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);

- private:
  static DenoiseParams get_denoise_params(BL::Scene &b_scene,
                                          BL::ViewLayer &b_view_layer,
                                          bool background);

+ private:
  /* sync */
  void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
  void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
--- a/intern/cycles/device/denoise.cpp
+++ b/intern/cycles/device/denoise.cpp
@ -76,6 +76,8 @@ NODE_DEFINE(DenoiseParams)
  SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
  SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);

+  SOCKET_BOOLEAN(temporally_stable, "Temporally Stable", false);
+
  SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);

  return type;
--- a/intern/cycles/device/denoise.h
+++ b/intern/cycles/device/denoise.h
@ -72,6 +72,9 @@ class DenoiseParams : public Node {
  bool use_pass_albedo = true;
  bool use_pass_normal = true;

+  /* Configure the denoiser to use motion vectors, previous image and a temporally stable model. */
+  bool temporally_stable = false;
+
  DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;

  static const NodeEnum *get_type_enum();
@ -83,7 +86,8 @@ class DenoiseParams : public Node {
  {
    return !(use == other.use && type == other.type && start_sample == other.start_sample &&
             use_pass_albedo == other.use_pass_albedo &&
-             use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
+             use_pass_normal == other.use_pass_normal &&
+             temporally_stable == other.temporally_stable && prefilter == other.prefilter);
  }
 };

--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@ -566,6 +566,19 @@ class OptiXDevice::DenoiseContext {
      }
    }

+    if (denoise_params.temporally_stable) {
+      prev_output.device_pointer = render_buffers->buffer.device_pointer;
+
+      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+
+      prev_output.stride = buffer_params.stride;
+      prev_output.pass_stride = buffer_params.pass_stride;
+
+      num_input_passes += 1;
+      use_pass_flow = true;
+      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
+    }
+
    use_guiding_passes = (num_input_passes - 1) > 0;

    if (use_guiding_passes) {
@ -574,6 +587,7 @@ class OptiXDevice::DenoiseContext {

        guiding_params.pass_albedo = pass_denoising_albedo;
        guiding_params.pass_normal = pass_denoising_normal;
+        guiding_params.pass_flow = pass_motion;

        guiding_params.stride = buffer_params.stride;
        guiding_params.pass_stride = buffer_params.pass_stride;
@ -588,6 +602,10 @@ class OptiXDevice::DenoiseContext {
          guiding_params.pass_normal = guiding_params.pass_stride;
          guiding_params.pass_stride += 3;
        }
+        if (use_pass_flow) {
+          guiding_params.pass_flow = guiding_params.pass_stride;
+          guiding_params.pass_stride += 2;
+        }

        guiding_params.stride = buffer_params.width;

@ -605,6 +623,16 @@ class OptiXDevice::DenoiseContext {
  RenderBuffers *render_buffers = nullptr;
  const BufferParams &buffer_params;

+  /* Previous output. */
+  struct {
+    device_ptr device_pointer = 0;
+
+    int offset = PASS_UNUSED;
+
+    int stride = -1;
+    int pass_stride = -1;
+  } prev_output;
+
  /* Device-side storage of the guiding passes. */
  device_only_memory<float> guiding_buffer;

@ -614,6 +642,7 @@ class OptiXDevice::DenoiseContext {
    /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
    int pass_albedo = PASS_UNUSED;
    int pass_normal = PASS_UNUSED;
+    int pass_flow = PASS_UNUSED;

    int stride = -1;
    int pass_stride = -1;
@ -624,6 +653,7 @@ class OptiXDevice::DenoiseContext {
  bool use_guiding_passes = false;
  bool use_pass_albedo = false;
  bool use_pass_normal = false;
+  bool use_pass_flow = false;

  int num_samples = 0;

@ -632,6 +662,7 @@ class OptiXDevice::DenoiseContext {
  /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
  int pass_denoising_albedo = PASS_UNUSED;
  int pass_denoising_normal = PASS_UNUSED;
+  int pass_motion = PASS_UNUSED;

  /* For passes which don't need albedo channel for denoising we replace the actual albedo with
   * the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
@ -702,6 +733,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
                             &context.guiding_params.pass_stride,
                             &context.guiding_params.pass_albedo,
                             &context.guiding_params.pass_normal,
+                             &context.guiding_params.pass_flow,
                             &context.render_buffers->buffer.device_pointer,
                             &buffer_params.offset,
                             &buffer_params.stride,
@ -709,6 +741,7 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
                             &context.pass_sample_count,
                             &context.pass_denoising_albedo,
                             &context.pass_denoising_normal,
+                             &context.pass_motion,
                             &buffer_params.full_x,
                             &buffer_params.full_y,
                             &buffer_params.width,
@ -881,7 +914,8 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
 {
  const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
                                 (denoiser_.use_pass_albedo != context.use_pass_albedo) ||
-                                 (denoiser_.use_pass_normal != context.use_pass_normal);
+                                 (denoiser_.use_pass_normal != context.use_pass_normal) ||
+                                 (denoiser_.use_pass_flow != context.use_pass_flow);
  if (!recreate_denoiser) {
    return true;
  }
@ -895,8 +929,14 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
  OptixDenoiserOptions denoiser_options = {};
  denoiser_options.guideAlbedo = context.use_pass_albedo;
  denoiser_options.guideNormal = context.use_pass_normal;
+
+  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
+  if (context.use_pass_flow) {
+    model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
+  }
+
  const OptixResult result = optixDenoiserCreate(
-      this->context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser_.optix_denoiser);
+      this->context, model, &denoiser_options, &denoiser_.optix_denoiser);

  if (result != OPTIX_SUCCESS) {
    set_error("Failed to create OptiX denoiser");
@ -906,6 +946,7 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
  /* OptiX denoiser handle was created with the requested number of input passes. */
  denoiser_.use_pass_albedo = context.use_pass_albedo;
  denoiser_.use_pass_normal = context.use_pass_normal;
+  denoiser_.use_pass_flow = context.use_pass_flow;

  /* OptiX denoiser has been created, but it needs configuration. */
  denoiser_.is_configured = false;
@ -965,8 +1006,10 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
  OptixImage2D color_layer = {0};
  OptixImage2D albedo_layer = {0};
  OptixImage2D normal_layer = {0};
+  OptixImage2D flow_layer = {0};

  OptixImage2D output_layer = {0};
+  OptixImage2D prev_output_layer = {0};

  /* Color pass. */
  {
@ -982,6 +1025,19 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
    color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
  }

+  /* Previous output. */
+  if (context.prev_output.offset != PASS_UNUSED) {
+    const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
+
+    prev_output_layer.data = context.prev_output.device_pointer +
+                             context.prev_output.offset * sizeof(float);
+    prev_output_layer.width = width;
+    prev_output_layer.height = height;
+    prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
+    prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
+    prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+  }
+
  /* Optional albedo and color passes. */
  if (context.num_input_passes > 1) {
    const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@ -1005,21 +1061,32 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
      normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
      normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
    }
+
+    if (context.use_pass_flow) {
+      flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
+      flow_layer.width = width;
+      flow_layer.height = height;
+      flow_layer.rowStrideInBytes = row_stride_in_bytes;
+      flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+      flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
+    }
  }

  /* Denoise in-place of the noisy input in the render buffers. */
  output_layer = color_layer;

-  /* Finally run denoising. */
-  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
-
-  OptixDenoiserLayer image_layers = {};
-  image_layers.input = color_layer;
-  image_layers.output = output_layer;
-
  OptixDenoiserGuideLayer guide_layers = {};
  guide_layers.albedo = albedo_layer;
  guide_layers.normal = normal_layer;
+  guide_layers.flow = flow_layer;
+
+  OptixDenoiserLayer image_layers = {};
+  image_layers.input = color_layer;
+  image_layers.previousOutput = prev_output_layer;
+  image_layers.output = output_layer;
+
+  /* Finally run denoising. */
+  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */

  optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
                                            denoiser_.queue.stream(),
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@ -104,6 +104,7 @@ class OptiXDevice : public CUDADevice {

    bool use_pass_albedo = false;
    bool use_pass_normal = false;
+    bool use_pass_flow = false;
  };
  Denoiser denoiser_;

--- a/intern/cycles/device/queue.h
+++ b/intern/cycles/device/queue.h
@ -19,6 +19,7 @@
 #include "device/kernel.h"

 #include "device/graphics_interop.h"
+#include "util/debug.h"
 #include "util/log.h"
 #include "util/map.h"
 #include "util/string.h"
@ -42,7 +43,7 @@ struct DeviceKernelArguments {
    KERNEL_FILM_CONVERT,
  };

-  static const int MAX_ARGS = 16;
+  static const int MAX_ARGS = 18;
  Type types[MAX_ARGS];
  void *values[MAX_ARGS];
  size_t sizes[MAX_ARGS];
@ -85,6 +86,8 @@ struct DeviceKernelArguments {
  }
  void add(const Type type, const void *value, size_t size)
  {
+    assert(count < MAX_ARGS);
+
    types[count] = type;
    values[count] = (void *)value;
    sizes[count] = size;
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@ -756,6 +756,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
                             int guiding_pass_stride,
                             int guiding_pass_albedo,
                             int guiding_pass_normal,
+                             int guiding_pass_flow,
                             ccl_global const float *render_buffer,
                             int render_offset,
                             int render_stride,
@ -763,6 +764,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
                             int render_pass_sample_count,
                             int render_pass_denoising_albedo,
                             int render_pass_denoising_normal,
+                             int render_pass_motion,
                             int full_x,
                             int full_y,
                             int width,
@ -814,6 +816,17 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    normal_out[1] = normal_in[1] * pixel_scale;
    normal_out[2] = normal_in[2] * pixel_scale;
  }
+
+  /* Flow pass. */
+  if (guiding_pass_flow != PASS_UNUSED) {
+    kernel_assert(render_pass_motion != PASS_UNUSED);
+
+    const float *motion_in = buffer + render_pass_motion;
+    float *flow_out = guiding_pixel + guiding_pass_flow;
+
+    flow_out[0] = -motion_in[0] * pixel_scale;
+    flow_out[1] = -motion_in[1] * pixel_scale;
+  }
 }

 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
@ -899,7 +912,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  else {
    /* Assigning to zero since this is a default alpha value for 3-component passes, and it
     * is an opaque pixel for 4 component passes. */
-
    denoised_pixel[3] = 0;
  }
 }
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@ -388,6 +388,7 @@ typedef enum PassType {
  PASS_DENOISING_NORMAL,
  PASS_DENOISING_ALBEDO,
  PASS_DENOISING_DEPTH,
+  PASS_DENOISING_PREVIOUS,

  /* PASS_SHADOW_CATCHER accumulates contribution of shadow catcher object which is not affected by
   * any other object. The pass accessor will divide the combined pass by the shadow catcher. The
--- a/intern/cycles/scene/pass.cpp
+++ b/intern/cycles/scene/pass.cpp
@ -101,6 +101,7 @@ const NodeEnum *Pass::get_type_enum()
    pass_type_enum.insert("denoising_normal", PASS_DENOISING_NORMAL);
    pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO);
    pass_type_enum.insert("denoising_depth", PASS_DENOISING_DEPTH);
+    pass_type_enum.insert("denoising_previous", PASS_DENOISING_PREVIOUS);

    pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER);
    pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT);
@ -299,6 +300,10 @@ PassInfo Pass::get_info(const PassType type, const bool include_albedo)
    case PASS_DENOISING_DEPTH:
      pass_info.num_components = 1;
      break;
+    case PASS_DENOISING_PREVIOUS:
+      pass_info.num_components = 3;
+      pass_info.use_exposure = true;
+      break;

    case PASS_SHADOW_CATCHER:
      pass_info.num_components = 3;
--- a/intern/cycles/session/denoising.cpp
+++ b/intern/cycles/session/denoising.cpp
@ -16,17 +16,12 @@

 #include "session/denoising.h"

-#if 0
+#include "util/map.h"
+#include "util/system.h"
+#include "util/task.h"
+#include "util/time.h"

-#  include "kernel/filter/filter_defines.h"
-
-#  include "util/util_foreach.h"
-#  include "util/util_map.h"
-#  include "util/util_system.h"
-#  include "util/util_task.h"
-#  include "util/util_time.h"
-
-#  include <OpenImageIO/filesystem.h>
+#include <OpenImageIO/filesystem.h>

 CCL_NAMESPACE_BEGIN

@ -125,24 +120,18 @@ static void fill_mapping(vector<ChannelMapping> &map, int pos, string name, stri
  }
 }

-static const int INPUT_NUM_CHANNELS = 15;
-static const int INPUT_DENOISING_DEPTH = 0;
-static const int INPUT_DENOISING_NORMAL = 1;
-static const int INPUT_DENOISING_SHADOWING = 4;
-static const int INPUT_DENOISING_ALBEDO = 5;
-static const int INPUT_NOISY_IMAGE = 8;
-static const int INPUT_DENOISING_VARIANCE = 11;
-static const int INPUT_DENOISING_INTENSITY = 14;
+static const int INPUT_NUM_CHANNELS = 13;
+static const int INPUT_NOISY_IMAGE = 0;
+static const int INPUT_DENOISING_NORMAL = 3;
+static const int INPUT_DENOISING_ALBEDO = 6;
+static const int INPUT_MOTION = 9;
 static vector<ChannelMapping> input_channels()
 {
  vector<ChannelMapping> map;
-  fill_mapping(map, INPUT_DENOISING_DEPTH, "Denoising Depth", "Z");
+  fill_mapping(map, INPUT_NOISY_IMAGE, "Combined", "RGB");
  fill_mapping(map, INPUT_DENOISING_NORMAL, "Denoising Normal", "XYZ");
-  fill_mapping(map, INPUT_DENOISING_SHADOWING, "Denoising Shadowing", "X");
  fill_mapping(map, INPUT_DENOISING_ALBEDO, "Denoising Albedo", "RGB");
-  fill_mapping(map, INPUT_NOISY_IMAGE, "Noisy Image", "RGB");
-  fill_mapping(map, INPUT_DENOISING_VARIANCE, "Denoising Variance", "RGB");
-  fill_mapping(map, INPUT_DENOISING_INTENSITY, "Denoising Intensity", "X");
+  fill_mapping(map, INPUT_MOTION, "Vector", "XYZW");
  return map;
 }

@ -162,7 +151,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
  input_to_image_channel.clear();
  input_to_image_channel.resize(INPUT_NUM_CHANNELS, -1);

-  foreach (const ChannelMapping &mapping, input_channels()) {
+  for (const ChannelMapping &mapping : input_channels()) {
    vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
    if (i == channels.end()) {
      return false;
@ -177,7 +166,7 @@ bool DenoiseImageLayer::detect_denoising_channels()
  output_to_image_channel.clear();
  output_to_image_channel.resize(OUTPUT_NUM_CHANNELS, -1);

-  foreach (const ChannelMapping &mapping, output_channels()) {
+  for (const ChannelMapping &mapping : output_channels()) {
    vector<string>::iterator i = find(channels.begin(), channels.end(), mapping.name);
    if (i == channels.end()) {
      return false;
@ -199,18 +188,16 @@ bool DenoiseImageLayer::detect_denoising_channels()
  return true;
 }

-bool DenoiseImageLayer::match_channels(int neighbor,
-                                       const std::vector<string> &channelnames,
+bool DenoiseImageLayer::match_channels(const std::vector<string> &channelnames,
                                       const std::vector<string> &neighbor_channelnames)
 {
-  neighbor_input_to_image_channel.resize(neighbor + 1);
-  vector<int> &mapping = neighbor_input_to_image_channel[neighbor];
+  vector<int> &mapping = previous_output_to_image_channel;

  assert(mapping.size() == 0);
-  mapping.resize(input_to_image_channel.size(), -1);
+  mapping.resize(output_to_image_channel.size(), -1);

-  for (int i = 0; i < input_to_image_channel.size(); i++) {
-    const string &channel = channelnames[input_to_image_channel[i]];
+  for (int i = 0; i < output_to_image_channel.size(); i++) {
+    const string &channel = channelnames[output_to_image_channel[i]];
    std::vector<string>::const_iterator frame_channel = find(
        neighbor_channelnames.begin(), neighbor_channelnames.end(), channel);

@ -226,19 +213,9 @@ bool DenoiseImageLayer::match_channels(int neighbor,

 /* Denoise Task */

-DenoiseTask::DenoiseTask(Device *device,
-                         DenoiserPipeline *denoiser,
-                         int frame,
-                         const vector<int> &neighbor_frames)
-    : denoiser(denoiser),
-      device(device),
-      frame(frame),
-      neighbor_frames(neighbor_frames),
-      current_layer(0),
-      input_pixels(device, "filter input buffer", MEM_READ_ONLY),
-      num_tiles(0)
+DenoiseTask::DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame)
+    : denoiser(denoiser), device(device), frame(frame), current_layer(0), buffers(device)
 {
-  image.samples = denoiser->samples_override;
 }

 DenoiseTask::~DenoiseTask()
@ -246,284 +223,39 @@ DenoiseTask::~DenoiseTask()
  free();
 }

-/* Device callbacks */
-
-bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &tile)
-{
-  thread_scoped_lock tile_lock(tiles_mutex);
-
-  if (tiles.empty()) {
-    return false;
-  }
-
-  tile = tiles.front();
-  tiles.pop_front();
-
-  device->map_tile(tile_device, tile);
-
-  print_progress(num_tiles - tiles.size(), num_tiles, frame, denoiser->num_frames);
-
-  return true;
-}
-
-/* Mapping tiles is required for regular rendering since each tile has its separate memory
- * which may be allocated on a different device.
- * For standalone denoising, there is a single memory that is present on all devices, so the only
- * thing that needs to be done here is to specify the surrounding tile geometry.
- *
- * However, since there is only one large memory, the denoised result has to be written to
- * a different buffer to avoid having to copy an entire horizontal slice of the image. */
-void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
-{
-  RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
-  RenderTile &target_tile = neighbors.target;
-
-  /* Fill tile information. */
-  for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
-    if (i == RenderTileNeighbors::CENTER) {
-      continue;
-    }
-
-    RenderTile &tile = neighbors.tiles[i];
-    int dx = (i % 3) - 1;
-    int dy = (i / 3) - 1;
-    tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
-    tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
-    tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
-    tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
-
-    tile.buffer = center_tile.buffer;
-    tile.offset = center_tile.offset;
-    tile.stride = image.width;
-  }
-
-  /* Allocate output buffer. */
-  device_vector<float> *output_mem = new device_vector<float>(
-      tile_device, "denoising_output", MEM_READ_WRITE);
-  output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
-
-  /* Fill output buffer with noisy image, assumed by kernel_filter_finalize
-   * when skipping denoising of some pixels. */
-  float *result = output_mem->data();
-  float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
-
-  const DenoiseImageLayer &layer = image.layers[current_layer];
-  const int *input_to_image_channel = layer.input_to_image_channel.data();
-
-  for (int y = 0; y < center_tile.h; y++) {
-    for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
-      for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
-        result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
-      }
-    }
-    in += image.num_channels * image.width;
-  }
-
-  output_mem->copy_to_device();
-
-  /* Fill output tile info. */
-  target_tile = center_tile;
-  target_tile.buffer = output_mem->device_pointer;
-  target_tile.stride = target_tile.w;
-  target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
-
-  thread_scoped_lock output_lock(output_mutex);
-  assert(output_pixels.count(center_tile.tile_index) == 0);
-  output_pixels[target_tile.tile_index] = output_mem;
-}
-
-void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
-{
-  RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
-  RenderTile &target_tile = neighbors.target;
-
-  thread_scoped_lock output_lock(output_mutex);
-  assert(output_pixels.count(center_tile.tile_index) == 1);
-  device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
-  output_pixels.erase(center_tile.tile_index);
-  output_lock.unlock();
-
-  /* Copy denoised pixels from device. */
-  output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
-
-  float *result = output_mem->data();
-  float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
-
-  const DenoiseImageLayer &layer = image.layers[current_layer];
-  const int *output_to_image_channel = layer.output_to_image_channel.data();
-
-  for (int y = 0; y < target_tile.h; y++) {
-    for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
-      for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
-        out[image.num_channels * x + output_to_image_channel[i]] = result[i];
-      }
-    }
-    out += image.num_channels * image.width;
-  }
-
-  /* Free device buffer. */
-  output_mem->free();
-  delete output_mem;
-}
-
-void DenoiseTask::release_tile()
-{
-}
-
-bool DenoiseTask::get_cancel()
-{
-  return false;
-}
-
-void DenoiseTask::create_task(DeviceTask &task)
-{
-  /* Callback functions. */
-  task.acquire_tile = function_bind(&DenoiseTask::acquire_tile, this, device, _1, _2);
-  task.map_neighbor_tiles = function_bind(&DenoiseTask::map_neighboring_tiles, this, _1, _2);
-  task.unmap_neighbor_tiles = function_bind(&DenoiseTask::unmap_neighboring_tiles, this, _1);
-  task.release_tile = function_bind(&DenoiseTask::release_tile, this);
-  task.get_cancel = function_bind(&DenoiseTask::get_cancel, this);
-
-  /* Denoising parameters. */
-  task.denoising = denoiser->params;
-  task.denoising.type = DENOISER_NLM;
-  task.denoising.use = true;
-  task.denoising_from_render = false;
-
-  task.denoising_frames.resize(neighbor_frames.size());
-  for (int i = 0; i < neighbor_frames.size(); i++) {
-    task.denoising_frames[i] = neighbor_frames[i] - frame;
-  }
-
-  /* Buffer parameters. */
-  task.pass_stride = INPUT_NUM_CHANNELS;
-  task.target_pass_stride = OUTPUT_NUM_CHANNELS;
-  task.pass_denoising_data = 0;
-  task.pass_denoising_clean = -1;
-  task.frame_stride = image.width * image.height * INPUT_NUM_CHANNELS;
-
-  /* Create tiles. */
-  thread_scoped_lock tile_lock(tiles_mutex);
-  thread_scoped_lock output_lock(output_mutex);
-
-  tiles.clear();
-  assert(output_pixels.empty());
-  output_pixels.clear();
-
-  int tiles_x = divide_up(image.width, denoiser->tile_size.x);
-  int tiles_y = divide_up(image.height, denoiser->tile_size.y);
-
-  for (int ty = 0; ty < tiles_y; ty++) {
-    for (int tx = 0; tx < tiles_x; tx++) {
-      RenderTile tile;
-      tile.x = tx * denoiser->tile_size.x;
-      tile.y = ty * denoiser->tile_size.y;
-      tile.w = min(image.width - tile.x, denoiser->tile_size.x);
-      tile.h = min(image.height - tile.y, denoiser->tile_size.y);
-      tile.start_sample = 0;
-      tile.num_samples = image.layers[current_layer].samples;
-      tile.sample = 0;
-      tile.offset = 0;
-      tile.stride = image.width;
-      tile.tile_index = ty * tiles_x + tx;
-      tile.task = RenderTile::DENOISE;
-      tile.buffers = NULL;
-      tile.buffer = input_pixels.device_pointer;
-      tiles.push_back(tile);
-    }
-  }
-
-  num_tiles = tiles.size();
-}
-
 /* Denoiser Operations */

 bool DenoiseTask::load_input_pixels(int layer)
 {
-  int w = image.width;
-  int h = image.height;
-  int num_pixels = image.width * image.height;
-  int frame_stride = num_pixels * INPUT_NUM_CHANNELS;
-
  /* Load center image */
  DenoiseImageLayer &image_layer = image.layers[layer];

-  float *buffer_data = input_pixels.data();
-  image.read_pixels(image_layer, buffer_data);
-  buffer_data += frame_stride;
+  float *buffer_data = buffers.buffer.data();
+  image.read_pixels(image_layer, buffers.params, buffer_data);

-  /* Load neighbor images */
-  for (int i = 0; i < image.in_neighbors.size(); i++) {
-    if (!image.read_neighbor_pixels(i, image_layer, buffer_data)) {
-      error = "Failed to read neighbor frame pixels";
-      return false;
-    }
-    buffer_data += frame_stride;
-  }
-
-  /* Preprocess */
-  buffer_data = input_pixels.data();
-  for (int neighbor = 0; neighbor < image.in_neighbors.size() + 1; neighbor++) {
-    /* Clamp */
-    if (denoiser->params.clamp_input) {
-      for (int i = 0; i < num_pixels * INPUT_NUM_CHANNELS; i++) {
-        buffer_data[i] = clamp(buffer_data[i], -1e8f, 1e8f);
-      }
-    }
-
-    /* Box blur */
-    int r = 5 * denoiser->params.radius;
-    float *data = buffer_data + 14;
-    array<float> temp(num_pixels);
-
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int n = 0;
-        float sum = 0.0f;
-        for (int dx = max(x - r, 0); dx < min(x + r + 1, w); dx++, n++) {
-          sum += data[INPUT_NUM_CHANNELS * (y * w + dx)];
-        }
-        temp[y * w + x] = sum / n;
-      }
-    }
-
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int n = 0;
-        float sum = 0.0f;
-
-        for (int dy = max(y - r, 0); dy < min(y + r + 1, h); dy++, n++) {
-          sum += temp[dy * w + x];
-        }
-
-        data[INPUT_NUM_CHANNELS * (y * w + x)] = sum / n;
-      }
-    }
-
-    /* Highlight compression */
-    data = buffer_data + 8;
-    for (int y = 0; y < h; y++) {
-      for (int x = 0; x < w; x++) {
-        int idx = INPUT_NUM_CHANNELS * (y * w + x);
-        float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]);
-        color = color_highlight_compress(color, NULL);
-        data[idx] = color.x;
-        data[idx + 1] = color.y;
-        data[idx + 2] = color.z;
-      }
-    }
-
-    buffer_data += frame_stride;
+  /* Load previous image */
+  if (frame > 0 && !image.read_previous_pixels(image_layer, buffers.params, buffer_data)) {
+    error = "Failed to read neighbor frame pixels";
+    return false;
  }

  /* Copy to device */
-  input_pixels.copy_to_device();
+  buffers.buffer.copy_to_device();

  return true;
 }

 /* Task stages */

+static void add_pass(vector<Pass *> &passes, PassType type, PassMode mode = PassMode::NOISY)
+{
+  Pass *pass = new Pass();
+  pass->set_type(type);
+  pass->set_mode(mode);
+
+  passes.push_back(pass);
+}
+
 bool DenoiseTask::load()
 {
  string center_filepath = denoiser->input[frame];
@ -531,7 +263,8 @@ bool DenoiseTask::load()
    return false;
  }

-  if (!image.load_neighbors(denoiser->input, neighbor_frames, error)) {
+  /* Use previous frame output as input for subsequent frames. */
+  if (frame > 0 && !image.load_previous(denoiser->output[frame - 1], error)) {
    return false;
  }

@ -540,10 +273,35 @@ bool DenoiseTask::load()
    return false;
  }

+  /* Enable temporal denoising for frames after the first (which will use the output from the
+   * previous frames). */
+  DenoiseParams params = denoiser->denoiser->get_params();
+  params.temporally_stable = frame > 0;
+  denoiser->denoiser->set_params(params);
+
  /* Allocate device buffer. */
-  int num_frames = image.in_neighbors.size() + 1;
-  input_pixels.alloc(image.width * INPUT_NUM_CHANNELS, image.height * num_frames);
-  input_pixels.zero_to_device();
+  vector<Pass *> passes;
+  add_pass(passes, PassType::PASS_COMBINED);
+  add_pass(passes, PassType::PASS_DENOISING_ALBEDO);
+  add_pass(passes, PassType::PASS_DENOISING_NORMAL);
+  add_pass(passes, PassType::PASS_MOTION);
+  add_pass(passes, PassType::PASS_DENOISING_PREVIOUS);
+  add_pass(passes, PassType::PASS_COMBINED, PassMode::DENOISED);
+
+  BufferParams buffer_params;
+  buffer_params.width = image.width;
+  buffer_params.height = image.height;
+  buffer_params.full_x = 0;
+  buffer_params.full_y = 0;
+  buffer_params.full_width = image.width;
+  buffer_params.full_height = image.height;
+  buffer_params.update_passes(passes);
+
+  for (Pass *pass : passes) {
+    delete pass;
+  }
+
+  buffers.reset(buffer_params);

  /* Read pixels for first layer. */
  current_layer = 0;
@ -565,10 +323,26 @@ bool DenoiseTask::exec()
    }

    /* Run task on device. */
-    DeviceTask task(DeviceTask::RENDER);
-    create_task(task);
-    device->task_add(task);
-    device->task_wait();
+    denoiser->denoiser->denoise_buffer(buffers.params, &buffers, 1, true);
+
+    /* Copy denoised pixels from device. */
+    buffers.buffer.copy_from_device();
+
+    float *result = buffers.buffer.data(), *out = image.pixels.data();
+
+    const DenoiseImageLayer &layer = image.layers[current_layer];
+    const int *output_to_image_channel = layer.output_to_image_channel.data();
+
+    for (int y = 0; y < image.height; y++) {
+      for (int x = 0; x < image.width; x++, result += buffers.params.pass_stride) {
+        for (int j = 0; j < OUTPUT_NUM_CHANNELS; j++) {
+          int offset = buffers.params.get_pass_offset(PASS_COMBINED, PassMode::DENOISED);
+          int image_channel = output_to_image_channel[j];
+          out[image.num_channels * x + image_channel] = result[offset + j];
+        }
+      }
+      out += image.num_channels * image.width;
+    }

    printf("\n");
  }
@ -586,8 +360,7 @@ bool DenoiseTask::save()
 void DenoiseTask::free()
 {
  image.free();
-  input_pixels.free();
-  assert(output_pixels.empty());
+  /* Release the storage of the render buffers that hold the denoiser input and output. */
+  buffers.buffer.free();
 }

 /* Denoise Image Storage */
@ -607,7 +380,7 @@ DenoiseImage::~DenoiseImage()

 void DenoiseImage::close_input()
 {
-  in_neighbors.clear();
+  /* Drop the handle of the previous frame image, if one was opened. */
+  in_previous.reset();
 }

 void DenoiseImage::free()
@ -677,39 +450,61 @@ bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error)
  return true;
 }

-void DenoiseImage::read_pixels(const DenoiseImageLayer &layer, float *input_pixels)
+/* Copy the noisy color, denoising normal, denoising albedo and motion channels of `layer` from
+ * the loaded image into `input_pixels`. The destination is laid out according to `params`: every
+ * pixel occupies `pass_stride` floats and each pass starts at its own offset inside that pixel. */
+void DenoiseImage::read_pixels(const DenoiseImageLayer &layer,
+                               const BufferParams &params,
+                               float *input_pixels)
 {
  /* Pixels from center file have already been loaded into pixels.
   * We copy a subset into the device input buffer with channels reshuffled. */
  const int *input_to_image_channel = layer.input_to_image_channel.data();

+  /* The pass offsets do not depend on the pixel, so look them up once instead of per channel. */
+  const int combined_offset = params.get_pass_offset(PASS_COMBINED);
+  const int normal_offset = params.get_pass_offset(PASS_DENOISING_NORMAL);
+  const int albedo_offset = params.get_pass_offset(PASS_DENOISING_ALBEDO);
+  const int motion_offset = params.get_pass_offset(PASS_MOTION);
+
  for (int i = 0; i < width * height; i++) {
-    for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
-      int image_channel = input_to_image_channel[j];
-      input_pixels[i * INPUT_NUM_CHANNELS + j] =
+    /* Compute the destination index in size_t, like the source index below, so that
+     * `i * pass_stride` cannot overflow int on very large images. */
+    float *pixel_out = input_pixels + ((size_t)i) * params.pass_stride;
+
+    for (int j = 0; j < 3; ++j) {
+      int image_channel = input_to_image_channel[INPUT_NOISY_IMAGE + j];
+      pixel_out[combined_offset + j] = pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 3; ++j) {
+      int image_channel = input_to_image_channel[INPUT_DENOISING_NORMAL + j];
+      pixel_out[normal_offset + j] = pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 3; ++j) {
+      int image_channel = input_to_image_channel[INPUT_DENOISING_ALBEDO + j];
+      pixel_out[albedo_offset + j] = pixels[((size_t)i) * num_channels + image_channel];
+    }
+    for (int j = 0; j < 4; ++j) {
+      int image_channel = input_to_image_channel[INPUT_MOTION + j];
+      pixel_out[motion_offset + j] =
          pixels[((size_t)i) * num_channels + image_channel];
    }
  }
 }

-bool DenoiseImage::read_neighbor_pixels(int neighbor,
-                                        const DenoiseImageLayer &layer,
+bool DenoiseImage::read_previous_pixels(const DenoiseImageLayer &layer,
+                                        const BufferParams &params,
                                        float *input_pixels)
 {
  /* Load the pixels of the previous frame image, and copy them into the device buffer
   * with channels reshuffled. */
  size_t num_pixels = (size_t)width * (size_t)height;
  array<float> neighbor_pixels(num_pixels * num_channels);
-  if (!in_neighbors[neighbor]->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
+  if (!in_previous->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
    return false;
  }

-  const int *input_to_image_channel = layer.neighbor_input_to_image_channel[neighbor].data();
+  const int *output_to_image_channel = layer.previous_output_to_image_channel.data();

  for (int i = 0; i < width * height; i++) {
-    for (int j = 0; j < INPUT_NUM_CHANNELS; j++) {
-      int image_channel = input_to_image_channel[j];
-      input_pixels[i * INPUT_NUM_CHANNELS + j] =
+    for (int j = 0; j < 3; ++j) {
+      int offset = params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+      int image_channel = output_to_image_channel[j];
+      input_pixels[i * params.pass_stride + offset + j] =
          neighbor_pixels[((size_t)i) * num_channels + image_channel];
    }
  }
@ -739,8 +534,8 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
    return false;
  }

-  if (layers.size() == 0) {
-    error = "Could not find a render layer containing denoising info";
+  if (layers.empty()) {
+    error = "Could not find a render layer containing denoising data and motion vector passes";
    return false;
  }

@ -757,46 +552,34 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
  return true;
 }

-bool DenoiseImage::load_neighbors(const vector<string> &filepaths,
-                                  const vector<int> &frames,
-                                  string &error)
+/* Open the previous frame image at `filepath` and check that it can be used together with the
+ * already loaded image: it must have the same dimensions, and match_channels() must succeed for
+ * every layer. On success the open file handle is kept in `in_previous`; on failure `error` is
+ * set and false is returned. No pixels are read here. */
+bool DenoiseImage::load_previous(const string &filepath, string &error)
 {
-  if (frames.size() > DENOISE_MAX_FRAMES - 1) {
-    error = string_printf("Maximum number of neighbors (%d) exceeded\n", DENOISE_MAX_FRAMES - 1);
+  if (!Filesystem::is_regular(filepath)) {
+    error = "Couldn't find neighbor frame: " + filepath;
    return false;
  }

-  for (int neighbor = 0; neighbor < frames.size(); neighbor++) {
-    int frame = frames[neighbor];
-    const string &filepath = filepaths[frame];
-
-    if (!Filesystem::is_regular(filepath)) {
-      error = "Couldn't find neighbor frame: " + filepath;
-      return false;
-    }
-
-    unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
-    if (!in_neighbor) {
-      error = "Couldn't open neighbor frame: " + filepath;
-      return false;
-    }
-
-    const ImageSpec &neighbor_spec = in_neighbor->spec();
-    if (neighbor_spec.width != width || neighbor_spec.height != height) {
-      error = "Neighbor frame has different dimensions: " + filepath;
-      return false;
-    }
-
-    foreach (DenoiseImageLayer &layer, layers) {
-      if (!layer.match_channels(neighbor, in_spec.channelnames, neighbor_spec.channelnames)) {
-        error = "Neighbor frame misses denoising data passes: " + filepath;
-        return false;
-      }
-    }
-
-    in_neighbors.push_back(std::move(in_neighbor));
+  unique_ptr<ImageInput> in_neighbor(ImageInput::open(filepath));
+  if (!in_neighbor) {
+    error = "Couldn't open neighbor frame: " + filepath;
+    return false;
  }

+  const ImageSpec &neighbor_spec = in_neighbor->spec();
+  if (neighbor_spec.width != width || neighbor_spec.height != height) {
+    error = "Neighbor frame has different dimensions: " + filepath;
+    return false;
+  }
+
+  for (DenoiseImageLayer &layer : layers) {
+    if (!layer.match_channels(in_spec.channelnames, neighbor_spec.channelnames)) {
+      error = "Neighbor frame misses denoising data passes: " + filepath;
+      return false;
+    }
+  }
+
+  /* All checks passed, keep the file open so its pixels can be read later. */
+  in_previous = std::move(in_neighbor);
+
  return true;
 }

@ -864,24 +647,22 @@ bool DenoiseImage::save_output(const string &out_filepath, string &error)

 /* File pattern handling and outer loop over frames */

-DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info)
+/* Set up everything needed to denoise frames: the task scheduler, the device described by
+ * `device_info`, and a denoiser configured with `params` that runs on that device. */
+DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params)
 {
-  samples_override = 0;
-  tile_size = make_int2(64, 64);
-
-  num_frames = 0;
-
  /* Initialize task scheduler. */
  TaskScheduler::init();

  /* Initialize device. */
-  device = Device::create(device_info, stats, profiler, true);
-
+  device = Device::create(device_info, stats, profiler);
  device->load_kernels(KERNEL_FEATURE_DENOISING);
+
+  /* Create the denoiser on top of the device and load its kernels. */
+  denoiser = Denoiser::create(device, params);
+  denoiser->load_kernels(nullptr);
 }

 DenoiserPipeline::~DenoiserPipeline()
 {
+  /* Destroy the denoiser first: it was created from `device`, so it must not outlive it. */
+  denoiser.reset();
  delete device;
  TaskScheduler::exit();
 }
@ -890,7 +671,7 @@ bool DenoiserPipeline::run()
 {
  assert(input.size() == output.size());

-  num_frames = output.size();
+  int num_frames = output.size();

  for (int frame = 0; frame < num_frames; frame++) {
    /* Skip empty output paths. */
@ -898,16 +679,8 @@ bool DenoiserPipeline::run()
      continue;
    }

-    /* Determine neighbor frame numbers that should be used for filtering. */
-    vector<int> neighbor_frames;
-    for (int f = frame - params.neighbor_frames; f <= frame + params.neighbor_frames; f++) {
-      if (f >= 0 && f < num_frames && f != frame) {
-        neighbor_frames.push_back(f);
-      }
-    }
-
    /* Execute task. */
-    DenoiseTask task(device, this, frame, neighbor_frames);
+    DenoiseTask task(device, this, frame);
    if (!task.load()) {
      error = task.error;
      return false;
@ -930,5 +703,3 @@ bool DenoiserPipeline::run()
 }

 CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/session/denoising.h
+++ b/intern/cycles/session/denoising.h
@ -17,20 +17,17 @@
 #ifndef __DENOISING_H__
 #define __DENOISING_H__

-#if 0
-
 /* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or
 * parameters. Currently it is an annoying mixture of terms used interchangeably. */

-#  include "device/device.h"
+#include "device/device.h"
+#include "integrator/denoiser.h"

-#  include "render/buffers.h"
+#include "util/string.h"
+#include "util/unique_ptr.h"
+#include "util/vector.h"

-#  include "util/util_string.h"
-#  include "util/util_unique_ptr.h"
-#  include "util/util_vector.h"
-
-#  include <OpenImageIO/imageio.h>
+#include <OpenImageIO/imageio.h>

 OIIO_NAMESPACE_USING

@ -40,7 +37,7 @@ CCL_NAMESPACE_BEGIN

 class DenoiserPipeline {
 public:
-  DenoiserPipeline(DeviceInfo &device_info);
+  DenoiserPipeline(DeviceInfo &device_info, const DenoiseParams &params);
  ~DenoiserPipeline();

  bool run();
@ -55,22 +52,13 @@ class DenoiserPipeline {
   * taking into account all input frames. */
  vector<string> output;

-  /* Sample number override, takes precedence over values from input frames. */
-  int samples_override;
-  /* Tile size for processing on device. */
-  int2 tile_size;
-
-  /* Equivalent to the settings in the regular denoiser. */
-  DenoiseParams params;
-
 protected:
  friend class DenoiseTask;

  Stats stats;
  Profiler profiler;
  Device *device;
-
-  int num_frames;
+  std::unique_ptr<Denoiser> denoiser;
 };

 /* Denoise Image Layer */
@ -88,13 +76,13 @@ struct DenoiseImageLayer {
  /* Device input channel will be copied from image channel input_to_image_channel[i]. */
  vector<int> input_to_image_channel;

-  /* input_to_image_channel of the secondary frames, if any are used. */
-  vector<vector<int>> neighbor_input_to_image_channel;
-
  /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the
   * file. */
  vector<int> output_to_image_channel;

+  /* output_to_image_channel of the previous frame, if used. */
+  vector<int> previous_output_to_image_channel;
+
  /* Detect whether this layer contains a full set of channels and set up the offsets accordingly.
   */
  bool detect_denoising_channels();
@ -102,8 +90,7 @@ struct DenoiseImageLayer {
  /* Map the channels of the previous frame to the channels that are required for processing,
   * fill previous_output_to_image_channel if all are present or return false if a channel is
   * missing. */
-  bool match_channels(int neighbor,
-                      const std::vector<string> &channelnames,
+  bool match_channels(const std::vector<string> &channelnames,
                      const std::vector<string> &neighbor_channelnames);
 };

@ -125,7 +112,7 @@ class DenoiseImage {

  /* Image file handles */
  ImageSpec in_spec;
-  vector<unique_ptr<ImageInput>> in_neighbors;
+  unique_ptr<ImageInput> in_previous;

  /* Render layers */
  vector<DenoiseImageLayer> layers;
@ -137,12 +124,16 @@ class DenoiseImage {
  bool load(const string &in_filepath, string &error);

  /* Load previous frame (opens the file and matches its channels, pixels are read later). */
-  bool load_neighbors(const vector<string> &filepaths, const vector<int> &frames, string &error);
+  bool load_previous(const string &in_filepath, string &error);

  /* Load subset of pixels from file buffer into input buffer, as needed for denoising
   * on the device. Channels are reshuffled following the provided mapping. */
-  void read_pixels(const DenoiseImageLayer &layer, float *input_pixels);
-  bool read_neighbor_pixels(int neighbor, const DenoiseImageLayer &layer, float *input_pixels);
+  void read_pixels(const DenoiseImageLayer &layer,
+                   const BufferParams &params,
+                   float *input_pixels);
+  bool read_previous_pixels(const DenoiseImageLayer &layer,
+                            const BufferParams &params,
+                            float *input_pixels);

  bool save_output(const string &out_filepath, string &error);

@ -159,10 +150,7 @@ class DenoiseImage {

 class DenoiseTask {
 public:
-  DenoiseTask(Device *device,
-              DenoiserPipeline *denoiser,
-              int frame,
-              const vector<int> &neighbor_frames);
+  DenoiseTask(Device *device, DenoiserPipeline *denoiser, int frame);
  ~DenoiseTask();

  /* Task stages */
@ -180,37 +168,17 @@ class DenoiseTask {

  /* Frame number to be denoised */
  int frame;
-  vector<int> neighbor_frames;

  /* Image file data */
  DenoiseImage image;
  int current_layer;

-  /* Device input buffer */
-  device_vector<float> input_pixels;
-
-  /* Tiles */
-  thread_mutex tiles_mutex;
-  list<RenderTile> tiles;
-  int num_tiles;
-
-  thread_mutex output_mutex;
-  map<int, device_vector<float> *> output_pixels;
+  RenderBuffers buffers;

  /* Task handling */
  bool load_input_pixels(int layer);
-  void create_task(DeviceTask &task);
-
-  /* Device task callbacks */
-  bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
-  void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
-  void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
-  void release_tile();
-  bool get_cancel();
 };

 CCL_NAMESPACE_END

-#endif
-
 #endif /* __DENOISING_H__ */