Cleanup: reduce hardcoded numbers in denoising neighbor tiles code

This commit is contained in:
Brecht Van Lommel 2020-07-09 20:01:22 +02:00
parent e65c78cd43
commit 93791381fe
14 changed files with 216 additions and 160 deletions

View File

@ -1760,7 +1760,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
denoising.run_denoising(&rtile);
denoising.run_denoising(rtile);
}
void CUDADevice::adaptive_sampling_filter(uint filter_sample,

View File

@ -439,10 +439,10 @@ class Device {
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}

View File

@ -1040,7 +1040,7 @@ class CPUDevice : public Device {
denoising.render_buffer.samples = tile.sample;
denoising.buffer.gpu_temporary_mem = false;
denoising.run_denoising(&tile);
denoising.run_denoising(tile);
}
void thread_render(DeviceTask &task)

View File

@ -71,29 +71,30 @@ DenoisingTask::~DenoisingTask()
tile_info_mem.free();
}
void DenoisingTask::set_render_buffer(RenderTile *rtiles)
void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
{
for (int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &rtile = neighbors.tiles[i];
tile_info->offsets[i] = rtile.offset;
tile_info->strides[i] = rtile.stride;
tile_info->buffers[i] = rtile.buffer;
}
tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
tile_info->x[0] = neighbors.tiles[3].x;
tile_info->x[1] = neighbors.tiles[4].x;
tile_info->x[2] = neighbors.tiles[5].x;
tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
tile_info->y[0] = neighbors.tiles[1].y;
tile_info->y[1] = neighbors.tiles[4].y;
tile_info->y[2] = neighbors.tiles[7].y;
tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
target_buffer.offset = rtiles[9].offset;
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
target_buffer.offset = neighbors.target.offset;
target_buffer.stride = neighbors.target.stride;
target_buffer.ptr = neighbors.target.buffer;
if (do_prefilter && rtiles[9].buffers) {
if (do_prefilter && neighbors.target.buffers) {
target_buffer.denoising_output_offset =
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
neighbors.target.buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
@ -320,12 +321,11 @@ void DenoisingTask::reconstruct()
functions.solve(target_buffer.ptr);
}
void DenoisingTask::run_denoising(RenderTile *tile)
void DenoisingTask::run_denoising(RenderTile &tile)
{
RenderTile rtiles[10];
rtiles[4] = *tile;
functions.map_neighbor_tiles(rtiles);
set_render_buffer(rtiles);
RenderTileNeighbors neighbors(tile);
functions.map_neighbor_tiles(neighbors);
set_render_buffer(neighbors);
setup_denoising_buffer();
@ -347,7 +347,7 @@ void DenoisingTask::run_denoising(RenderTile *tile)
write_buffer();
}
functions.unmap_neighbor_tiles(rtiles);
functions.unmap_neighbor_tiles(neighbors);
}
CCL_NAMESPACE_END

View File

@ -102,8 +102,8 @@ class DenoisingTask {
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation,
@ -154,7 +154,7 @@ class DenoisingTask {
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
void run_denoising(RenderTile *tile);
void run_denoising(RenderTile &tile);
struct DenoiseBuffers {
int pass_stride;
@ -179,7 +179,7 @@ class DenoisingTask {
protected:
Device *device;
void set_render_buffer(RenderTile *rtiles);
void set_render_buffer(RenderTileNeighbors &neighbors);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();

View File

@ -584,20 +584,22 @@ class MultiDevice : public Device {
return -1;
}
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &tile = neighbors.tiles[i];
if (!tile.buffers) {
continue;
}
device_vector<float> &mem = tiles[i].buffers->buffer;
tiles[i].buffer = mem.device_pointer;
device_vector<float> &mem = tile.buffers->buffer;
tile.buffer = mem.device_pointer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
/* Skip unnecessary copies in viewport mode (buffer covers the
* whole image), but still need to fix up the tile device pointer. */
map_tile(sub_device, tiles[i]);
map_tile(sub_device, tile);
continue;
}
@ -610,15 +612,15 @@ class MultiDevice : public Device {
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being de-noised by the CPU thread. */
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
if (!tile.buffers->map_neighbor_copied) {
tile.buffers->map_neighbor_copied = true;
mem.copy_from_device();
}
if (mem.device == this) {
/* Can re-use memory if tile is already allocated on the sub device. */
map_tile(sub_device, tiles[i]);
mem.swap_device(sub_device, mem.device_size, tiles[i].buffer);
map_tile(sub_device, tile);
mem.swap_device(sub_device, mem.device_size, tile.buffer);
}
else {
mem.swap_device(sub_device, 0, 0);
@ -626,40 +628,42 @@ class MultiDevice : public Device {
mem.copy_to_device();
tiles[i].buffer = mem.device_pointer;
tiles[i].device_size = mem.device_size;
tile.buffer = mem.device_pointer;
tile.device_size = mem.device_size;
mem.restore_device();
}
}
}
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors)
{
device_vector<float> &mem = tiles[9].buffers->buffer;
RenderTile &target_tile = neighbors.target;
device_vector<float> &mem = target_tile.buffers->buffer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
return;
}
/* Copy denoised result back to the host. */
mem.swap_device(sub_device, tiles[9].device_size, tiles[9].buffer);
mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
mem.copy_from_device();
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &tile = neighbors.tiles[i];
if (!tile.buffers) {
continue;
}
device_vector<float> &mem = tiles[i].buffers->buffer;
device_vector<float> &mem = tile.buffers->buffer;
if (mem.device != sub_device && mem.device != this) {
/* Free up memory again if it was allocated for the copy above. */
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
mem.swap_device(sub_device, tile.device_size, tile.buffer);
sub_device->mem_free(mem);
mem.restore_device();
}

View File

@ -801,19 +801,18 @@ class OptiXDevice : public CUDADevice {
// 0 1 2
// 3 4 5
// 6 7 8 9
RenderTile rtiles[10];
rtiles[4] = rtile;
task.map_neighbor_tiles(rtiles, this);
rtile = rtiles[4]; // Tile may have been modified by mapping code
RenderTileNeighbors neighbors(rtile);
task.map_neighbor_tiles(neighbors, this);
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
RenderTile &target_tile = neighbors.target;
rtile = center_tile; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
int4 rect = make_int4(
rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h);
int4 rect = center_tile.bounds();
// Overlap between tiles has to be at least 64 pixels
// TODO(pmours): Query this value from OptiX
rect = rect_expand(rect, 64);
int4 clip_rect = make_int4(
rtiles[3].x, rtiles[1].y, rtiles[5].x + rtiles[5].w, rtiles[7].y + rtiles[7].h);
int4 clip_rect = neighbors.bounds();
rect = rect_clip(rect, clip_rect);
int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y);
@ -834,14 +833,14 @@ class OptiXDevice : public CUDADevice {
device_only_memory<float> input(this, "denoiser input");
device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) &&
(!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) &&
(!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) &&
(!rtiles[3].buffer || rtiles[3].buffer == rtile.buffer) &&
(!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) &&
(!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) &&
(!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) &&
(!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) {
bool contiguous_memory = true;
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
contiguous_memory = false;
}
}
if (contiguous_memory) {
// Tiles are in continuous memory, so can just subtract overlap offset
input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride;
// Stride covers the whole width of the image and not just a single tile
@ -856,19 +855,19 @@ class OptiXDevice : public CUDADevice {
input_stride *= rect_size.x;
TileInfo *tile_info = tile_info_mem.alloc(1);
for (int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
tile_info->offsets[i] = neighbors.tiles[i].offset;
tile_info->strides[i] = neighbors.tiles[i].stride;
tile_info->buffers[i] = neighbors.tiles[i].buffer;
}
tile_info->x[0] = rtiles[3].x;
tile_info->x[1] = rtiles[4].x;
tile_info->x[2] = rtiles[5].x;
tile_info->x[3] = rtiles[5].x + rtiles[5].w;
tile_info->y[0] = rtiles[1].y;
tile_info->y[1] = rtiles[4].y;
tile_info->y[2] = rtiles[7].y;
tile_info->y[3] = rtiles[7].y + rtiles[7].h;
tile_info->x[0] = neighbors.tiles[3].x;
tile_info->x[1] = neighbors.tiles[4].x;
tile_info->x[2] = neighbors.tiles[5].x;
tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
tile_info->y[0] = neighbors.tiles[1].y;
tile_info->y[1] = neighbors.tiles[4].y;
tile_info->y[2] = neighbors.tiles[7].y;
tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
tile_info_mem.copy_to_device();
void *args[] = {
@ -977,10 +976,10 @@ class OptiXDevice : public CUDADevice {
int2 output_offset = overlap_offset;
overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually
# else
output_layers[0].data = rtiles[9].buffer + pixel_offset;
output_layers[0].width = rtiles[9].w;
output_layers[0].height = rtiles[9].h;
output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride;
output_layers[0].data = target_tile.buffer + pixel_offset;
output_layers[0].width = target_tile.w;
output_layers[0].height = target_tile.h;
output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride;
output_layers[0].pixelStrideInBytes = pixel_stride;
# endif
output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
@ -1002,26 +1001,26 @@ class OptiXDevice : public CUDADevice {
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
void *output_args[] = {&input_ptr,
&rtiles[9].buffer,
&target_tile.buffer,
&output_offset.x,
&output_offset.y,
&rect_size.x,
&rect_size.y,
&rtiles[9].x,
&rtiles[9].y,
&rtiles[9].w,
&rtiles[9].h,
&rtiles[9].offset,
&rtiles[9].stride,
&target_tile.x,
&target_tile.y,
&target_tile.w,
&target_tile.h,
&target_tile.offset,
&target_tile.stride,
&task.pass_stride,
&rtile.sample};
launch_filter_kernel(
"kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
"kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args);
# endif
check_result_cuda_ret(cuStreamSynchronize(0));
task.unmap_neighbor_tiles(rtiles, this);
task.unmap_neighbor_tiles(neighbors, this);
}
else {
// Run CUDA denoising kernels

View File

@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class RenderBuffers;
class RenderTile;
class RenderTileNeighbors;
class Tile;
enum DenoiserType {
@ -150,8 +151,8 @@ class DeviceTask {
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
function<void(RenderTileNeighbors &, Device *)> map_neighbor_tiles;
function<void(RenderTileNeighbors &, Device *)> unmap_neighbor_tiles;
uint tile_types;
DenoiseParams denoising;

View File

@ -1850,7 +1850,7 @@ void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
denoising.run_denoising(&rtile);
denoising.run_denoising(rtile);
}
void OpenCLDevice::shader(DeviceTask &task)

View File

@ -52,7 +52,7 @@ class BufferParams {
/* passes */
vector<Pass> passes;
bool denoising_data_pass;
/* If only some light path types should be denoised, an additional pass is needed. */
/* If only some light path types should be denoised, an additional pass is needed. */
bool denoising_clean_pass;
/* When we're prefiltering the passes during rendering, we need to keep both the
* original and the prefiltered data around because neighboring tiles might still
@ -149,6 +149,50 @@ class RenderTile {
RenderBuffers *buffers;
RenderTile();
/* Rectangle covered by this tile, packed as (xmin, ymin, xmax, ymax).
 * The maximum values are x + w and y + h. */
int4 bounds() const
{
  const int4 rect = make_int4(x, y, x + w, y + h);
  return rect;
}
};
/* Render Tile Neighbors
* Set of neighboring tiles used for denoising. Tile order:
* 0 1 2
* 3 4 5
* 6 7 8 */
class RenderTileNeighbors {
public:
static const int SIZE = 9;
static const int CENTER = 4;
RenderTile tiles[SIZE];
RenderTile target;
RenderTileNeighbors(const RenderTile &center)
{
tiles[CENTER] = center;
}
int4 bounds() const
{
return make_int4(tiles[3].x, /* xmin */
tiles[1].y, /* ymin */
tiles[5].x + tiles[5].w, /* xmax */
tiles[7].y + tiles[7].h); /* ymax */
}
void set_bounds_from_center()
{
tiles[3].x = tiles[CENTER].x;
tiles[1].y = tiles[CENTER].y;
tiles[5].x = tiles[CENTER].x + tiles[CENTER].w;
tiles[7].y = tiles[CENTER].y + tiles[CENTER].h;
}
};
CCL_NAMESPACE_END

View File

@ -271,42 +271,45 @@ bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &
*
* However, since there is only one large memory, the denoised result has to be written to
* a different buffer to avoid having to copy an entire horizontal slice of the image. */
void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
RenderTile &target_tile = neighbors.target;
/* Fill tile information. */
for (int i = 0; i < 9; i++) {
if (i == 4) {
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
if (i == RenderTileNeighbors::CENTER) {
continue;
}
RenderTile &tile = neighbors.tiles[i];
int dx = (i % 3) - 1;
int dy = (i / 3) - 1;
tiles[i].x = clamp(tiles[4].x + dx * denoiser->tile_size.x, 0, image.width);
tiles[i].w = clamp(tiles[4].x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tiles[i].x;
tiles[i].y = clamp(tiles[4].y + dy * denoiser->tile_size.y, 0, image.height);
tiles[i].h = clamp(tiles[4].y + (dy + 1) * denoiser->tile_size.y, 0, image.height) -
tiles[i].y;
tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width);
tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x;
tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height);
tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y;
tiles[i].buffer = tiles[4].buffer;
tiles[i].offset = tiles[4].offset;
tiles[i].stride = image.width;
tile.buffer = center_tile.buffer;
tile.offset = center_tile.offset;
tile.stride = image.width;
}
/* Allocate output buffer. */
device_vector<float> *output_mem = new device_vector<float>(
tile_device, "denoising_output", MEM_READ_WRITE);
output_mem->alloc(OUTPUT_NUM_CHANNELS * tiles[4].w * tiles[4].h);
output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h);
/* Fill output buffer with noisy image, assumed by kernel_filter_finalize
* when skipping denoising of some pixels. */
float *result = output_mem->data();
float *in = &image.pixels[image.num_channels * (tiles[4].y * image.width + tiles[4].x)];
float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *input_to_image_channel = layer.input_to_image_channel.data();
for (int y = 0; y < tiles[4].h; y++) {
for (int x = 0; x < tiles[4].w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int y = 0; y < center_tile.h; y++) {
for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]];
}
@ -317,35 +320,38 @@ void DenoiseTask::map_neighboring_tiles(RenderTile *tiles, Device *tile_device)
output_mem->copy_to_device();
/* Fill output tile info. */
tiles[9] = tiles[4];
tiles[9].buffer = output_mem->device_pointer;
tiles[9].stride = tiles[9].w;
tiles[9].offset -= tiles[9].x + tiles[9].y * tiles[9].stride;
target_tile = center_tile;
target_tile.buffer = output_mem->device_pointer;
target_tile.stride = target_tile.w;
target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride;
thread_scoped_lock output_lock(output_mutex);
assert(output_pixels.count(tiles[4].tile_index) == 0);
output_pixels[tiles[9].tile_index] = output_mem;
assert(output_pixels.count(center_tile.tile_index) == 0);
output_pixels[target_tile.tile_index] = output_mem;
}
void DenoiseTask::unmap_neighboring_tiles(RenderTile *tiles)
void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors)
{
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
RenderTile &target_tile = neighbors.target;
thread_scoped_lock output_lock(output_mutex);
assert(output_pixels.count(tiles[4].tile_index) == 1);
device_vector<float> *output_mem = output_pixels[tiles[9].tile_index];
output_pixels.erase(tiles[4].tile_index);
assert(output_pixels.count(center_tile.tile_index) == 1);
device_vector<float> *output_mem = output_pixels[target_tile.tile_index];
output_pixels.erase(center_tile.tile_index);
output_lock.unlock();
/* Copy denoised pixels from device. */
output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * tiles[9].w, tiles[9].h);
output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h);
float *result = output_mem->data();
float *out = &image.pixels[image.num_channels * (tiles[9].y * image.width + tiles[9].x)];
float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)];
const DenoiseImageLayer &layer = image.layers[current_layer];
const int *output_to_image_channel = layer.output_to_image_channel.data();
for (int y = 0; y < tiles[9].h; y++) {
for (int x = 0; x < tiles[9].w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int y = 0; y < target_tile.h; y++) {
for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) {
for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) {
out[image.num_channels * x + output_to_image_channel[i]] = result[i];
}

View File

@ -196,8 +196,8 @@ class DenoiseTask {
/* Device task callbacks */
bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile);
void map_neighboring_tiles(RenderTile *tiles, Device *tile_device);
void unmap_neighboring_tiles(RenderTile *tiles);
void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
void unmap_neighboring_tiles(RenderTileNeighbors &neighbors);
void release_tile();
bool get_cancel();
};

View File

@ -536,7 +536,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
denoising_cond.notify_all();
}
void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
@ -546,75 +546,77 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
tile_manager.state.buffer.full_x + tile_manager.state.buffer.width,
tile_manager.state.buffer.full_y + tile_manager.state.buffer.height);
RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
if (!tile_manager.schedule_denoising) {
/* Fix up tile slices with overlap. */
if (tile_manager.slice_overlap != 0) {
int y = max(tiles[4].y - tile_manager.slice_overlap, image_region.y);
tiles[4].h = min(tiles[4].y + tiles[4].h + tile_manager.slice_overlap, image_region.w) - y;
tiles[4].y = y;
int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y);
center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap,
image_region.w) -
y;
center_tile.y = y;
}
/* Tiles are not being denoised individually, which means the entire image is processed. */
tiles[3].x = tiles[4].x;
tiles[1].y = tiles[4].y;
tiles[5].x = tiles[4].x + tiles[4].w;
tiles[7].y = tiles[4].y + tiles[4].h;
neighbors.set_bounds_from_center();
}
else {
int center_idx = tiles[4].tile_index;
int center_idx = center_tile.tile_index;
assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
for (int dy = -1, i = 0; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++, i++) {
RenderTile &rtile = neighbors.tiles[i];
int nindex = tile_manager.get_neighbor_index(center_idx, i);
if (nindex >= 0) {
Tile *tile = &tile_manager.state.tiles[nindex];
tiles[i].x = image_region.x + tile->x;
tiles[i].y = image_region.y + tile->y;
tiles[i].w = tile->w;
tiles[i].h = tile->h;
rtile.x = image_region.x + tile->x;
rtile.y = image_region.y + tile->y;
rtile.w = tile->w;
rtile.h = tile->h;
if (buffers) {
tile_manager.state.buffer.get_offset_stride(tiles[i].offset, tiles[i].stride);
tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
tiles[i].buffer = buffers->buffer.device_pointer;
tiles[i].buffers = buffers;
rtile.buffer = buffers->buffer.device_pointer;
rtile.buffers = buffers;
}
else {
assert(tile->buffers);
tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
tiles[i].buffer = tile->buffers->buffer.device_pointer;
tiles[i].buffers = tile->buffers;
rtile.buffer = tile->buffers->buffer.device_pointer;
rtile.buffers = tile->buffers;
}
}
else {
int px = tiles[4].x + dx * params.tile_size.x;
int py = tiles[4].y + dy * params.tile_size.y;
int px = center_tile.x + dx * params.tile_size.x;
int py = center_tile.y + dy * params.tile_size.y;
tiles[i].x = clamp(px, image_region.x, image_region.z);
tiles[i].y = clamp(py, image_region.y, image_region.w);
tiles[i].w = tiles[i].h = 0;
rtile.x = clamp(px, image_region.x, image_region.z);
rtile.y = clamp(py, image_region.y, image_region.w);
rtile.w = rtile.h = 0;
tiles[i].buffer = (device_ptr)NULL;
tiles[i].buffers = NULL;
rtile.buffer = (device_ptr)NULL;
rtile.buffers = NULL;
}
}
}
}
assert(tiles[4].buffers);
device->map_neighbor_tiles(tile_device, tiles);
assert(center_tile.buffers);
device->map_neighbor_tiles(tile_device, neighbors);
/* The denoised result is written back to the original tile. */
tiles[9] = tiles[4];
neighbors.target = center_tile;
}
void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device)
{
thread_scoped_lock tile_lock(tile_mutex);
device->unmap_neighbor_tiles(tile_device, tiles);
device->unmap_neighbor_tiles(tile_device, neighbors);
}
void Session::run_cpu()

View File

@ -198,8 +198,8 @@ class Session {
void update_tile_sample(RenderTile &tile);
void release_tile(RenderTile &tile, const bool need_denoise);
void map_neighbor_tiles(RenderTile *tiles, Device *tile_device);
void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device);
void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device);
bool device_use_gl;