Merge branch 'blender-v3.3-release'

This commit is contained in:
Patrick Mours 2022-08-12 16:04:06 +02:00
commit e7908c0790
8 changed files with 76 additions and 37 deletions

View File

@ -1202,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop()
}
vector<CUdevice> gl_devices(num_all_devices);
uint num_gl_devices;
uint num_gl_devices = 0;
cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL);
for (CUdevice gl_device : gl_devices) {
if (gl_device == cuDevice) {
for (uint i = 0; i < num_gl_devices; ++i) {
if (gl_devices[i] == cuDevice) {
return true;
}
}

View File

@ -39,6 +39,9 @@ CCL_NAMESPACE_BEGIN
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
namespace {
# if OPTIX_ABI_VERSION >= 60
using ::optixUtilDenoiserInvokeTiled;
# else
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
const OptixImage2D &output,
unsigned int overlapWindowSizeInPixels,
@ -215,6 +218,7 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
}
return OPTIX_SUCCESS;
}
# endif
# if OPTIX_ABI_VERSION >= 55
static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)

View File

@ -101,10 +101,17 @@ static Device *find_best_device(Device *device, DenoiserType type)
if ((sub_device->info.denoisers & type) == 0) {
return;
}
if (!best_device) {
best_device = sub_device;
}
else {
/* Prefer a device that can use graphics interop for faster display update. */
if (sub_device->should_use_graphics_interop() &&
!best_device->should_use_graphics_interop()) {
best_device = sub_device;
}
/* TODO(sergey): Choose fastest device from available ones. Taking into account performance
* of the device and data transfer cost. */
}

View File

@ -26,6 +26,7 @@ PathTrace::PathTrace(Device *device,
RenderScheduler &render_scheduler,
TileManager &tile_manager)
: device_(device),
film_(film),
device_scene_(device_scene),
render_scheduler_(render_scheduler),
tile_manager_(tile_manager)
@ -60,7 +61,17 @@ PathTrace::~PathTrace()
void PathTrace::load_kernels()
{
if (denoiser_) {
/* Activate graphics interop while denoiser device is created, so that it can choose a device
* that supports interop for faster display updates. */
if (display_ && path_trace_works_.size() > 1) {
display_->graphics_interop_activate();
}
denoiser_->load_kernels(progress_);
if (display_ && path_trace_works_.size() > 1) {
display_->graphics_interop_deactivate();
}
}
}
@ -506,28 +517,30 @@ void PathTrace::denoise(const RenderWork &render_work)
const double start_time = time_dt();
RenderBuffers *buffer_to_denoise = nullptr;
unique_ptr<RenderBuffers> multi_device_buffers;
bool allow_inplace_modification = false;
if (path_trace_works_.size() == 1) {
buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
Device *denoiser_device = denoiser_->get_denoiser_device();
if (path_trace_works_.size() > 1 && denoiser_device && !big_tile_denoise_work_) {
big_tile_denoise_work_ = PathTraceWork::create(denoiser_device, film_, device_scene_, nullptr);
}
else {
Device *denoiser_device = denoiser_->get_denoiser_device();
if (!denoiser_device) {
return;
}
multi_device_buffers = make_unique<RenderBuffers>(denoiser_device);
multi_device_buffers->reset(render_state_.effective_big_tile_params);
if (big_tile_denoise_work_) {
big_tile_denoise_work_->set_effective_buffer_params(render_state_.effective_big_tile_params,
render_state_.effective_big_tile_params,
render_state_.effective_big_tile_params);
buffer_to_denoise = multi_device_buffers.get();
buffer_to_denoise = big_tile_denoise_work_->get_render_buffers();
buffer_to_denoise->reset(render_state_.effective_big_tile_params);
copy_to_render_buffers(multi_device_buffers.get());
copy_to_render_buffers(buffer_to_denoise);
allow_inplace_modification = true;
}
else {
DCHECK_EQ(path_trace_works_.size(), 1);
buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
}
if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
buffer_to_denoise,
@ -536,14 +549,6 @@ void PathTrace::denoise(const RenderWork &render_work)
render_state_.has_denoised_result = true;
}
if (multi_device_buffers) {
multi_device_buffers->copy_from_device();
parallel_for_each(
path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
});
}
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
}
@ -635,8 +640,13 @@ void PathTrace::update_display(const RenderWork &render_work)
/* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
* all works in parallel. */
const int num_samples = get_num_samples_in_buffer();
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
big_tile_denoise_work_->copy_to_display(display_.get(), pass_mode, num_samples);
}
else {
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
}
}
display_->update_end();
@ -721,11 +731,10 @@ void PathTrace::write_tile_buffer(const RenderWork &render_work)
VLOG_WORK << "Write tile result via buffer write callback.";
tile_buffer_write();
}
/* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
*/
if (has_multiple_tiles) {
VLOG_WORK << "Write tile result into .";
else {
VLOG_WORK << "Write tile result to disk.";
tile_buffer_write_to_disk();
}
}
@ -901,6 +910,10 @@ bool PathTrace::copy_render_tile_from_device()
return true;
}
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
return big_tile_denoise_work_->copy_render_buffers_from_device();
}
bool success = true;
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
@ -1002,6 +1015,10 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
}
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
}
bool success = true;
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
@ -1082,6 +1099,10 @@ void PathTrace::destroy_gpu_resources()
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->destroy_gpu_resources(display_.get());
}
if (big_tile_denoise_work_) {
big_tile_denoise_work_->destroy_gpu_resources(display_.get());
}
}
}

View File

@ -236,6 +236,7 @@ class PathTrace {
/* CPU device for creating temporary render buffers on the CPU side. */
unique_ptr<Device> cpu_device_;
Film *film_;
DeviceScene *device_scene_;
RenderScheduler &render_scheduler_;
@ -261,6 +262,9 @@ class PathTrace {
/* Denoiser which takes care of denoising the big tile. */
unique_ptr<Denoiser> denoiser_;
/* Path trace work on the denoiser device. Its render buffers hold the big tile which is denoised
 * for multi-device workloads. */
unique_ptr<PathTraceWork> big_tile_denoise_work_;
/* State which is common for all the steps of the render work.
* Is brought up to date in the `render()` call and is accessed from all the steps involved in
* rendering the work. */

View File

@ -33,7 +33,7 @@ bool PathTraceTile::get_pass_pixels(const string_view pass_name,
if (!copied_from_device_) {
/* Copy from device on demand. */
path_trace_.copy_render_tile_from_device();
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
copied_from_device_ = true;
}
const BufferParams &buffer_params = path_trace_.get_render_tile_params();

View File

@ -24,7 +24,7 @@ class PathTraceTile : public OutputDriver::Tile {
private:
PathTrace &path_trace_;
bool copied_from_device_;
mutable bool copied_from_device_;
};
CCL_NAMESPACE_END

View File

@ -370,6 +370,14 @@ RenderWork Session::run_update_for_next_iteration()
if (update_scene(width, height)) {
profiler.reset(scene->shaders.size(), scene->objects.size());
}
/* Unlock scene mutex before loading denoiser kernels, since that may attempt to activate
* graphics interop, which can deadlock when the scene mutex is still being held. */
scene_lock.unlock();
path_trace_->load_kernels();
path_trace_->alloc_work_memory();
progress.add_skip_time(update_timer, params.background);
}
@ -621,12 +629,7 @@ bool Session::update_scene(int width, int height)
Camera *cam = scene->camera;
cam->set_screen_size(width, height);
const bool scene_update_result = scene->update(progress);
path_trace_->load_kernels();
path_trace_->alloc_work_memory();
return scene_update_result;
return scene->update(progress);
}
static string status_append(const string &status, const string &suffix)