Merge branch 'blender-v3.3-release'
This commit is contained in:
commit
e7908c0790
|
@ -1202,11 +1202,11 @@ bool CUDADevice::should_use_graphics_interop()
|
|||
}
|
||||
|
||||
vector<CUdevice> gl_devices(num_all_devices);
|
||||
uint num_gl_devices;
|
||||
uint num_gl_devices = 0;
|
||||
cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL);
|
||||
|
||||
for (CUdevice gl_device : gl_devices) {
|
||||
if (gl_device == cuDevice) {
|
||||
for (uint i = 0; i < num_gl_devices; ++i) {
|
||||
if (gl_devices[i] == cuDevice) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,9 @@ CCL_NAMESPACE_BEGIN
|
|||
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
||||
namespace {
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 60
|
||||
using ::optixUtilDenoiserInvokeTiled;
|
||||
# else
|
||||
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
|
||||
const OptixImage2D &output,
|
||||
unsigned int overlapWindowSizeInPixels,
|
||||
|
@ -215,6 +218,7 @@ static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
|
|||
}
|
||||
return OPTIX_SUCCESS;
|
||||
}
|
||||
# endif
|
||||
|
||||
# if OPTIX_ABI_VERSION >= 55
|
||||
static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
|
||||
|
|
|
@ -101,10 +101,17 @@ static Device *find_best_device(Device *device, DenoiserType type)
|
|||
if ((sub_device->info.denoisers & type) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!best_device) {
|
||||
best_device = sub_device;
|
||||
}
|
||||
else {
|
||||
/* Prefer a device that can use graphics interop for faster display update. */
|
||||
if (sub_device->should_use_graphics_interop() &&
|
||||
!best_device->should_use_graphics_interop()) {
|
||||
best_device = sub_device;
|
||||
}
|
||||
|
||||
/* TODO(sergey): Choose fastest device from available ones. Taking into account performance
|
||||
* of the device and data transfer cost. */
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ PathTrace::PathTrace(Device *device,
|
|||
RenderScheduler &render_scheduler,
|
||||
TileManager &tile_manager)
|
||||
: device_(device),
|
||||
film_(film),
|
||||
device_scene_(device_scene),
|
||||
render_scheduler_(render_scheduler),
|
||||
tile_manager_(tile_manager)
|
||||
|
@ -60,7 +61,17 @@ PathTrace::~PathTrace()
|
|||
void PathTrace::load_kernels()
|
||||
{
|
||||
if (denoiser_) {
|
||||
/* Activate graphics interop while denoiser device is created, so that it can choose a device
|
||||
* that supports interop for faster display updates. */
|
||||
if (display_ && path_trace_works_.size() > 1) {
|
||||
display_->graphics_interop_activate();
|
||||
}
|
||||
|
||||
denoiser_->load_kernels(progress_);
|
||||
|
||||
if (display_ && path_trace_works_.size() > 1) {
|
||||
display_->graphics_interop_deactivate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -506,28 +517,30 @@ void PathTrace::denoise(const RenderWork &render_work)
|
|||
const double start_time = time_dt();
|
||||
|
||||
RenderBuffers *buffer_to_denoise = nullptr;
|
||||
|
||||
unique_ptr<RenderBuffers> multi_device_buffers;
|
||||
bool allow_inplace_modification = false;
|
||||
|
||||
if (path_trace_works_.size() == 1) {
|
||||
buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
|
||||
Device *denoiser_device = denoiser_->get_denoiser_device();
|
||||
if (path_trace_works_.size() > 1 && denoiser_device && !big_tile_denoise_work_) {
|
||||
big_tile_denoise_work_ = PathTraceWork::create(denoiser_device, film_, device_scene_, nullptr);
|
||||
}
|
||||
else {
|
||||
Device *denoiser_device = denoiser_->get_denoiser_device();
|
||||
if (!denoiser_device) {
|
||||
return;
|
||||
}
|
||||
|
||||
multi_device_buffers = make_unique<RenderBuffers>(denoiser_device);
|
||||
multi_device_buffers->reset(render_state_.effective_big_tile_params);
|
||||
if (big_tile_denoise_work_) {
|
||||
big_tile_denoise_work_->set_effective_buffer_params(render_state_.effective_big_tile_params,
|
||||
render_state_.effective_big_tile_params,
|
||||
render_state_.effective_big_tile_params);
|
||||
|
||||
buffer_to_denoise = multi_device_buffers.get();
|
||||
buffer_to_denoise = big_tile_denoise_work_->get_render_buffers();
|
||||
buffer_to_denoise->reset(render_state_.effective_big_tile_params);
|
||||
|
||||
copy_to_render_buffers(multi_device_buffers.get());
|
||||
copy_to_render_buffers(buffer_to_denoise);
|
||||
|
||||
allow_inplace_modification = true;
|
||||
}
|
||||
else {
|
||||
DCHECK_EQ(path_trace_works_.size(), 1);
|
||||
|
||||
buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
|
||||
}
|
||||
|
||||
if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
|
||||
buffer_to_denoise,
|
||||
|
@ -536,14 +549,6 @@ void PathTrace::denoise(const RenderWork &render_work)
|
|||
render_state_.has_denoised_result = true;
|
||||
}
|
||||
|
||||
if (multi_device_buffers) {
|
||||
multi_device_buffers->copy_from_device();
|
||||
parallel_for_each(
|
||||
path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
|
||||
path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
|
||||
});
|
||||
}
|
||||
|
||||
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
|
||||
}
|
||||
|
||||
|
@ -635,8 +640,13 @@ void PathTrace::update_display(const RenderWork &render_work)
|
|||
/* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
|
||||
* all works in parallel. */
|
||||
const int num_samples = get_num_samples_in_buffer();
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
||||
big_tile_denoise_work_->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
}
|
||||
else {
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
}
|
||||
}
|
||||
|
||||
display_->update_end();
|
||||
|
@ -721,11 +731,10 @@ void PathTrace::write_tile_buffer(const RenderWork &render_work)
|
|||
VLOG_WORK << "Write tile result via buffer write callback.";
|
||||
tile_buffer_write();
|
||||
}
|
||||
|
||||
/* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
|
||||
*/
|
||||
if (has_multiple_tiles) {
|
||||
VLOG_WORK << "Write tile result into .";
|
||||
else {
|
||||
VLOG_WORK << "Write tile result to disk.";
|
||||
tile_buffer_write_to_disk();
|
||||
}
|
||||
}
|
||||
|
@ -901,6 +910,10 @@ bool PathTrace::copy_render_tile_from_device()
|
|||
return true;
|
||||
}
|
||||
|
||||
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
||||
return big_tile_denoise_work_->copy_render_buffers_from_device();
|
||||
}
|
||||
|
||||
bool success = true;
|
||||
|
||||
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
||||
|
@ -1002,6 +1015,10 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
|
|||
return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
|
||||
}
|
||||
|
||||
if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
|
||||
return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
|
||||
}
|
||||
|
||||
bool success = true;
|
||||
|
||||
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
|
||||
|
@ -1082,6 +1099,10 @@ void PathTrace::destroy_gpu_resources()
|
|||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->destroy_gpu_resources(display_.get());
|
||||
}
|
||||
|
||||
if (big_tile_denoise_work_) {
|
||||
big_tile_denoise_work_->destroy_gpu_resources(display_.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -236,6 +236,7 @@ class PathTrace {
|
|||
/* CPU device for creating temporary render buffers on the CPU side. */
|
||||
unique_ptr<Device> cpu_device_;
|
||||
|
||||
Film *film_;
|
||||
DeviceScene *device_scene_;
|
||||
|
||||
RenderScheduler &render_scheduler_;
|
||||
|
@ -261,6 +262,9 @@ class PathTrace {
|
|||
/* Denoiser which takes care of denoising the big tile. */
|
||||
unique_ptr<Denoiser> denoiser_;
|
||||
|
||||
/* Denoiser device descriptor which holds the denoised big tile for multi-device workloads. */
|
||||
unique_ptr<PathTraceWork> denoiser_buffer_;
|
||||
|
||||
/* State which is common for all the steps of the render work.
|
||||
* Is brought up to date in the `render()` call and is accessed from all the steps involved in
|
||||
* rendering the work. */
|
||||
|
|
|
@ -33,7 +33,7 @@ bool PathTraceTile::get_pass_pixels(const string_view pass_name,
|
|||
if (!copied_from_device_) {
|
||||
/* Copy from device on demand. */
|
||||
path_trace_.copy_render_tile_from_device();
|
||||
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
|
||||
copied_from_device_ = true;
|
||||
}
|
||||
|
||||
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
|
||||
|
|
|
@ -24,7 +24,7 @@ class PathTraceTile : public OutputDriver::Tile {
|
|||
|
||||
private:
|
||||
PathTrace &path_trace_;
|
||||
bool copied_from_device_;
|
||||
mutable bool copied_from_device_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -370,6 +370,14 @@ RenderWork Session::run_update_for_next_iteration()
|
|||
if (update_scene(width, height)) {
|
||||
profiler.reset(scene->shaders.size(), scene->objects.size());
|
||||
}
|
||||
|
||||
/* Unlock scene mutex before loading denoiser kernels, since that may attempt to activate
|
||||
* graphics interop, which can deadlock when the scene mutex is still being held. */
|
||||
scene_lock.unlock();
|
||||
|
||||
path_trace_->load_kernels();
|
||||
path_trace_->alloc_work_memory();
|
||||
|
||||
progress.add_skip_time(update_timer, params.background);
|
||||
}
|
||||
|
||||
|
@ -621,12 +629,7 @@ bool Session::update_scene(int width, int height)
|
|||
Camera *cam = scene->camera;
|
||||
cam->set_screen_size(width, height);
|
||||
|
||||
const bool scene_update_result = scene->update(progress);
|
||||
|
||||
path_trace_->load_kernels();
|
||||
path_trace_->alloc_work_memory();
|
||||
|
||||
return scene_update_result;
|
||||
return scene->update(progress);
|
||||
}
|
||||
|
||||
static string status_append(const string &status, const string &suffix)
|
||||
|
|
Loading…
Reference in New Issue