Cleanup: remove unused device code and includes
This commit is contained in:
parent
9cf593f305
commit
ab8f24811d
|
@ -54,7 +54,6 @@
|
|||
#include "util/util_function.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_map.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_openimagedenoise.h"
|
||||
#include "util/util_optimization.h"
|
||||
#include "util/util_progress.h"
|
||||
|
@ -298,154 +297,6 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
Device::build_bvh(bvh, progress, refit);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void CPUDevice::render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
|
||||
{
|
||||
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
|
||||
|
||||
scoped_timer timer(&tile.buffers->render_time);
|
||||
|
||||
Coverage coverage(kg, tile);
|
||||
if (use_coverage) {
|
||||
coverage.init_path_trace();
|
||||
}
|
||||
|
||||
float *render_buffer = (float *)tile.buffer;
|
||||
int start_sample = tile.start_sample;
|
||||
int end_sample = tile.start_sample + tile.num_samples;
|
||||
|
||||
/* Needed for Embree. */
|
||||
SIMD_SET_FLUSH_TO_ZERO;
|
||||
|
||||
for (int sample = start_sample; sample < end_sample; sample++) {
|
||||
if (task.get_cancel() || TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
|
||||
tile.stealing_state = RenderTile::WAS_STOLEN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
for (int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for (int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
if (use_coverage) {
|
||||
coverage.init_pixel(x, y);
|
||||
}
|
||||
kernels.path_trace(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int y = tile.y; y < tile.y + tile.h; y++) {
|
||||
for (int x = tile.x; x < tile.x + tile.w; x++) {
|
||||
kernels.bake(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
tile.sample = sample + 1;
|
||||
|
||||
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
|
||||
const bool stop = adaptive_sampling_filter(kg, tile, sample);
|
||||
if (stop) {
|
||||
const int num_progress_samples = end_sample - sample;
|
||||
tile.sample = end_sample;
|
||||
task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
if (use_coverage) {
|
||||
coverage.finalize();
|
||||
}
|
||||
|
||||
if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
|
||||
adaptive_sampling_post(tile, kg);
|
||||
}
|
||||
}
|
||||
|
||||
void CPUDevice::thread_render(DeviceTask &task)
|
||||
{
|
||||
if (TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
return;
|
||||
}
|
||||
|
||||
/* allocate buffer for kernel globals */
|
||||
CPUKernelThreadGlobals kg(kernel_globals, get_cpu_osl_memory());
|
||||
|
||||
profiler.add_state(&kg.profiler);
|
||||
|
||||
/* NLM denoiser. */
|
||||
DenoisingTask *denoising = NULL;
|
||||
|
||||
/* OpenImageDenoise: we can only denoise with one thread at a time, so to
|
||||
* avoid waiting with mutex locks in the denoiser, we let only a single
|
||||
* thread acquire denoising tiles. */
|
||||
uint tile_types = task.tile_types;
|
||||
bool hold_denoise_lock = false;
|
||||
if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
|
||||
if (!oidn_task_lock.try_lock()) {
|
||||
tile_types &= ~RenderTile::DENOISE;
|
||||
hold_denoise_lock = true;
|
||||
}
|
||||
}
|
||||
|
||||
RenderTile tile;
|
||||
while (task.acquire_tile(this, tile, tile_types)) {
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
render(task, tile, &kg);
|
||||
}
|
||||
else if (tile.task == RenderTile::BAKE) {
|
||||
render(task, tile, &kg);
|
||||
}
|
||||
else if (tile.task == RenderTile::DENOISE) {
|
||||
denoise_openimagedenoise(task, tile);
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
|
||||
task.release_tile(tile);
|
||||
|
||||
if (TaskPool::canceled()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (hold_denoise_lock) {
|
||||
oidn_task_lock.unlock();
|
||||
}
|
||||
|
||||
profiler.remove_state(&kg.profiler);
|
||||
|
||||
delete denoising;
|
||||
}
|
||||
|
||||
void CPUDevice::thread_denoise(DeviceTask &task)
|
||||
{
|
||||
RenderTile tile;
|
||||
tile.x = task.x;
|
||||
tile.y = task.y;
|
||||
tile.w = task.w;
|
||||
tile.h = task.h;
|
||||
tile.buffer = task.buffer;
|
||||
tile.sample = task.sample + task.num_samples;
|
||||
tile.num_samples = task.num_samples;
|
||||
tile.start_sample = task.sample;
|
||||
tile.offset = task.offset;
|
||||
tile.stride = task.stride;
|
||||
tile.buffers = task.buffers;
|
||||
|
||||
denoise_openimagedenoise(task, tile);
|
||||
|
||||
task.update_progress(&tile, tile.w * tile.h);
|
||||
}
|
||||
#endif
|
||||
|
||||
const CPUKernels *CPUDevice::get_cpu_kernels() const
|
||||
{
|
||||
return &kernels;
|
||||
|
|
|
@ -31,7 +31,6 @@
|
|||
# include "util/util_logging.h"
|
||||
# include "util/util_map.h"
|
||||
# include "util/util_md5.h"
|
||||
# include "util/util_opengl.h"
|
||||
# include "util/util_path.h"
|
||||
# include "util/util_string.h"
|
||||
# include "util/util_system.h"
|
||||
|
@ -1169,141 +1168,6 @@ void CUDADevice::tex_free(device_texture &mem)
|
|||
}
|
||||
}
|
||||
|
||||
# if 0
|
||||
void CUDADevice::render(DeviceTask &task,
|
||||
RenderTile &rtile,
|
||||
device_vector<KernelWorkTile> &work_tiles)
|
||||
{
|
||||
scoped_timer timer(&rtile.buffers->render_time);
|
||||
|
||||
if (have_error())
|
||||
return;
|
||||
|
||||
CUDAContextScope scope(this);
|
||||
CUfunction cuRender;
|
||||
|
||||
/* Get kernel function. */
|
||||
if (rtile.task == RenderTile::BAKE) {
|
||||
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
|
||||
}
|
||||
else {
|
||||
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
|
||||
}
|
||||
|
||||
if (have_error()) {
|
||||
return;
|
||||
}
|
||||
|
||||
cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
|
||||
|
||||
/* Allocate work tile. */
|
||||
work_tiles.alloc(1);
|
||||
|
||||
KernelWorkTile *wtile = work_tiles.data();
|
||||
wtile->x = rtile.x;
|
||||
wtile->y = rtile.y;
|
||||
wtile->w = rtile.w;
|
||||
wtile->h = rtile.h;
|
||||
wtile->offset = rtile.offset;
|
||||
wtile->stride = rtile.stride;
|
||||
wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
|
||||
|
||||
/* Prepare work size. More step samples render faster, but for now we
|
||||
* remain conservative for GPUs connected to a display to avoid driver
|
||||
* timeouts and display freezing. */
|
||||
int min_blocks, num_threads_per_block;
|
||||
cuda_assert(
|
||||
cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
|
||||
if (!info.display_device) {
|
||||
min_blocks *= 8;
|
||||
}
|
||||
|
||||
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
|
||||
|
||||
/* Render all samples. */
|
||||
uint start_sample = rtile.start_sample;
|
||||
uint end_sample = rtile.start_sample + rtile.num_samples;
|
||||
|
||||
for (int sample = start_sample; sample < end_sample;) {
|
||||
/* Setup and copy work tile to device. */
|
||||
wtile->start_sample = sample;
|
||||
wtile->num_samples = step_samples;
|
||||
if (task.adaptive_sampling.use) {
|
||||
wtile->num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
|
||||
}
|
||||
wtile->num_samples = min(wtile->num_samples, end_sample - sample);
|
||||
work_tiles.copy_to_device();
|
||||
|
||||
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
|
||||
uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
|
||||
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
|
||||
|
||||
/* Launch kernel. */
|
||||
void *args[] = {&d_work_tiles, &total_work_size};
|
||||
|
||||
cuda_assert(
|
||||
cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
|
||||
|
||||
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
|
||||
uint filter_sample = sample + wtile->num_samples - 1;
|
||||
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
|
||||
adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
|
||||
}
|
||||
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
|
||||
/* Update progress. */
|
||||
sample += wtile->num_samples;
|
||||
rtile.sample = sample;
|
||||
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
|
||||
|
||||
if (task.get_cancel()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finalize adaptive sampling. */
|
||||
if (task.adaptive_sampling.use) {
|
||||
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
|
||||
adaptive_sampling_post(rtile, wtile, d_work_tiles);
|
||||
cuda_assert(cuCtxSynchronize());
|
||||
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::thread_run(DeviceTask &task)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
if (task.type == DeviceTask::RENDER) {
|
||||
device_vector<KernelWorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
|
||||
|
||||
/* keep rendering tiles until done */
|
||||
RenderTile tile;
|
||||
DenoisingTask denoising(this, task);
|
||||
|
||||
while (task.acquire_tile(this, tile, task.tile_types)) {
|
||||
if (tile.task == RenderTile::PATH_TRACE) {
|
||||
render(task, tile, work_tiles);
|
||||
}
|
||||
else if (tile.task == RenderTile::BAKE) {
|
||||
render(task, tile, work_tiles);
|
||||
}
|
||||
|
||||
task.release_tile(tile);
|
||||
|
||||
if (task.get_cancel()) {
|
||||
if (task.need_finish_queue == false)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
work_tiles.free();
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
unique_ptr<DeviceQueue> CUDADevice::gpu_queue_create()
|
||||
{
|
||||
return make_unique<CUDADeviceQueue>(this);
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
# ifdef WITH_CUDA_DYNLOAD
|
||||
# include "cuew.h"
|
||||
# else
|
||||
# include "util/util_opengl.h"
|
||||
# include <cuda.h>
|
||||
# include <cudaGL.h>
|
||||
# endif
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "util/util_half.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_system.h"
|
||||
#include "util/util_time.h"
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "util/util_foreach.h"
|
||||
#include "util/util_hash.h"
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_time.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#include "util/util_function.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_math.h"
|
||||
#include "util/util_opengl.h"
|
||||
#include "util/util_task.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
|
|
Loading…
Reference in New Issue