Cycles: Cache split kernels in CUDA device

This way we don't re-load kernels for every sample in the viewport.
Additionally, we don't risk the global size changing in between samples.
This commit is contained in:
Sergey Sharybin 2017-05-02 15:02:49 +02:00
parent fc8f428224
commit 4174e533c0
1 changed file with 10 additions and 3 deletions

View File

@ -119,6 +119,7 @@ public:
int cuDevId;
int cuDevArchitecture;
bool first_error;
CUDASplitKernel *split_kernel;
struct PixelMem {
GLuint cuPBO;
@ -221,6 +222,8 @@ public:
cuDevice = 0;
cuContext = 0;
split_kernel = NULL;
need_bindless_mapping = false;
/* initialize */
@ -260,6 +263,8 @@ public:
{
task_pool.stop();
delete split_kernel;
if(info.has_bindless_textures) {
tex_free(bindless_mapping);
}
@ -1336,12 +1341,14 @@ public:
requested_features.max_closure = 64;
}
CUDASplitKernel split_kernel(this);
split_kernel.load_kernels(requested_features);
if(split_kernel == NULL) {
split_kernel = new CUDASplitKernel(this);
split_kernel->load_kernels(requested_features);
}
while(task->acquire_tile(this, tile)) {
device_memory void_buffer;
split_kernel.path_trace(task, tile, void_buffer, void_buffer);
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
task->release_tile(tile);