T61463: Separate Baking kernels

Cycles OpenCL: Split baking kernels in own program

Fix T61463. Before this patch baking was part of the base kernels. There
are 3 baking kernels and all 3 use shader evaluation. For only one of
these kernels was the functionality wrapped in the __NO_BAKING__
compile directive.

When you start baking this leads to long compile times. Separating the
kernels into individual programs reduces the compile times.

Also wrapped all baking kernels with __NO_BAKING__ to reduce the
compilation times.

Impact on compilation time

    job   |   scene_name    | previous |  new  | percentage
  --------+-----------------+----------+-------+------------
   T61463 | empty           |    10.63 |  7.27 |         32%
   T61463 | bmw             |    17.91 | 14.24 |         20%
   T61463 | fishycat        |    19.57 | 15.08 |         23%
   T61463 | barbershop      |    54.10 | 48.18 |         11%
   T61463 | classroom       |    17.55 | 14.42 |         18%
   T61463 | koro            |    18.92 | 17.15 |          9%
   T61463 | pavillion       |    17.43 | 14.23 |         18%
   T61463 | splash279       |    16.48 | 15.33 |          7%
   T61463 | volume_emission |    36.22 | 34.19 |          6%

Impact on render time

    job   |   scene_name    | previous |   new   | percentage
  --------+-----------------+----------+---------+------------
   T61463 | empty           |    21.06 |   20.54 |          2%
   T61463 | bmw             |   198.44 |  189.59 |          4%
   T61463 | fishycat        |   394.20 |  388.50 |          1%
   T61463 | barbershop      |  1188.16 | 1185.49 |          0%
   T61463 | classroom       |   341.08 |  339.27 |          1%
   T61463 | koro            |   472.43 |  360.70 |         24%
   T61463 | pavillion       |   905.77 |  902.14 |          0%
   T61463 | splash279       |    55.26 |   54.92 |          1%
   T61463 | volume_emission |    62.59 |   39.09 |         38%

I don't have a grounded explanation why koro and volume_emission are this much
faster; I have done several tests though...

Maniphest Tasks: T61463

Differential Revision: https://developer.blender.org/D4376
This commit is contained in:
Jeroen Bakker 2019-02-19 16:31:31 +01:00
parent d6d306441f
commit 15edda3a8e
Notes: blender-bot 2023-02-14 07:31:34 +01:00
Referenced by commit 4ec6b16b4e, cycles/opencl: Fix compile error.
Referenced by issue #61463, Cycles OpenCL: Split bake functions in their own program.
8 changed files with 149 additions and 85 deletions

View File

@ -325,7 +325,11 @@ public:
map<ustring, cl_kernel> kernels;
};
OpenCLProgram base_program, denoising_program;
OpenCLProgram base_program;
OpenCLProgram bake_program;
OpenCLProgram displace_program;
OpenCLProgram background_program;
OpenCLProgram denoising_program;
typedef map<string, device_vector<uchar>*> ConstMemMap;
typedef map<string, device_ptr> MemMap;
@ -571,7 +575,7 @@ protected:
ustring key,
thread_scoped_lock& cache_locker);
virtual string build_options_for_base_program(
virtual string build_options_for_bake_program(
const DeviceRequestedFeatures& /*requested_features*/);
private:

View File

@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
}
base_program.release();
bake_program.release();
displace_program.release();
background_program.release();
if(cqCommandQueue)
clReleaseCommandQueue(cqCommandQueue);
if(cxContext)
@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
if(!opencl_version_check())
return false;
base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
base_program = OpenCLProgram(this, "base", "kernel.cl", "");
base_program.add_kernel(ustring("convert_to_byte"));
base_program.add_kernel(ustring("convert_to_half_float"));
base_program.add_kernel(ustring("displace"));
base_program.add_kernel(ustring("background"));
base_program.add_kernel(ustring("bake"));
base_program.add_kernel(ustring("zero_buffer"));
bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features));
bake_program.add_kernel(ustring("bake"));
displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features));
displace_program.add_kernel(ustring("displace"));
background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features));
background_program.add_kernel(ustring("background"));
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program.add_kernel(ustring("filter_finalize"));
vector<OpenCLProgram*> programs;
programs.push_back(&base_program);
programs.push_back(&denoising_program);
programs.push_back(&bake_program);
programs.push_back(&displace_program);
programs.push_back(&background_program);
/* Call actual class to fill the vector with its programs. */
if(!add_kernel_programs(requested_features, programs)) {
return false;
}
programs.push_back(&base_program);
programs.push_back(&denoising_program);
/* Parallel compilation of Cycles kernels, this launches multiple
* processes to workaround OpenCL frameworks serializing the calls
@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_kernel kernel;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
kernel = base_program(ustring("bake"));
kernel = bake_program(ustring("bake"));
}
else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
kernel = base_program(ustring("displace"));
kernel = displace_program(ustring("displace"));
}
else {
kernel = base_program(ustring("background"));
kernel = background_program(ustring("background"));
}
cl_uint start_arg_index =
@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel(
cache_locker);
}
string OpenCLDeviceBase::build_options_for_base_program(
string OpenCLDeviceBase::build_options_for_bake_program(
const DeviceRequestedFeatures& requested_features)
{
/* TODO(sergey): By default we compile all features, meaning

View File

@ -327,7 +327,7 @@ public:
protected:
/* ** Those guys are for working around some compiler-specific bugs ** */
string build_options_for_base_program(
string build_options_for_bake_program(
const DeviceRequestedFeatures& requested_features)
{
return requested_features.get_build_options();

View File

@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS
set(SRC_OPENCL_KERNELS
kernels/opencl/kernel.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_displace.cl
kernels/opencl/kernel_background.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
kernels/opencl/kernel_split_bundle.cl

View File

@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace(
#else /* __COMPILE_ONLY_MEGAKERNEL__ */
/* OpenCL entry point that forwards one element per work item to
 * kernel_displace_evaluate().
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * work items whose index is not below sx + sw return without doing anything.
 * The type, offset and sample arguments are not read in this body. */
__kernel void kernel_ocl_displace(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals;
	KernelGlobals *kg = &kglobals;

	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);

	int x = sx + ccl_global_id(0);

	/* Surplus work items past the requested range have nothing to do. */
	if(x >= sx + sw) {
		return;
	}

	kernel_displace_evaluate(kg, input, output, x);
}
/* OpenCL entry point that forwards one element per work item to
 * kernel_background_evaluate().
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * work items whose index is not below sx + sw return without doing anything.
 * The type, offset and sample arguments are not read in this body. */
__kernel void kernel_ocl_background(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals;
	KernelGlobals *kg = &kglobals;

	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);

	int x = sx + ccl_global_id(0);

	/* Surplus work items past the requested range have nothing to do. */
	if(x >= sx + sw) {
		return;
	}

	kernel_background_evaluate(kg, input, output, x);
}
/* OpenCL entry point for baking.
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * work items whose index is not below sx + sw return without doing anything.
 * When __NO_BAKING__ is defined the element is written as a zero float4,
 * otherwise it is forwarded to kernel_bake_evaluate() with type cast to
 * ShaderEvalType. */
__kernel void kernel_ocl_bake(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int filter, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals;
	KernelGlobals *kg = &kglobals;

	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);

	int x = sx + ccl_global_id(0);

	/* Surplus work items past the requested range have nothing to do. */
	if(x >= sx + sw) {
		return;
	}

#ifdef __NO_BAKING__
	/* Baking compiled out: emit a zeroed result. */
	output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#else
	kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
#endif
}
__kernel void kernel_ocl_convert_to_byte(
ccl_constant KernelData *data,
ccl_global uchar4 *rgba,

View File

@ -0,0 +1,39 @@
#include "kernel/kernel_compat_opencl.h"
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_color.h"
#include "kernel/kernels/opencl/kernel_opencl_image.h"
#include "kernel/kernel_path.h"
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
/* OpenCL entry point of the separate "background" program.
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * work items whose index is not below sx + sw return without doing anything.
 * When __NO_BAKING__ is defined the element is written as a zero float4,
 * otherwise it is forwarded to kernel_background_evaluate().
 * The type, offset and sample arguments are not read in this body. */
__kernel void kernel_ocl_background(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals;
	KernelGlobals *kg = &kglobals;

	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);

	int x = sx + ccl_global_id(0);

	/* Surplus work items past the requested range have nothing to do. */
	if(x >= sx + sw) {
		return;
	}

#ifdef __NO_BAKING__
	/* Shader evaluation compiled out: emit a zeroed result. */
	output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#else
	kernel_background_evaluate(kg, input, output, x);
#endif
}

View File

@ -0,0 +1,38 @@
#include "kernel/kernel_compat_opencl.h"
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_color.h"
#include "kernel/kernels/opencl/kernel_opencl_image.h"
#include "kernel/kernel_path.h"
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
/* OpenCL entry point of the separate "bake" program.
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * work items whose index is not below sx + sw return without doing anything.
 * When __NO_BAKING__ is defined the element is written as a zero float4,
 * otherwise it is forwarded to kernel_bake_evaluate() with type cast to
 * ShaderEvalType. */
__kernel void kernel_ocl_bake(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int filter, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals;
	KernelGlobals *kg = &kglobals;

	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);

	int x = sx + ccl_global_id(0);

	/* Surplus work items past the requested range have nothing to do. */
	if(x >= sx + sw) {
		return;
	}

#ifdef __NO_BAKING__
	/* Baking compiled out: emit a zeroed result. */
	output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#else
	kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
#endif
}

View File

@ -0,0 +1,40 @@
#include "kernel/kernel_compat_opencl.h"
#include "kernel/kernel_math.h"
#include "kernel/kernel_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_color.h"
#include "kernel/kernels/opencl/kernel_opencl_image.h"
#include "kernel/kernel_path.h"
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
/* OpenCL entry point of the separate "displace" program.
 *
 * The element index is sx plus the work item's global id in dimension 0;
 * only work items whose index is below sx + sw write to output.
 * When __NO_BAKING__ is defined the element is written as a zero float4,
 * otherwise it is forwarded to kernel_displace_evaluate().
 * The type, offset and sample arguments are not read in this body. */
__kernel void kernel_ocl_displace(
	ccl_constant KernelData *data,
	ccl_global uint4 *input,
	ccl_global float4 *output,
	KERNEL_BUFFER_PARAMS,
	int type, int sx, int sw, int offset, int sample)
{
	/* Work-item local globals, populated from the kernel arguments. */
	KernelGlobals kglobals, *kg = &kglobals;
	kg->data = data;
	kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
	kernel_set_buffer_info(kg);
	int x = sx + ccl_global_id(0);
	/* Surplus work items past the requested range have nothing to do. */
	if(x < sx + sw) {
#ifdef __NO_BAKING__
		/* Shader evaluation compiled out: emit a zeroed result. */
		output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#else
		kernel_displace_evaluate(kg, input, output, x);
#endif
	}
}