Cycles: use OpenCL C 2.0 if available, to improve performance for AMD

Tested with AMD Radeon Pro WX 9100, where it brings performance back to the
2.80 level, and, combined with recent changes, is about 2-15% faster than 2.80
in our benchmark scenes.

This somehow appears to specifically address the issue where adding more shader
nodes leads to slower runtime. I found no additional speedup by applying this
change to 2.80 or removing the new shader node code.

Ref T71479

Patch by Jeroen Bakker.

Differential Revision: https://developer.blender.org/D6252
This commit is contained in:
Brecht Van Lommel 2020-03-24 19:42:23 +01:00
parent bb26c1359e
commit 394a1373a0
Notes: blender-bot 2023-02-14 10:21:15 +01:00
Referenced by issue #77095, Artefacts when rendering with the GPU in Blender 2.90.2 and 2.83 release
Referenced by issue #71479, Blender 2.81/2.82 - OpenCL - Significan slow render times
3 changed files with 31 additions and 4 deletions

View File

@ -88,6 +88,10 @@ class OpenCLInfo {
static bool device_supported(const string &platform_name, const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform, string *error = NULL);
static bool device_version_check(cl_device_id device, string *error = NULL);
/* Query the OpenCL C version string of the device (CL_DEVICE_OPENCL_C_VERSION)
 * and parse it into r_major and r_minor.
 *
 * Returns false when the string does not match "OpenCL C <major>.<minor>";
 * in that case a description is written to error if it is non-NULL.
 * Returns true on success, and error (if non-NULL) is set to an empty string. */
static bool get_device_version(cl_device_id device,
int *r_major,
int *r_minor,
string *error = NULL);
static string get_hardware_id(const string &platform_name, cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices);

View File

@ -1896,6 +1896,17 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
/* Build with OpenCL 2.0 if available, this improves performance
* with AMD OpenCL drivers on Windows and Linux (legacy drivers).
* Note that OpenCL selects the highest 1.x version by default,
* only for 2.0 do we need the explicit compiler flag. */
int version_major, version_minor;
if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
if (version_major >= 2) {
build_options += "-cl-std=CL2.0 ";
}
}
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "

View File

@ -810,18 +810,30 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
return true;
}
bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
bool OpenCLInfo::get_device_version(cl_device_id device, int *r_major, int *r_minor, string *error)
{
const int req_major = 1, req_minor = 1;
int major, minor;
char version[256];
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
if (sscanf(version, "OpenCL C %d.%d", r_major, r_minor) < 2) {
if (error != NULL) {
*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
}
return false;
}
if (error != NULL) {
*error = "";
}
return true;
}
bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
{
const int req_major = 1, req_minor = 1;
int major, minor;
if (!get_device_version(device, &major, &minor, error)) {
return false;
}
if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
if (error != NULL) {
*error = string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor);