Cycles: Use special debug panel to fine-tune debug flags

This panel is only visible when debug_value is set to 256 and has no
effect in other cases. However, if the debug value is not set to this
value, environment variables will be used to control which features
are enabled, so in fact there are no visible changes for anyone.

There are some changes needed to prevent devices re-enumeration on
every Cycles session create.

Reviewers: juicyfruit, lukasstockner97, dingto, brecht

Reviewed By: lukasstockner97, dingto

Differential Revision: https://developer.blender.org/D1720
This commit is contained in:
Sergey Sharybin 2016-01-12 16:00:48 +05:00
parent c6c223ade6
commit ac7aefd7c2
12 changed files with 561 additions and 89 deletions

View File

@ -88,6 +88,11 @@ def create(engine, data, scene, region=None, v3d=None, rv3d=None, preview_osl=Fa
if rv3d:
rv3d = rv3d.as_pointer()
if bpy.app.debug_value == 256:
_cycles.debug_flags_update(scene)
else:
_cycles.debug_flags_reset()
engine.session = _cycles.create(engine.as_pointer(), userpref, data, scene, region, v3d, rv3d, preview_osl)

View File

@ -550,6 +550,46 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0.0, max=1.0,
)
# Various fine-tuning debug flags
def devices_update_callback(self, context):
    # Property update hook: hand the context scene (as a raw pointer) to
    # _cycles.debug_flags_update() and return whatever it returns.
    from _cycles import debug_flags_update
    scene_pointer = context.scene.as_pointer()
    return debug_flags_update(scene_pointer)
cls.debug_use_cpu_avx2 = BoolProperty(name="AVX2", default=True)
cls.debug_use_cpu_avx = BoolProperty(name="AVX", default=True)
cls.debug_use_cpu_sse41 = BoolProperty(name="SSE41", default=True)
cls.debug_use_cpu_sse3 = BoolProperty(name="SSE3", default=True)
cls.debug_use_cpu_sse2 = BoolProperty(name="SSE2", default=True)
cls.debug_opencl_kernel_type = EnumProperty(
name="OpenCL Kernel Type",
default='DEFAULT',
items=(
('DEFAULT', "Default", ""),
('MEGA', "Mega", ""),
('SPLIT', "Split", ""),
),
update=devices_update_callback
)
# OpenCL device type exposed in the debug panel.
# The order of the items matters: the C++ side switches on the integer
# index of the selected item (0 = NONE ... 5 = accelerator).
# NOTE: the identifier 'ACCELLERATOR' keeps its spelling so that anything
# already referring to it stays valid; only the visible label is corrected.
cls.debug_opencl_device_type = EnumProperty(
    name="OpenCL Device Type",
    default='ALL',
    items=(
        ('NONE', "None", ""),
        ('ALL', "All", ""),
        ('DEFAULT', "Default", ""),
        ('CPU', "CPU", ""),
        ('GPU', "GPU", ""),
        ('ACCELLERATOR', "Accelerator", ""),
    ),
    update=devices_update_callback
)
cls.debug_use_opencl_debug = BoolProperty(name="Debug OpenCL", default=False)
@classmethod
def unregister(cls):
del bpy.types.Scene.cycles

View File

@ -1459,6 +1459,39 @@ class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
row.prop(cbk, "normal_b", text="")
# Render-context panel exposing the Cycles debug flags.
# It is only polled as visible while bpy.app.debug_value equals 256.
class CyclesRender_PT_debug(CyclesButtonsPanel, Panel):
    bl_label = "Debug"
    bl_context = "render"
    bl_options = {'DEFAULT_CLOSED'}
    COMPAT_ENGINES = {'CYCLES'}

    @classmethod
    def poll(cls, context):
        return bpy.app.debug_value == 256

    def draw(self, context):
        layout = self.layout
        cscene = context.scene.cycles

        # CPU instruction-set toggles, shown as a single aligned row.
        layout.column().label('CPU Flags:')
        cpu_row = layout.row(align=True)
        for flag_name in ("debug_use_cpu_sse2",
                          "debug_use_cpu_sse3",
                          "debug_use_cpu_sse41",
                          "debug_use_cpu_avx",
                          "debug_use_cpu_avx2"):
            cpu_row.prop(cscene, flag_name, toggle=True)

        # OpenCL settings, stacked in one column.
        opencl_col = layout.column()
        opencl_col.label('OpenCL Flags:')
        for prop_name, caption in (("debug_opencl_kernel_type", "Kernel"),
                                   ("debug_opencl_device_type", "Device"),
                                   ("debug_use_opencl_debug", "Debug")):
            opencl_col.prop(cscene, prop_name, text=caption)
class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
bl_label = "Cycles Hair Settings"
bl_context = "particle"

View File

@ -37,13 +37,89 @@
CCL_NAMESPACE_BEGIN
static void *pylong_as_voidptr_typesafe(PyObject *object)
namespace {
/* Flag describing whether debug flags were synchronized from scene. */
bool debug_flags_set = false;
/* Convert a Python object to a raw pointer via PyLong_AsVoidPtr(),
 * mapping Python's None to NULL instead of passing it to the conversion.
 */
void *pylong_as_voidptr_typesafe(PyObject *object)
{
	return (object == Py_None) ? NULL : PyLong_AsVoidPtr(object);
}
/* Copy the debug settings stored in the "cycles" properties of a Blender
 * scene into the global debug flags.
 *
 * Returns true when the OpenCL device type or kernel type changed, which
 * means the device list needs invalidation.
 */
bool debug_flags_sync_from_scene(BL::Scene b_scene)
{
	/* Tables indexed by the integer value of the scene enum properties.
	 * Values outside of a table leave the corresponding flag untouched.
	 */
	const DebugFlags::OpenCL::KernelType kernel_types[] = {
		DebugFlags::OpenCL::KERNEL_DEFAULT,
		DebugFlags::OpenCL::KERNEL_MEGA,
		DebugFlags::OpenCL::KERNEL_SPLIT,
	};
	const DebugFlags::OpenCL::DeviceType device_types[] = {
		DebugFlags::OpenCL::DEVICE_NONE,
		DebugFlags::OpenCL::DEVICE_ALL,
		DebugFlags::OpenCL::DEVICE_DEFAULT,
		DebugFlags::OpenCL::DEVICE_CPU,
		DebugFlags::OpenCL::DEVICE_GPU,
		DebugFlags::OpenCL::DEVICE_ACCELERATOR,
	};
	const int num_kernel_types =
	        (int)(sizeof(kernel_types) / sizeof(kernel_types[0]));
	const int num_device_types =
	        (int)(sizeof(device_types) / sizeof(device_types[0]));

	DebugFlagsRef flags = DebugFlags();
	PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");

	/* Remember the values which decide about device list invalidation. */
	const DebugFlags::OpenCL::DeviceType old_device_type =
	        flags.opencl.device_type;
	const DebugFlags::OpenCL::KernelType old_kernel_type =
	        flags.opencl.kernel_type;

	/* CPU instruction sets. */
	flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2");
	flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx");
	flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41");
	flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
	flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");

	/* OpenCL kernel type. */
	const int kernel_index = get_enum(cscene, "debug_opencl_kernel_type");
	if(kernel_index >= 0 && kernel_index < num_kernel_types) {
		flags.opencl.kernel_type = kernel_types[kernel_index];
	}

	/* OpenCL device type. */
	const int device_index = get_enum(cscene, "debug_opencl_device_type");
	if(device_index >= 0 && device_index < num_device_types) {
		flags.opencl.device_type = device_types[device_index];
	}

	/* Remaining OpenCL flags. */
	flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");

	const bool device_type_changed =
	        (flags.opencl.device_type != old_device_type);
	const bool kernel_type_changed =
	        (flags.opencl.kernel_type != old_kernel_type);
	return device_type_changed || kernel_type_changed;
}
/* Reset debug flags to default values.
* Return truth when device list needs invalidation.
*/
bool debug_flags_reset()
{
DebugFlagsRef flags = DebugFlags();
/* Backup some settings for comparison. */
DebugFlags::OpenCL::DeviceType opencl_device_type = flags.opencl.device_type;
DebugFlags::OpenCL::KernelType opencl_kernel_type = flags.opencl.kernel_type;
flags.reset();
return flags.opencl.device_type != opencl_device_type ||
flags.opencl.kernel_type != opencl_kernel_type;
}
} /* namespace */
void python_thread_state_save(void **python_thread_state)
{
*python_thread_state = (void*)PyEval_SaveThread();
@ -89,6 +165,9 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
BlenderSession::headless = headless;
VLOG(2) << "Debug flags initialized to:\n"
<< DebugFlags();
Py_RETURN_NONE;
}
@ -491,15 +570,49 @@ static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/)
/* Python entry point: disable the OpenCL platform.
 *
 * Takes no arguments and returns None. Two things are written:
 * - the CYCLES_OPENCL_TEST environment variable is set to NONE;
 * - the OpenCL device type in the global debug flags is set to DEVICE_NONE.
 *
 * NOTE(review): DebugFlags::OpenCL::reset() re-reads CYCLES_OPENCL_TEST, so
 * the environment variable is what keeps OpenCL disabled after a flags
 * reset -- confirm both writes are intended to stay.
 */
static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
{
VLOG(2) << "Disabling OpenCL platform.";
/* Platform-specific way of setting the environment variable. */
#ifdef WIN32
putenv("CYCLES_OPENCL_TEST=NONE");
#else
setenv("CYCLES_OPENCL_TEST", "NONE", 1);
#endif
/* Takes effect immediately for code which reads the debug flags. */
DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
Py_RETURN_NONE;
}
#endif
/* Python entry point: synchronize the global debug flags from a scene.
 *
 * Expects one argument: the scene ID pointer as a Python integer.
 * When the synchronization reports a change of OpenCL device or kernel
 * type, the device list is tagged for update.
 *
 * Returns None on success, or NULL with a Python exception set when the
 * argument can not be used as a scene pointer.
 */
static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
{
	PyObject *pyscene;
	if(!PyArg_ParseTuple(args, "O", &pyscene)) {
		return NULL;
	}

	/* PyLong_AsVoidPtr() gives NULL both for a failed conversion and for an
	 * integer 0. Neither is a usable scene, and continuing would read RNA
	 * properties through a NULL ID pointer.
	 */
	void *scene_id = PyLong_AsVoidPtr(pyscene);
	if(scene_id == NULL) {
		if(!PyErr_Occurred()) {
			PyErr_SetString(PyExc_ValueError,
			                "debug_flags_update: scene pointer is NULL");
		}
		return NULL;
	}

	PointerRNA sceneptr;
	RNA_id_pointer_create((ID*)scene_id, &sceneptr);
	BL::Scene b_scene(sceneptr);

	if(debug_flags_sync_from_scene(b_scene)) {
		VLOG(2) << "Tagging device list for update.";
		Device::tag_update();
	}

	VLOG(2) << "Debug flags set to:\n"
	        << DebugFlags();

	/* Remembered so the next reset knows there is something to report. */
	debug_flags_set = true;

	Py_RETURN_NONE;
}
/* Python entry point: reset the global debug flags to their defaults.
 *
 * Tags the device list for update when the reset changed the OpenCL device
 * or kernel type. The resulting flags are only logged when they had been
 * synchronized from a scene before. Always returns None.
 */
static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/)
{
	const bool devices_outdated = debug_flags_reset();
	if(devices_outdated) {
		VLOG(2) << "Tagging device list for update.";
		Device::tag_update();
	}

	/* Nothing to report when flags were never set from a scene. */
	if(!debug_flags_set) {
		Py_RETURN_NONE;
	}

	VLOG(2) << "Debug flags reset to:\n"
	        << DebugFlags();
	debug_flags_set = false;

	Py_RETURN_NONE;
}
static PyMethodDef methods[] = {
{"init", init_func, METH_VARARGS, ""},
{"create", create_func, METH_VARARGS, ""},
@ -518,6 +631,8 @@ static PyMethodDef methods[] = {
#ifdef WITH_OPENCL
{"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
#endif
{"debug_flags_update", debug_flags_update_func, METH_VARARGS, ""},
{"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""},
{NULL, NULL, 0, NULL},
};

View File

@ -32,6 +32,9 @@
CCL_NAMESPACE_BEGIN
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
/* Device Requested Features */
std::ostream& operator <<(std::ostream &os,
@ -278,9 +281,9 @@ string Device::string_from_type(DeviceType type)
vector<DeviceType>& Device::available_types()
{
static vector<DeviceType> types;
static bool types_init = false;
if(!types_init) {
if(need_types_update) {
types.clear();
types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
@ -300,7 +303,7 @@ vector<DeviceType>& Device::available_types()
types.push_back(DEVICE_MULTI);
#endif
types_init = true;
need_types_update = false;
}
return types;
@ -309,9 +312,9 @@ vector<DeviceType>& Device::available_types()
/* Get the cached list of available devices, rebuilding it when it has been
 * tagged for update.
 */
vector<DeviceInfo>& Device::available_devices()
{
static vector<DeviceInfo> devices;
static bool devices_init = false;
if(!devices_init) {
/* This list is guarded by its own flag: need_types_update belongs to
 * available_types(), which clears it independently of this function.
 */
if(need_devices_update) {
devices.clear();
#ifdef WITH_CUDA
if(device_cuda_init())
device_cuda_info(devices);
@ -332,7 +335,7 @@ vector<DeviceInfo>& Device::available_devices()
device_network_info(devices);
#endif
devices_init = true;
need_devices_update = false;
}
return devices;
@ -359,4 +362,10 @@ string Device::device_capabilities()
return capabilities;
}
/* Flag both the device types list and the devices list as needing an
 * update.
 */
void Device::tag_update()
{
	need_types_update = need_devices_update = true;
}
CCL_NAMESPACE_END

View File

@ -265,6 +265,13 @@ public:
static vector<DeviceType>& available_types();
static vector<DeviceInfo>& available_devices();
static string device_capabilities();
/* Tag devices lists for update. */
static void tag_update();
private:
/* Indicate whether device types and devices lists need to be (re)built. */
static bool need_types_update, need_devices_update;
};
CCL_NAMESPACE_END

View File

@ -78,6 +78,40 @@ public:
system_cpu_support_sse41();
system_cpu_support_avx();
system_cpu_support_avx2();
/* Log which kernel flavour is going to be used: the first one, from the most
 * to the least capable, which is both compiled in and reported as supported.
 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
		if(system_cpu_support_avx2()) {
			VLOG(1) << "Will be using AVX2 kernels.";
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx()) {
			VLOG(1) << "Will be using AVX kernels.";
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
		if(system_cpu_support_sse41()) {
			VLOG(1) << "Will be using SSE4.1 kernels.";
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3()) {
			VLOG(1) << "Will be using SSE3 kernels.";
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2()) {
			VLOG(1) << "Will be using SSE2 kernels.";
		}
		else
#endif
		{
			/* Fallback when no optimized kernel applies. */
			VLOG(1) << "Will be using regular kernels.";
		}
}
~CPUDevice()
@ -181,8 +215,6 @@ public:
void thread_path_trace(DeviceTask& task)
{
static bool cpu_type_logged = false;
if(task_pool.canceled()) {
if(task.need_finish_queue == false)
return;
@ -201,41 +233,35 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
path_trace_kernel = kernel_cpu_avx2_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using AVX2 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
path_trace_kernel = kernel_cpu_avx_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using AVX kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
path_trace_kernel = kernel_cpu_sse41_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using SSE4.1 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
path_trace_kernel = kernel_cpu_sse3_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using SSE3 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
path_trace_kernel = kernel_cpu_sse2_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using SSE2 kernel.";
}
else
#endif
{
path_trace_kernel = kernel_cpu_path_trace;
VLOG_ONCE(1, cpu_type_logged) << "Path tracing using regular kernel.";
}
while(task.acquire_tile(this, tile)) {
@ -277,7 +303,6 @@ public:
void thread_film_convert(DeviceTask& task)
{
static bool cpu_type_logged = false;
float sample_scale = 1.0f/(task.sample + 1);
if(task.rgba_half) {
@ -285,41 +310,35 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
convert_to_half_float_kernel = kernel_cpu_avx2_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using AVX2 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
convert_to_half_float_kernel = kernel_cpu_avx_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using AVX kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
convert_to_half_float_kernel = kernel_cpu_sse41_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using SSE4.1 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
convert_to_half_float_kernel = kernel_cpu_sse3_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using SSE3 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
convert_to_half_float_kernel = kernel_cpu_sse2_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using SSE2 kernel.";
}
else
#endif
{
convert_to_half_float_kernel = kernel_cpu_convert_to_half_float;
VLOG_ONCE(1, cpu_type_logged) << "Converting to half float using regular kernel.";
}
for(int y = task.y; y < task.y + task.h; y++)
@ -332,41 +351,35 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
convert_to_byte_kernel = kernel_cpu_avx2_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using AVX2 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
convert_to_byte_kernel = kernel_cpu_avx_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using AVX kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
convert_to_byte_kernel = kernel_cpu_sse41_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using SSE4.1 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
convert_to_byte_kernel = kernel_cpu_sse3_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using SSE3 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
convert_to_byte_kernel = kernel_cpu_sse2_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using SSE2 kernel.";
}
else
#endif
{
convert_to_byte_kernel = kernel_cpu_convert_to_byte;
VLOG_ONCE(1, cpu_type_logged) << "Converting to byte using regular kernel.";
}
for(int y = task.y; y < task.y + task.h; y++)
@ -380,7 +393,6 @@ public:
void thread_shader(DeviceTask& task)
{
KernelGlobals kg = kernel_globals;
static bool cpu_type_logged = false;
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
@ -390,41 +402,35 @@ public:
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
shader_kernel = kernel_cpu_avx2_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using AVX2 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if(system_cpu_support_avx()) {
shader_kernel = kernel_cpu_avx_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using AVX kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
shader_kernel = kernel_cpu_sse41_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using SSE4.1 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if(system_cpu_support_sse3()) {
shader_kernel = kernel_cpu_sse3_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using SSE3 kernel.";
}
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
shader_kernel = kernel_cpu_sse2_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using SSE2 kernel.";
}
else
#endif
{
shader_kernel = kernel_cpu_shader;
VLOG_ONCE(1, cpu_type_logged) << "Shading using regular kernel.";
}
for(int sample = 0; sample < task.num_samples; sample++) {

View File

@ -27,6 +27,7 @@
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_logging.h"
#include "util_map.h"
@ -84,29 +85,28 @@ namespace {
cl_device_type opencl_device_type()
{
char *device = getenv("CYCLES_OPENCL_TEST");
if(device) {
if(strcmp(device, "NONE") == 0)
switch(DebugFlags().opencl.device_type)
{
case DebugFlags::OpenCL::DEVICE_NONE:
return 0;
if(strcmp(device, "ALL") == 0)
case DebugFlags::OpenCL::DEVICE_ALL:
return CL_DEVICE_TYPE_ALL;
else if(strcmp(device, "DEFAULT") == 0)
case DebugFlags::OpenCL::DEVICE_DEFAULT:
return CL_DEVICE_TYPE_DEFAULT;
else if(strcmp(device, "CPU") == 0)
case DebugFlags::OpenCL::DEVICE_CPU:
return CL_DEVICE_TYPE_CPU;
else if(strcmp(device, "GPU") == 0)
case DebugFlags::OpenCL::DEVICE_GPU:
return CL_DEVICE_TYPE_GPU;
else if(strcmp(device, "ACCELERATOR") == 0)
case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
return CL_DEVICE_TYPE_ACCELERATOR;
default:
return CL_DEVICE_TYPE_ALL;
}
return CL_DEVICE_TYPE_ALL;
}
bool opencl_kernel_use_debug()
inline bool opencl_kernel_use_debug()
{
return (getenv("CYCLES_OPENCL_DEBUG") != NULL);
return DebugFlags().opencl.debug;
}
bool opencl_kernel_use_advanced_shading(const string& platform)
@ -129,11 +129,11 @@ bool opencl_kernel_use_advanced_shading(const string& platform)
bool opencl_kernel_use_split(const string& platform_name,
const cl_device_type device_type)
{
if(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL) {
if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_SPLIT) {
VLOG(1) << "Forcing split kernel to use.";
return true;
}
if(getenv("CYCLES_OPENCL_MEGA_KERNEL_TEST") != NULL) {
if(DebugFlags().opencl.kernel_type == DebugFlags::OpenCL::KERNEL_MEGA) {
VLOG(1) << "Forcing mega kernel to use.";
return false;
}
@ -229,8 +229,7 @@ bool opencl_device_version_check(cl_device_id device,
void opencl_get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices)
{
const bool force_all_platforms =
(getenv("CYCLES_OPENCL_MEGA_KERNEL_TEST") != NULL) ||
(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL);
(DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT);
const cl_device_type device_type = opencl_device_type();
static bool first_time = true;
#define FIRST_VLOG(severity) if(first_time) VLOG(severity)

View File

@ -11,6 +11,7 @@ set(INC_SYS
set(SRC
util_aligned_malloc.cpp
util_cache.cpp
util_debug.cpp
util_logging.cpp
util_math_cdf.cpp
util_md5.cpp

View File

@ -0,0 +1,164 @@
/*
* Copyright 2011-2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "util_debug.h"
#include <stdlib.h>
#include "util_logging.h"
#include "util_string.h"
CCL_NAMESPACE_BEGIN
/* Start with every instruction set allowed, then let reset() apply the
 * environment variable overrides.
 */
DebugFlags::CPU::CPU()
	: avx2(true), avx(true), sse41(true), sse3(true), sse2(true)
{
	reset();
}
/* Decide whether an instruction set is allowed: it is unless the given
 * environment variable exists. A disabled set is reported in the log.
 */
static bool debug_cpu_flag_from_env(const char *flag_name, const char *env)
{
	const bool allowed = (getenv(env) == NULL);
	if(!allowed) {
		VLOG(1) << "Disabling " << flag_name << " instruction set.";
	}
	return allowed;
}

/* Reset CPU flags to their defaults: every instruction set is allowed unless
 * its CYCLES_CPU_NO_* environment variable is set.
 */
void DebugFlags::CPU::reset()
{
	avx2 = debug_cpu_flag_from_env("avx2", "CYCLES_CPU_NO_AVX2");
	avx = debug_cpu_flag_from_env("avx", "CYCLES_CPU_NO_AVX");
	sse41 = debug_cpu_flag_from_env("sse41", "CYCLES_CPU_NO_SSE41");
	sse3 = debug_cpu_flag_from_env("sse3", "CYCLES_CPU_NO_SSE3");
	sse2 = debug_cpu_flag_from_env("sse2", "CYCLES_CPU_NO_SSE2");
}
/* Start from "all devices, default kernel, no debug", then let reset() apply
 * the environment variable overrides.
 */
DebugFlags::OpenCL::OpenCL()
	: device_type(DebugFlags::OpenCL::DEVICE_ALL),
	  kernel_type(DebugFlags::OpenCL::KERNEL_DEFAULT),
	  debug(false)
{
	reset();
}
/* Reset OpenCL flags to their defaults, as controlled by the environment:
 *
 * - CYCLES_OPENCL_TEST selects the device type (NONE, ALL, DEFAULT, CPU, GPU
 *   or ACCELERATOR); when unset or not recognized all devices are used.
 * - CYCLES_OPENCL_MEGA_KERNEL_TEST forces the mega kernel, otherwise
 *   CYCLES_OPENCL_SPLIT_KERNEL_TEST forces the split kernel; with neither
 *   set the default kernel choice is used.
 * - CYCLES_OPENCL_DEBUG enables the debug flag.
 */
void DebugFlags::OpenCL::reset()
{
	/* Initialize device type from environment variables. */
	device_type = DebugFlags::OpenCL::DEVICE_ALL;
	char *device = getenv("CYCLES_OPENCL_TEST");
	if(device) {
		if(strcmp(device, "NONE") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_NONE;
		}
		else if(strcmp(device, "ALL") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_ALL;
		}
		else if(strcmp(device, "DEFAULT") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
		}
		else if(strcmp(device, "CPU") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_CPU;
		}
		else if(strcmp(device, "GPU") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_GPU;
		}
		else if(strcmp(device, "ACCELERATOR") == 0) {
			device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
		}
	}
	/* Initialize kernel type from environment variables.
	 * Each kernel type has its own variable; mega wins when both are set.
	 */
	kernel_type = DebugFlags::OpenCL::KERNEL_DEFAULT;
	if(getenv("CYCLES_OPENCL_MEGA_KERNEL_TEST") != NULL) {
		kernel_type = DebugFlags::OpenCL::KERNEL_MEGA;
	}
	else if(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL) {
		kernel_type = DebugFlags::OpenCL::KERNEL_SPLIT;
	}
	/* Initialize other flags from environment variables. */
	debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
}
/* Construct the flags registry.
 * The body is empty: the cpu and opencl members set themselves up in their
 * own constructors.
 */
DebugFlags::DebugFlags()
{
/* Nothing for now. */
}
void DebugFlags::reset()
{
cpu.reset();
opencl.reset();
}
/* Print a human readable summary of all debug flags to the stream.
 *
 * CPU flags come first, followed by the OpenCL device type, kernel type and
 * debug flag. Returns the stream to allow chaining.
 */
std::ostream& operator <<(std::ostream &os,
                          const DebugFlagsRef debug_flags)
{
	os << "CPU flags:\n"
	   << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n"
	   << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n"
	   << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n"
	   << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n"
	   << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n";

	/* The names get a fallback value so that a value not covered by the
	 * switches below never leads to streaming an uninitialized pointer.
	 */
	const char *opencl_device_type = "UNKNOWN",
	           *opencl_kernel_type = "UNKNOWN";
	switch(debug_flags.opencl.device_type) {
		case DebugFlags::OpenCL::DEVICE_NONE:
			opencl_device_type = "NONE";
			break;
		case DebugFlags::OpenCL::DEVICE_ALL:
			opencl_device_type = "ALL";
			break;
		case DebugFlags::OpenCL::DEVICE_DEFAULT:
			opencl_device_type = "DEFAULT";
			break;
		case DebugFlags::OpenCL::DEVICE_CPU:
			opencl_device_type = "CPU";
			break;
		case DebugFlags::OpenCL::DEVICE_GPU:
			opencl_device_type = "GPU";
			break;
		case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
			opencl_device_type = "ACCELERATOR";
			break;
	}
	switch(debug_flags.opencl.kernel_type) {
		case DebugFlags::OpenCL::KERNEL_DEFAULT:
			opencl_kernel_type = "DEFAULT";
			break;
		case DebugFlags::OpenCL::KERNEL_MEGA:
			opencl_kernel_type = "MEGA";
			break;
		case DebugFlags::OpenCL::KERNEL_SPLIT:
			opencl_kernel_type = "SPLIT";
			break;
	}
	os << "OpenCL flags:\n"
	   << " Device type : " << opencl_device_type << "\n"
	   << " Kernel type : " << opencl_kernel_type << "\n"
	   << " Debug : " << string_from_bool(debug_flags.opencl.debug)
	   << "\n";
	return os;
}
CCL_NAMESPACE_END

View File

@ -17,7 +17,119 @@
#ifndef __UTIL_DEBUG_H__
#define __UTIL_DEBUG_H__
#include <assert.h>
#include <cassert>
#include <iostream>
CCL_NAMESPACE_BEGIN
/* Global storage for all sorts of flags used to fine-tune the behavior of
 * particular areas for development purposes, without officially exposing the
 * settings to the interface.
 *
 * There is a single instance, obtained with DebugFlags::get(); the
 * constructor is private and copying is disabled.
 */
class DebugFlags {
public:
/* Descriptor of CPU feature-set to be used. */
struct CPU {
/* Constructs the descriptor; the implementation finishes by calling
 * reset().
 */
CPU();
/* Reset flags to their defaults. */
void reset();
/* Flags describing which instruction sets are allowed for use.
 * True means the instruction set may be used.
 */
bool avx2;
bool avx;
bool sse41;
bool sse3;
bool sse2;
};
/* Descriptor of OpenCL feature-set to be used. */
struct OpenCL {
/* Constructs the descriptor; the implementation finishes by calling
 * reset().
 */
OpenCL();
/* Reset flags to their defaults. */
void reset();
/* Available device types.
 * Only gives a hint about which devices to let the user choose from, does
 * not try to use any sort of optimal device or so.
 */
enum DeviceType {
/* None of OpenCL devices will be used. */
DEVICE_NONE,
/* All OpenCL devices will be used. */
DEVICE_ALL,
/* Default system OpenCL device will be used. */
DEVICE_DEFAULT,
/* Host processor will be used. */
DEVICE_CPU,
/* GPU devices will be used. */
DEVICE_GPU,
/* Dedicated OpenCL accelerator device will be used. */
DEVICE_ACCELERATOR,
};
/* Available kernel types. */
enum KernelType {
/* Do an automated guess of which kernel to use, based on the officially
 * supported GPUs and such.
 */
KERNEL_DEFAULT,
/* Force mega kernel to be used. */
KERNEL_MEGA,
/* Force split kernel to be used. */
KERNEL_SPLIT,
};
/* Requested device type. */
DeviceType device_type;
/* Requested kernel type. */
KernelType kernel_type;
/* Use debug version of the kernel. */
bool debug;
};
/* Get instance of debug flags registry.
 * The instance is a function-local static, created on first call.
 */
static DebugFlags& get()
{
static DebugFlags instance;
return instance;
}
/* Reset flags to their defaults. */
void reset();
/* Requested CPU flags. */
CPU cpu;
/* Requested OpenCL flags. */
OpenCL opencl;
private:
/* Private: the only instance is the one created inside get(). */
DebugFlags();
/* Copying is disabled: deleted members when the compiler reports a standard
 * newer than C++98, otherwise private declarations without a definition.
 */
#if (__cplusplus > 199711L)
public:
DebugFlags(DebugFlags const& /*other*/) = delete;
void operator=(DebugFlags const& /*other*/) = delete;
#else
private:
DebugFlags(DebugFlags const& /*other*/);
void operator=(DebugFlags const& /*other*/);
#endif
};
typedef DebugFlags& DebugFlagsRef;
/* Shortcut for DebugFlags::get(): returns a reference to the global debug
 * flags instance.
 * NOTE: this function has the same name as the class, so callers write
 * DebugFlags().cpu, DebugFlags().opencl and so on.
 */
inline DebugFlags& DebugFlags() {
return DebugFlags::get();
}
std::ostream& operator <<(std::ostream &os,
const DebugFlagsRef debug_flags);
CCL_NAMESPACE_END
#endif /* __UTIL_DEBUG_H__ */

View File

@ -15,6 +15,7 @@
*/
#include "util_system.h"
#include "util_debug.h"
#include "util_types.h"
#include "util_string.h"
@ -126,29 +127,6 @@ struct CPUCapabilities {
bool bmi2;
};
static void system_cpu_capabilities_override(CPUCapabilities *caps)
{
/* Only capabilities which affects on cycles kernel. */
if(getenv("CYCLES_CPU_NO_AVX2")) {
caps->avx2 = false;
}
if(getenv("CYCLES_CPU_NO_AVX")) {
caps->avx = false;
}
if(getenv("CYCLES_CPU_NO_SSE41")) {
caps->sse41 = false;
}
if(getenv("CYCLES_CPU_NO_SSE3")) {
caps->sse3 = false;
}
if(getenv("CYCLES_CPU_NO_SSE2")) {
caps->sse2 = false;
}
if(getenv("CYCLES_CPU_NO_SSE")) {
caps->sse = false;
}
}
static CPUCapabilities& system_cpu_capabilities()
{
static CPUCapabilities caps;
@ -201,8 +179,6 @@ static CPUCapabilities& system_cpu_capabilities()
caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
}
system_cpu_capabilities_override(&caps);
caps_init = true;
}
@ -212,30 +188,35 @@ static CPUCapabilities& system_cpu_capabilities()
bool system_cpu_support_sse2()
{
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2;
return DebugFlags().cpu.sse2 && caps.sse && caps.sse2;
}
bool system_cpu_support_sse3()
{
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
return DebugFlags().cpu.sse3 &&
caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
}
bool system_cpu_support_sse41()
{
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
return DebugFlags().cpu.sse41 &&
caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
}
bool system_cpu_support_avx()
{
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
return DebugFlags().cpu.avx &&
caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
}
bool system_cpu_support_avx2()
{
CPUCapabilities& caps = system_cpu_capabilities();
return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
return DebugFlags().cpu.avx2 &&
caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
}
#else