Merge branch 'master' into blender2.8

Campbell Barton 2018-11-29 12:55:58 +11:00
commit 9893fee4e6
27 changed files with 838 additions and 37 deletions

View File

@ -501,7 +501,7 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
if(!b_engine.is_preview() && background && print_render_stats) {
RenderStats stats;
session->scene->collect_statistics(&stats);
session->collect_statistics(&stats);
printf("Render statistics:\n%s\n", stats.full_report().c_str());
}

View File

@ -830,7 +830,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
}
/* tiles */
if(params.device.type != DEVICE_CPU && !background) {
const bool is_cpu = (params.device.type == DEVICE_CPU);
if(!is_cpu && !background) {
/* currently GPU could be much slower than CPU when using tiles,
* still need to be investigated, but meanwhile make it possible
* to work in viewport smoothly
@ -907,6 +908,9 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
params.progressive_update_timeout = 0.1;
}
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() &&
background && BlenderSession::print_render_stats;
return params;
}

View File

@ -505,6 +505,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.has_half_images = true;
info.has_volume_decoupled = true;
info.has_osl = true;
info.has_profiling = true;
foreach(const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
@ -539,6 +540,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
}
return info;

View File

@ -60,6 +60,7 @@ public:
bool has_volume_decoupled; /* Decoupled volume shading. */
bool has_osl; /* Support Open Shading Language. */
bool use_split_kernel; /* Use split or mega kernel. */
bool has_profiling; /* Supports runtime collection of profiling info. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
@ -75,6 +76,7 @@ public:
has_volume_decoupled = false;
has_osl = false;
use_split_kernel = false;
has_profiling = false;
}
bool operator==(const DeviceInfo &info) {

View File

@ -477,6 +477,8 @@ public:
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS);
int4 rect = task->rect;
int r = task->nlm_state.r;
int f = task->nlm_state.f;
@ -529,6 +531,8 @@ public:
bool denoising_construct_transform(DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_CONSTRUCT_TRANSFORM);
for(int y = 0; y < task->filter_area.w; y++) {
for(int x = 0; x < task->filter_area.z; x++) {
filter_construct_transform_kernel()((float*) task->buffer.mem.device_pointer,
@ -551,6 +555,8 @@ public:
device_ptr output_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT);
mem_zero(task->storage.XtWX);
mem_zero(task->storage.XtWY);
@ -609,8 +615,10 @@ public:
bool denoising_combine_halves(device_ptr a_ptr, device_ptr b_ptr,
device_ptr mean_ptr, device_ptr variance_ptr,
int r, int4 rect, DenoisingTask * /*task*/)
int r, int4 rect, DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES);
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
filter_combine_halves_kernel()(x, y,
@ -629,6 +637,8 @@ public:
device_ptr sample_variance_ptr, device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr, DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW);
for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) {
filter_divide_shadow_kernel()(task->render_buffer.samples,
@ -653,6 +663,8 @@ public:
device_ptr variance_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_GET_FEATURE);
for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) {
filter_get_feature_kernel()(task->render_buffer.samples,
@ -676,6 +688,8 @@ public:
device_ptr output_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DETECT_OUTLIERS);
for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) {
filter_detect_outliers_kernel()(x, y,
@ -735,6 +749,8 @@ public:
void denoise(DenoisingTask& denoising, RenderTile &tile)
{
ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
tile.sample = tile.start_sample + tile.num_samples;
denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
@ -765,6 +781,8 @@ public:
KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init());
stats.profiler.add_state(&kg->profiler);
CPUSplitKernel *split_kernel = NULL;
if(use_split_kernel) {
split_kernel = new CPUSplitKernel(this);
@ -778,6 +796,7 @@ public:
RenderTile tile;
DenoisingTask denoising(this, task);
denoising.profiler = &kg->profiler;
while(task.acquire_tile(this, tile)) {
if(tile.task == RenderTile::PATH_TRACE) {
@ -802,6 +821,8 @@ public:
}
}
stats.profiler.remove_state(&kg->profiler);
thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
kg->~KernelGlobals();
kgbuffer.free();
@ -1061,6 +1082,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.has_volume_decoupled = true;
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
devices.insert(devices.begin(), info);
}

View File

@ -22,6 +22,7 @@ CCL_NAMESPACE_BEGIN
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL),
storage(device),
buffer(device),
device(device)

View File

@ -23,6 +23,8 @@
#include "kernel/filter/filter_defines.h"
#include "util/util_profiling.h"
CCL_NAMESPACE_BEGIN
class DenoisingTask {
@ -51,6 +53,8 @@ public:
TileInfo *tile_info;
device_vector<int> tile_info_mem;
ProfilingState *profiler;
int4 rect;
int4 filter_area;

View File

@ -110,6 +110,7 @@ set(SRC_HEADERS
kernel_path_surface.h
kernel_path_subsurface.h
kernel_path_volume.h
kernel_profiling.h
kernel_projection.h
kernel_queues.h
kernel_random.h

View File

@ -186,6 +186,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
float difl,
float extmax)
{
PROFILING_INIT(kg, PROFILING_INTERSECT);
if(!scene_intersect_valid(&ray)) {
return false;
}
@ -248,6 +250,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
if(!scene_intersect_valid(&ray)) {
return false;
}
@ -327,6 +331,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
uint max_hits,
uint *num_hits)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
if(!scene_intersect_valid(ray)) {
return false;
}
@ -407,6 +413,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
if(!scene_intersect_valid(ray)) {
return false;
}
@ -438,6 +446,8 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
if(!scene_intersect_valid(ray)) {
return false;
}

View File

@ -19,6 +19,8 @@
#ifndef __KERNEL_GLOBALS_H__
#define __KERNEL_GLOBALS_H__
#include "kernel/kernel_profiling.h"
#ifdef __KERNEL_CPU__
# include "util/util_vector.h"
# include "util/util_map.h"
@ -82,6 +84,8 @@ typedef struct KernelGlobals {
int2 global_size;
int2 global_id;
ProfilingState profiler;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */

View File

@ -376,6 +376,9 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
int sample,
PathRadiance *L)
{
PROFILING_INIT(kg, PROFILING_WRITE_RESULT);
PROFILING_OBJECT(PRIM_NONE);
float alpha;
float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);

View File

@ -57,6 +57,8 @@ ccl_device_forceinline bool kernel_path_scene_intersect(
Intersection *isect,
PathRadiance *L)
{
PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT);
uint visibility = path_state_ray_visibility(kg, state);
if(path_state_ao_bounce(kg, state)) {
@ -105,6 +107,8 @@ ccl_device_forceinline void kernel_path_lamp_emission(
ShaderData *emission_sd,
PathRadiance *L)
{
PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION);
#ifdef __LAMP_MIS__
if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
@ -172,6 +176,8 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
ShaderData *emission_sd,
PathRadiance *L)
{
PROFILING_INIT(kg, PROFILING_VOLUME);
/* Sanitize volume stack. */
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
@ -278,6 +284,8 @@ ccl_device_forceinline bool kernel_path_shader_apply(
PathRadiance *L,
ccl_global float *buffer)
{
PROFILING_INIT(kg, PROFILING_SHADER_APPLY);
#ifdef __SHADOW_TRICKS__
if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) {
if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) {
@ -355,6 +363,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
float3 throughput,
float3 ao_alpha)
{
PROFILING_INIT(kg, PROFILING_AO);
/* todo: solve correlation */
float bsdf_u, bsdf_v;
@ -568,6 +578,8 @@ ccl_device_forceinline void kernel_path_integrate(
ccl_global float *buffer,
ShaderData *emission_sd)
{
PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE);
/* Shader data memory used for both volumes and surfaces, saves stack space. */
ShaderData sd;
@ -719,6 +731,8 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
ccl_global float *buffer,
int sample, int x, int y, int offset, int stride)
{
PROFILING_INIT(kg, PROFILING_RAY_SETUP);
/* buffer offset */
int index = offset + x + y*stride;
int pass_stride = kernel_data.film.pass_stride;

View File

@ -32,6 +32,8 @@ bool kernel_path_subsurface_scatter(
ccl_addr_space float3 *throughput,
ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
{
PROFILING_INIT(kg, PROFILING_SUBSURFACE);
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);

View File

@ -217,6 +217,8 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
PathRadiance *L)
{
PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
#ifdef __EMISSION__
if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
return;
@ -274,6 +276,8 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
PathRadianceState *L_state,
ccl_addr_space Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE);
/* no BSDF? we can stop here */
if(sd->flag & SD_BSDF) {
/* sample BSDF */

View File

@ -0,0 +1,40 @@
/*
* Copyright 2011-2018 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __KERNEL_PROFILING_H__
#define __KERNEL_PROFILING_H__
#ifdef __KERNEL_CPU__
# include "util/util_profiling.h"
#endif
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_CPU__
# define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event)
# define PROFILING_EVENT(event) profiling_helper.set_event(event)
# define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); }
# define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); }
#else
# define PROFILING_INIT(kg, event)
# define PROFILING_EVENT(event)
# define PROFILING_SHADER(shader)
# define PROFILING_OBJECT(object)
#endif /* __KERNEL_CPU__ */
CCL_NAMESPACE_END
#endif /* __KERNEL_PROFILING_H__ */
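
For orientation, a CPU kernel function is expected to use these macros roughly as follows (an illustrative sketch mirroring the call sites added elsewhere in this commit; example_kernel_step is hypothetical, not part of the patch):

ccl_device void example_kernel_step(KernelGlobals *kg, ShaderData *sd)
{
	/* Attribute all samples taken while this scope is active to the given event;
	 * the previous event is restored when the helper goes out of scope. */
	PROFILING_INIT(kg, PROFILING_SHADER_SETUP);

	/* ... do the actual work ... */

	/* Optionally attribute further samples to the active shader and object. */
	PROFILING_SHADER(sd->shader);
	PROFILING_OBJECT(sd->object);
}

On GPU builds the macros expand to nothing, so the calls compile away.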

View File

@ -54,6 +54,8 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
const Intersection *isect,
const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
#ifdef __INSTANCING__
sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
#endif
@ -147,6 +149,9 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
differential_incoming(&sd->dI, ray->dD);
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#endif
PROFILING_SHADER(sd->shader);
PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from BSSRDF scatter */
@ -163,6 +168,8 @@ void shader_setup_from_subsurface(
const Intersection *isect,
const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
const bool backfacing = sd->flag & SD_BACKFACING;
/* object, matrices, time, ray_length stay the same */
@ -233,6 +240,8 @@ void shader_setup_from_subsurface(
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
/* don't modify dP and dI */
# endif
PROFILING_SHADER(sd->shader);
}
#endif
@ -249,6 +258,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
bool object_space,
int lamp)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
/* vectors */
sd->P = P;
sd->N = Ng;
@ -353,6 +364,9 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
PROFILING_SHADER(sd->shader);
PROFILING_OBJECT(sd->object);
}
/* ShaderData setup for displacement */
@ -380,6 +394,8 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
/* vectors */
sd->P = ray->D;
sd->N = -ray->D;
@ -414,6 +430,9 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
PROFILING_SHADER(sd->shader);
PROFILING_OBJECT(sd->object);
}
/* ShaderData setup from point inside volume */
@ -421,6 +440,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
#ifdef __VOLUME__
ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray)
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
/* vectors */
sd->P = ray->P;
sd->N = -ray->D;
@ -461,6 +482,9 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
/* for NDC coordinates */
sd->ray_P = ray->P;
sd->ray_dP = ray->dP;
PROFILING_SHADER(sd->shader);
PROFILING_OBJECT(sd->object);
}
#endif /* __VOLUME__ */
@ -591,6 +615,8 @@ void shader_bsdf_eval(KernelGlobals *kg,
float light_pdf,
bool use_mis)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL);
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
#ifdef __BRANCHED_PATH__
@ -720,6 +746,8 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
const ShaderClosure *sc = shader_bsdf_pick(sd, &randu);
if(sc == NULL) {
*pdf = 0.0f;
@ -751,6 +779,8 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd,
const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
int label;
float3 eval;
@ -984,6 +1014,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
ccl_addr_space PathState *state, int path_flag)
{
PROFILING_INIT(kg, PROFILING_SHADER_EVAL);
/* If path is being terminated, we are tracing a shadow ray or evaluating
* emission, then we don't need to store closures. The emission and shadow
* shader data also do not have a closure array to save GPU memory. */
@ -1084,6 +1116,8 @@ ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, con
ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd,
const float3 omega_in, BsdfEval *eval, float *pdf)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL);
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
_shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
@ -1093,6 +1127,8 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *s
float randu, float randv, BsdfEval *phase_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
int sampled = 0;
if(sd->num_closure > 1) {
@ -1151,6 +1187,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
int label;
float3 eval;

View File

@ -335,6 +335,11 @@ uint Object::visibility_for_tracing() const {
return trace_visibility;
}
int Object::get_device_index() const
{
return index;
}
/* Object Manager */
ObjectManager::ObjectManager()
@ -348,10 +353,9 @@ ObjectManager::~ObjectManager()
}
void ObjectManager::device_update_object_transform(UpdateObjectTransformState *state,
Object *ob,
int object_index)
Object *ob)
{
KernelObject& kobject = state->objects[object_index];
KernelObject& kobject = state->objects[ob->index];
Transform *object_motion_pass = state->object_motion_pass;
Mesh *mesh = ob->mesh;
@ -457,13 +461,13 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
tfm_post = tfm_post * itfm;
}
int motion_pass_offset = object_index*OBJECT_MOTION_PASS_SIZE;
int motion_pass_offset = ob->index*OBJECT_MOTION_PASS_SIZE;
object_motion_pass[motion_pass_offset + 0] = tfm_pre;
object_motion_pass[motion_pass_offset + 1] = tfm_post;
}
else if(state->need_motion == Scene::MOTION_BLUR) {
if(ob->use_motion()) {
kobject.motion_offset = state->motion_offset[object_index];
kobject.motion_offset = state->motion_offset[ob->index];
/* Decompose transforms for interpolation. */
DecomposedTransform *decomp = state->object_motion + kobject.motion_offset;
@ -494,7 +498,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
if(ob->use_holdout) {
flag |= SD_OBJECT_HOLDOUT_MASK;
}
state->object_flag[object_index] = flag;
state->object_flag[ob->index] = flag;
/* Have curves. */
if(mesh->num_curves()) {
@ -538,7 +542,7 @@ void ObjectManager::device_update_object_transform_task(
for(int i = 0; i < num_objects; ++i) {
const int object_index = start_index + i;
Object *ob = state->scene->objects[object_index];
device_update_object_transform(state, ob, object_index);
device_update_object_transform(state, ob);
}
}
}
@ -593,10 +597,8 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene,
* need some tweaks to make mid-complex scenes optimal.
*/
if(scene->objects.size() < 64) {
int object_index = 0;
foreach(Object *ob, scene->objects) {
device_update_object_transform(&state, ob, object_index);
object_index++;
device_update_object_transform(&state, ob);
if(progress.get_cancel()) {
return;
}
@ -642,6 +644,12 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
if(scene->objects.size() == 0)
return;
/* Assign object IDs. */
int index = 0;
foreach(Object *object, scene->objects) {
object->index = index++;
}
/* set object transform matrices, before applying static transforms */
progress.set_status("Updating Objects", "Copying Transformations to device");
device_update_transforms(dscene, scene, progress);
@ -686,26 +694,25 @@ void ObjectManager::device_update_flags(Device *,
}
}
int object_index = 0;
foreach(Object *object, scene->objects) {
if(object->mesh->has_volume) {
object_flag[object_index] |= SD_OBJECT_HAS_VOLUME;
object_flag[object_index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME;
object_flag[object->index] &= ~SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
foreach(Attribute& attr, object->mesh->attributes.attributes) {
if(attr.element == ATTR_ELEMENT_VOXEL) {
object_flag[object_index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
object_flag[object->index] |= SD_OBJECT_HAS_VOLUME_ATTRIBUTES;
}
}
}
else {
object_flag[object_index] &= ~(SD_OBJECT_HAS_VOLUME|SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
object_flag[object->index] &= ~(SD_OBJECT_HAS_VOLUME|SD_OBJECT_HAS_VOLUME_ATTRIBUTES);
}
if(object->is_shadow_catcher) {
object_flag[object_index] |= SD_OBJECT_SHADOW_CATCHER;
object_flag[object->index] |= SD_OBJECT_SHADOW_CATCHER;
}
else {
object_flag[object_index] &= ~SD_OBJECT_SHADOW_CATCHER;
object_flag[object->index] &= ~SD_OBJECT_SHADOW_CATCHER;
}
if(bounds_valid) {
@ -714,7 +721,7 @@ void ObjectManager::device_update_flags(Device *,
continue;
}
if(object->bounds.intersects(volume_object->bounds)) {
object_flag[object_index] |= SD_OBJECT_INTERSECTS_VOLUME;
object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
break;
}
}
@ -723,9 +730,8 @@ void ObjectManager::device_update_flags(Device *,
/* Not really valid, but can't make more reliable in the case
* of bounds not being up to date.
*/
object_flag[object_index] |= SD_OBJECT_INTERSECTS_VOLUME;
object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
}
++object_index;
}
/* Copy object flag. */
@ -741,7 +747,6 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
KernelObject *kobjects = dscene->objects.data();
bool update = false;
int object_index = 0;
foreach(Object *object, scene->objects) {
Mesh* mesh = object->mesh;
@ -750,18 +755,16 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() -
mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - mesh->patch_offset;
if(kobjects[object_index].patch_map_offset != patch_map_offset) {
kobjects[object_index].patch_map_offset = patch_map_offset;
if(kobjects[object->index].patch_map_offset != patch_map_offset) {
kobjects[object->index].patch_map_offset = patch_map_offset;
update = true;
}
}
if(kobjects[object_index].attribute_map_offset != mesh->attr_map_offset) {
kobjects[object_index].attribute_map_offset = mesh->attr_map_offset;
if(kobjects[object->index].attribute_map_offset != mesh->attr_map_offset) {
kobjects[object->index].attribute_map_offset = mesh->attr_map_offset;
update = true;
}
object_index++;
}
if(update) {

View File

@ -38,6 +38,7 @@ class Progress;
class Scene;
struct Transform;
struct UpdateObjectTransformState;
class ObjectManager;
/* Object */
@ -88,6 +89,16 @@ public:
* determined flags which denotes trace-time visibility.
*/
uint visibility_for_tracing() const;
/* Returns the index that is used in the kernel for this object. */
int get_device_index() const;
protected:
/* Specifies the position of the object in scene->objects and
* in the device vectors. Gets set in device_update. */
int index;
friend class ObjectManager;
};
/* Object Manager */
@ -123,8 +134,7 @@ public:
protected:
void device_update_object_transform(UpdateObjectTransformState *state,
Object *ob,
const int object_index);
Object *ob);
void device_update_object_transform_task(UpdateObjectTransformState *state);
bool device_update_object_transform_pop_work(
UpdateObjectTransformState *state,

View File

@ -250,7 +250,9 @@ void Session::run_gpu()
if(!no_tiles) {
/* update scene */
scoped_timer update_timer;
update_scene();
if(update_scene()) {
stats.profiler.reset(scene->shaders.size(), scene->objects.size());
}
progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
@ -585,7 +587,9 @@ void Session::run_cpu()
/* update scene */
scoped_timer update_timer;
update_scene();
if(update_scene()) {
stats.profiler.reset(scene->shaders.size(), scene->objects.size());
}
progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
@ -729,6 +733,10 @@ void Session::run()
/* load kernels */
load_kernels();
if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
stats.profiler.start();
}
/* session thread loop */
progress.set_status("Waiting for render to start");
@ -743,6 +751,8 @@ void Session::run()
run_cpu();
}
stats.profiler.stop();
/* progress update */
if(progress.get_cancel())
progress.set_status("Cancel", progress.get_cancel_message());
@ -825,7 +835,7 @@ void Session::wait()
session_thread = NULL;
}
void Session::update_scene()
bool Session::update_scene()
{
thread_scoped_lock scene_lock(scene->mutex);
@ -876,7 +886,10 @@ void Session::update_scene()
progress.set_status("Updating Scene");
MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
return true;
}
return false;
}
void Session::update_status_time(bool show_pause, bool show_done)
@ -1052,6 +1065,14 @@ void Session::device_free()
*/
}
void Session::collect_statistics(RenderStats *render_stats)
{
scene->collect_statistics(render_stats);
if(params.use_profiling && (params.device.type == DEVICE_CPU)) {
render_stats->collect_profiling(scene, &stats);
}
}
int Session::get_max_closure_count()
{
int max_closures = 0;

View File

@ -20,6 +20,7 @@
#include "render/buffers.h"
#include "device/device.h"
#include "render/shader.h"
#include "render/stats.h"
#include "render/tile.h"
#include "util/util_progress.h"
@ -55,6 +56,8 @@ public:
int pixel_size;
int threads;
bool use_profiling;
bool display_buffer_linear;
bool use_denoising;
@ -89,6 +92,8 @@ public:
pixel_size = 1;
threads = 0;
use_profiling = false;
use_denoising = false;
denoising_passes = false;
denoising_radius = 8;
@ -118,6 +123,7 @@ public:
&& start_resolution == params.start_resolution
&& pixel_size == params.pixel_size
&& threads == params.threads
&& use_profiling == params.use_profiling
&& display_buffer_linear == params.display_buffer_linear
&& cancel_timeout == params.cancel_timeout
&& reset_timeout == params.reset_timeout
@ -159,7 +165,7 @@ public:
void set_samples(int samples);
void set_pause(bool pause);
void update_scene();
bool update_scene();
void load_kernels(bool lock_scene=true);
void device_free();
@ -168,6 +174,8 @@ public:
* (for example, when rendering with unlimited samples). */
float get_progress();
void collect_statistics(RenderStats *stats);
protected:
struct DelayedReset {
thread_mutex mutex;

View File

@ -15,6 +15,7 @@
*/
#include "render/stats.h"
#include "render/object.h"
#include "util/util_algorithm.h"
#include "util/util_foreach.h"
#include "util/util_string.h"
@ -33,6 +34,16 @@ bool namedSizeEntryComparator(const NamedSizeEntry& a, const NamedSizeEntry& b)
return a.size > b.size;
}
bool namedTimeSampleEntryComparator(const NamedNestedSampleStats& a, const NamedNestedSampleStats& b)
{
return a.sum_samples > b.sum_samples;
}
bool namedSampleCountPairComparator(const NamedSampleCountPair& a, const NamedSampleCountPair& b)
{
return a.samples > b.samples;
}
} // namespace
NamedSizeEntry::NamedSizeEntry()
@ -77,6 +88,113 @@ string NamedSizeStats::full_report(int indent_level)
return result;
}
/* Named time sample statistics. */
NamedNestedSampleStats::NamedNestedSampleStats()
: name(""), self_samples(0), sum_samples(0)
{}
NamedNestedSampleStats::NamedNestedSampleStats(const string& name, uint64_t samples)
: name(name), self_samples(samples), sum_samples(samples)
{}
NamedNestedSampleStats& NamedNestedSampleStats::add_entry(const string& name_, uint64_t samples_)
{
entries.push_back(NamedNestedSampleStats(name_, samples_));
return entries[entries.size()-1];
}
void NamedNestedSampleStats::update_sum()
{
sum_samples = self_samples;
foreach(NamedNestedSampleStats& entry, entries) {
entry.update_sum();
sum_samples += entry.sum_samples;
}
}
string NamedNestedSampleStats::full_report(int indent_level, uint64_t total_samples)
{
update_sum();
if(total_samples == 0) {
total_samples = sum_samples;
}
const string indent(indent_level * kIndentNumSpaces, ' ');
const double sum_percent = 100*((double) sum_samples) / total_samples;
const double sum_seconds = sum_samples * 0.001;
const double self_percent = 100*((double) self_samples) / total_samples;
const double self_seconds = self_samples * 0.001;
string info = string_printf("%-32s: Total %3.2f%% (%.2fs), Self %3.2f%% (%.2fs)\n",
name.c_str(),
sum_percent,
sum_seconds,
self_percent,
self_seconds);
string result = indent + info;
sort(entries.begin(), entries.end(), namedTimeSampleEntryComparator);
foreach(NamedNestedSampleStats& entry, entries) {
result += entry.full_report(indent_level + 1, total_samples);
}
return result;
}
/* Named sample count pairs. */
NamedSampleCountPair::NamedSampleCountPair(const ustring& name, uint64_t samples, uint64_t hits)
: name(name), samples(samples), hits(hits)
{}
NamedSampleCountStats::NamedSampleCountStats()
{}
void NamedSampleCountStats::add(const ustring& name, uint64_t samples, uint64_t hits)
{
entry_map::iterator entry = entries.find(name);
if(entry != entries.end()) {
entry->second.samples += samples;
entry->second.hits += hits;
return;
}
entries.emplace(name, NamedSampleCountPair(name, samples, hits));
}
string NamedSampleCountStats::full_report(int indent_level)
{
const string indent(indent_level * kIndentNumSpaces, ' ');
vector<NamedSampleCountPair> sorted_entries;
sorted_entries.reserve(entries.size());
uint64_t total_hits = 0, total_samples = 0;
foreach(entry_map::const_reference entry, entries) {
const NamedSampleCountPair &pair = entry.second;
total_hits += pair.hits;
total_samples += pair.samples;
sorted_entries.push_back(pair);
}
const double avg_samples_per_hit = ((double) total_samples) / total_hits;
sort(sorted_entries.begin(), sorted_entries.end(), namedSampleCountPairComparator);
string result = "";
foreach(const NamedSampleCountPair& entry, sorted_entries) {
const double seconds = entry.samples * 0.001;
const double relative = ((double) entry.samples) / (entry.hits * avg_samples_per_hit);
result += indent + string_printf("%-32s: %.2fs (Relative cost: %.2f)\n",
entry.name.c_str(),
seconds,
relative);
}
return result;
}
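
To make the report numbers concrete, here is a small worked example under the profiler's 1 ms sampling interval (all figures hypothetical): an event with self_samples = 1500 and sum_samples = 2500 out of total_samples = 10000 is printed as "Total 25.00% (2.50s), Self 15.00% (1.50s)", since seconds = samples * 0.001 and percent = 100 * samples / total_samples. For the per-shader/per-object table, if total_samples = 9000 and total_hits = 3000, then avg_samples_per_hit = 3, and an entry with samples = 600 and hits = 100 is printed as "0.60s (Relative cost: 2.00)", i.e. it took twice the average time per hit.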
/* Mesh statistics. */
MeshStats::MeshStats() {
@ -106,6 +224,74 @@ string ImageStats::full_report(int indent_level)
/* Overall statistics. */
RenderStats::RenderStats() {
has_profiling = false;
}
void RenderStats::collect_profiling(Scene *scene, Stats *stats)
{
has_profiling = true;
Profiler &prof = stats->profiler;
kernel = NamedNestedSampleStats("Total render time", prof.get_event(PROFILING_UNKNOWN));
kernel.add_entry("Ray setup", prof.get_event(PROFILING_RAY_SETUP));
kernel.add_entry("Result writing", prof.get_event(PROFILING_WRITE_RESULT));
NamedNestedSampleStats &integrator = kernel.add_entry("Path integration", prof.get_event(PROFILING_PATH_INTEGRATE));
integrator.add_entry("Scene intersection", prof.get_event(PROFILING_SCENE_INTERSECT));
integrator.add_entry("Indirect emission", prof.get_event(PROFILING_INDIRECT_EMISSION));
integrator.add_entry("Volumes", prof.get_event(PROFILING_VOLUME));
NamedNestedSampleStats &shading = integrator.add_entry("Shading", 0);
shading.add_entry("Shader Setup", prof.get_event(PROFILING_SHADER_SETUP));
shading.add_entry("Shader Eval", prof.get_event(PROFILING_SHADER_EVAL));
shading.add_entry("Shader Apply", prof.get_event(PROFILING_SHADER_APPLY));
shading.add_entry("Ambient Occlusion", prof.get_event(PROFILING_AO));
shading.add_entry("Subsurface", prof.get_event(PROFILING_SUBSURFACE));
integrator.add_entry("Connect Light", prof.get_event(PROFILING_CONNECT_LIGHT));
integrator.add_entry("Surface Bounce", prof.get_event(PROFILING_SURFACE_BOUNCE));
NamedNestedSampleStats &intersection = kernel.add_entry("Intersection", 0);
intersection.add_entry("Full Intersection", prof.get_event(PROFILING_INTERSECT));
intersection.add_entry("Local Intersection", prof.get_event(PROFILING_INTERSECT_LOCAL));
intersection.add_entry("Shadow All Intersection", prof.get_event(PROFILING_INTERSECT_SHADOW_ALL));
intersection.add_entry("Volume Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME));
intersection.add_entry("Volume All Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME_ALL));
NamedNestedSampleStats &closure = kernel.add_entry("Closures", 0);
closure.add_entry("Surface Closure Evaluation", prof.get_event(PROFILING_CLOSURE_EVAL));
closure.add_entry("Surface Closure Sampling", prof.get_event(PROFILING_CLOSURE_SAMPLE));
closure.add_entry("Volume Closure Evaluation", prof.get_event(PROFILING_CLOSURE_VOLUME_EVAL));
closure.add_entry("Volume Closure Sampling", prof.get_event(PROFILING_CLOSURE_VOLUME_SAMPLE));
NamedNestedSampleStats &denoising = kernel.add_entry("Denoising", prof.get_event(PROFILING_DENOISING));
denoising.add_entry("Construct Transform", prof.get_event(PROFILING_DENOISING_CONSTRUCT_TRANSFORM));
denoising.add_entry("Reconstruct", prof.get_event(PROFILING_DENOISING_RECONSTRUCT));
NamedNestedSampleStats &prefilter = denoising.add_entry("Prefiltering", 0);
prefilter.add_entry("Divide Shadow", prof.get_event(PROFILING_DENOISING_DIVIDE_SHADOW));
prefilter.add_entry("Non-Local means", prof.get_event(PROFILING_DENOISING_NON_LOCAL_MEANS));
prefilter.add_entry("Get Feature", prof.get_event(PROFILING_DENOISING_GET_FEATURE));
prefilter.add_entry("Detect Outliers", prof.get_event(PROFILING_DENOISING_DETECT_OUTLIERS));
prefilter.add_entry("Combine Halves", prof.get_event(PROFILING_DENOISING_COMBINE_HALVES));
shaders.entries.clear();
foreach(Shader *shader, scene->shaders) {
uint64_t samples, hits;
if(prof.get_shader(shader->id, samples, hits)) {
shaders.add(shader->name, samples, hits);
}
}
objects.entries.clear();
foreach(Object *object, scene->objects) {
uint64_t samples, hits;
if(prof.get_object(object->get_device_index(), samples, hits)) {
objects.add(object->name, samples, hits);
}
}
}
string RenderStats::full_report()
@ -113,6 +299,14 @@ string RenderStats::full_report()
string result = "";
result += "Mesh statistics:\n" + mesh.full_report(1);
result += "Image statistics:\n" + image.full_report(1);
if(has_profiling) {
result += "Kernel statistics:\n" + kernel.full_report(1);
result += "Shader statistics:\n" + shaders.full_report(1);
result += "Object statistics:\n" + objects.full_report(1);
}
else {
result += "Profiling information not available (only works with CPU rendering)";
}
return result;
}

View File

@ -17,6 +17,9 @@
#ifndef __RENDER_STATS_H__
#define __RENDER_STATS_H__
#include "render/scene.h"
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_vector.h"
@ -61,6 +64,51 @@ public:
vector<NamedSizeEntry> entries;
};
class NamedNestedSampleStats {
public:
NamedNestedSampleStats();
NamedNestedSampleStats(const string& name, uint64_t samples);
NamedNestedSampleStats& add_entry(const string& name, uint64_t samples);
/* Updates sum_samples recursively. */
void update_sum();
string full_report(int indent_level = 0, uint64_t total_samples = 0);
string name;
/* self_samples contains only the samples that this specific event got,
* while sum_samples also includes the samples of all sub-entries. */
uint64_t self_samples, sum_samples;
vector<NamedNestedSampleStats> entries;
};
/* Named entry containing both a time-sample count for objects of a type and a
* total count of processed items.
* This makes it possible to estimate the time spent per item. */
class NamedSampleCountPair {
public:
NamedSampleCountPair(const ustring& name, uint64_t samples, uint64_t hits);
ustring name;
uint64_t samples;
uint64_t hits;
};
/* Contains statistics about pairs of samples and counts as described above. */
class NamedSampleCountStats {
public:
NamedSampleCountStats();
string full_report(int indent_level = 0);
void add(const ustring& name, uint64_t samples, uint64_t hits);
typedef unordered_map<ustring, NamedSampleCountPair, ustringHash> entry_map;
entry_map entries;
};
/* Statistics about mesh in the render database. */
class MeshStats {
public:
@ -95,8 +143,16 @@ public:
/* Return full report as string. */
string full_report();
/* Collect kernel sampling information from Stats. */
void collect_profiling(Scene *scene, Stats *stats);
bool has_profiling;
MeshStats mesh;
ImageStats image;
NamedNestedSampleStats kernel;
NamedSampleCountStats shaders;
NamedSampleCountStats objects;
};
CCL_NAMESPACE_END

View File

@ -17,6 +17,7 @@ set(SRC
util_md5.cpp
util_murmurhash.cpp
util_path.cpp
util_profiling.cpp
util_string.cpp
util_simd.cpp
util_system.cpp
@ -71,6 +72,7 @@ set(SRC_HEADERS
util_optimization.h
util_param.h
util_path.h
util_profiling.h
util_progress.h
util_projection.h
util_queue.h

View File

@ -0,0 +1,178 @@
/*
* Copyright 2011-2018 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "util/util_algorithm.h"
#include "util/util_profiling.h"
#include "util/util_set.h"
CCL_NAMESPACE_BEGIN
Profiler::Profiler()
: do_stop_worker(true), worker(NULL)
{
}
Profiler::~Profiler()
{
assert(worker == NULL);
}
void Profiler::run()
{
uint64_t updates = 0;
auto start_time = std::chrono::system_clock::now();
while(!do_stop_worker) {
thread_scoped_lock lock(mutex);
foreach(ProfilingState *state, states) {
uint32_t cur_event = state->event;
int32_t cur_shader = state->shader;
int32_t cur_object = state->object;
/* The state reads/writes should be atomic, but just to be sure
* check the values for validity anyway. */
if(cur_event < PROFILING_NUM_EVENTS) {
event_samples[cur_event]++;
}
if(cur_shader >= 0 && cur_shader < shader_samples.size()) {
/* Only consider the active shader during events whose runtime significantly depends on it. */
if(((cur_event >= PROFILING_SHADER_EVAL ) && (cur_event <= PROFILING_SUBSURFACE)) ||
((cur_event >= PROFILING_CLOSURE_EVAL) && (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) {
shader_samples[cur_shader]++;
}
}
if(cur_object >= 0 && cur_object < object_samples.size()) {
object_samples[cur_object]++;
}
}
lock.unlock();
/* Relative waits always overshoot a bit, so just waiting 1ms every
* time would cause the sampling to drift over time.
* By keeping track of the absolute time, the wait times correct themselves -
* if one wait overshoots a lot, the next one will be shorter to compensate. */
updates++;
std::this_thread::sleep_until(start_time + updates*std::chrono::milliseconds(1));
}
}
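
As a standalone illustration of the wait pattern used above (not Cycles code, just a minimal compilable sketch): sleeping until an absolute deadline keeps the long-run tick rate at 1 kHz even though each individual wait overshoots slightly, whereas a relative 1 ms sleep_for() would accumulate that overshoot as drift.

#include <chrono>
#include <cstdint>
#include <thread>

int main()
{
	const auto start = std::chrono::steady_clock::now();
	for(uint64_t tick = 1; tick <= 1000; tick++) {
		/* ... take one profiling sample here ... */

		/* Wait until the absolute deadline for this tick; overshoot on one
		 * iteration automatically shortens the wait on the next one. */
		std::this_thread::sleep_until(start + tick * std::chrono::milliseconds(1));
	}
	return 0;
}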
void Profiler::reset(int num_shaders, int num_objects)
{
bool running = (worker != NULL);
if(running) {
stop();
}
/* Resize and clear the accumulation vectors. */
shader_hits.assign(num_shaders, 0);
object_hits.assign(num_objects, 0);
event_samples.assign(PROFILING_NUM_EVENTS, 0);
shader_samples.assign(num_shaders, 0);
object_samples.assign(num_objects, 0);
if(running) {
start();
}
}
void Profiler::start()
{
assert(worker == NULL);
do_stop_worker = false;
worker = new thread(function_bind(&Profiler::run, this));
}
void Profiler::stop()
{
if(worker != NULL) {
do_stop_worker = true;
worker->join();
delete worker;
worker = NULL;
}
}
void Profiler::add_state(ProfilingState *state)
{
thread_scoped_lock lock(mutex);
/* Add the ProfilingState to the list of sampled states. */
assert(std::find(states.begin(), states.end(), state) == states.end());
states.push_back(state);
/* Resize thread-local hit counters. */
state->shader_hits.assign(shader_hits.size(), 0);
state->object_hits.assign(object_hits.size(), 0);
/* Initialize the state. */
state->event = PROFILING_UNKNOWN;
state->shader = -1;
state->object = -1;
state->active = true;
}
void Profiler::remove_state(ProfilingState *state)
{
thread_scoped_lock lock(mutex);
/* Remove the ProfilingState from the list of sampled states. */
states.erase(std::remove(states.begin(), states.end(), state), states.end());
state->active = false;
/* Merge thread-local hit counters. */
assert(shader_hits.size() == state->shader_hits.size());
for(int i = 0; i < shader_hits.size(); i++) {
shader_hits[i] += state->shader_hits[i];
}
assert(object_hits.size() == state->object_hits.size());
for(int i = 0; i < object_hits.size(); i++) {
object_hits[i] += state->object_hits[i];
}
}
uint64_t Profiler::get_event(ProfilingEvent event)
{
assert(worker == NULL);
return event_samples[event];
}
bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits)
{
assert(worker == NULL);
if(shader_samples[shader] == 0) {
return false;
}
samples = shader_samples[shader];
hits = shader_hits[shader];
return true;
}
bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits)
{
assert(worker == NULL);
if(object_samples[object] == 0) {
return false;
}
samples = object_samples[object];
hits = object_hits[object];
return true;
}
CCL_NAMESPACE_END

View File

@ -0,0 +1,175 @@
/*
* Copyright 2011-2018 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __UTIL_PROFILING_H__
#define __UTIL_PROFILING_H__
#include <atomic>
#include "util/util_foreach.h"
#include "util/util_map.h"
#include "util/util_thread.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
enum ccl_try_align(16) ProfilingEvent : uint32_t {
PROFILING_UNKNOWN,
PROFILING_RAY_SETUP,
PROFILING_PATH_INTEGRATE,
PROFILING_SCENE_INTERSECT,
PROFILING_INDIRECT_EMISSION,
PROFILING_VOLUME,
PROFILING_SHADER_SETUP,
PROFILING_SHADER_EVAL,
PROFILING_SHADER_APPLY,
PROFILING_AO,
PROFILING_SUBSURFACE,
PROFILING_CONNECT_LIGHT,
PROFILING_SURFACE_BOUNCE,
PROFILING_WRITE_RESULT,
PROFILING_INTERSECT,
PROFILING_INTERSECT_LOCAL,
PROFILING_INTERSECT_SHADOW_ALL,
PROFILING_INTERSECT_VOLUME,
PROFILING_INTERSECT_VOLUME_ALL,
PROFILING_CLOSURE_EVAL,
PROFILING_CLOSURE_SAMPLE,
PROFILING_CLOSURE_VOLUME_EVAL,
PROFILING_CLOSURE_VOLUME_SAMPLE,
PROFILING_DENOISING,
PROFILING_DENOISING_CONSTRUCT_TRANSFORM,
PROFILING_DENOISING_RECONSTRUCT,
PROFILING_DENOISING_DIVIDE_SHADOW,
PROFILING_DENOISING_NON_LOCAL_MEANS,
PROFILING_DENOISING_COMBINE_HALVES,
PROFILING_DENOISING_GET_FEATURE,
PROFILING_DENOISING_DETECT_OUTLIERS,
PROFILING_NUM_EVENTS,
};
/* Contains the current execution state of a worker thread.
* These values are constantly updated by the worker.
* Periodically the profiler thread will wake up, read them
* and update its internal counters based on them.
*
* Atomics aren't needed here since we're only doing direct
* writes and reads to (4-byte-aligned) uint32_t, which is
* guaranteed to be atomic on x86 since the 486.
* Memory ordering is not guaranteed but does not matter.
*
* And even on other architectures, the extremely rare corner
* case of reading an intermediate state could at worst result
* in a single incorrect sample. */
struct ProfilingState {
volatile uint32_t event = PROFILING_UNKNOWN;
volatile int32_t shader = -1;
volatile int32_t object = -1;
volatile bool active = false;
vector<uint64_t> shader_hits;
vector<uint64_t> object_hits;
};
class Profiler {
public:
Profiler();
~Profiler();
void reset(int num_shaders, int num_objects);
void start();
void stop();
void add_state(ProfilingState *state);
void remove_state(ProfilingState *state);
uint64_t get_event(ProfilingEvent event);
bool get_shader(int shader, uint64_t &samples, uint64_t &hits);
bool get_object(int object, uint64_t &samples, uint64_t &hits);
protected:
void run();
/* Tracks how often the worker was in each ProfilingEvent while sampling,
* so multiplying the values by the sample frequency (currently 1ms)
* gives the approximate time spent in each state. */
vector<uint64_t> event_samples;
vector<uint64_t> shader_samples;
vector<uint64_t> object_samples;
/* Tracks the total number of times every object/shader was hit.
* Used to evaluate relative cost, written by the render thread.
* Indexed by the shader and object IDs that the kernel also uses
* to index __object_flag and __shaders. */
vector<uint64_t> shader_hits;
vector<uint64_t> object_hits;
volatile bool do_stop_worker;
thread *worker;
thread_mutex mutex;
vector<ProfilingState*> states;
};
class ProfilingHelper {
public:
ProfilingHelper(ProfilingState *state, ProfilingEvent event)
: state(state)
{
previous_event = state->event;
state->event = event;
}
inline void set_event(ProfilingEvent event)
{
state->event = event;
}
inline void set_shader(int shader)
{
state->shader = shader;
if(state->active) {
assert(shader < state->shader_hits.size());
state->shader_hits[shader]++;
}
}
inline void set_object(int object)
{
state->object = object;
if(state->active) {
assert(object < state->object_hits.size());
state->object_hits[object]++;
}
}
~ProfilingHelper()
{
state->event = previous_event;
}
private:
ProfilingState *state;
uint32_t previous_event;
};
CCL_NAMESPACE_END
#endif /* __UTIL_PROFILING_H__ */
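
Putting the pieces together, the intended lifecycle looks roughly like this (a condensed, illustrative sketch of how this commit wires Session, CPUDevice and the kernel together; profile_one_render is a hypothetical helper, not part of the patch):

#include "util/util_profiling.h"

void profile_one_render(int num_shaders, int num_objects)
{
	Profiler profiler;                 /* owned by Stats/Session in the real code */
	profiler.reset(num_shaders, num_objects);
	profiler.start();                  /* spawns the 1 ms sampling thread */

	ProfilingState state;              /* one per render thread (KernelGlobals::profiler) */
	profiler.add_state(&state);
	/* ... render; PROFILING_INIT()/PROFILING_SHADER()/PROFILING_OBJECT()
	 * update `state` from the kernel while the sampler reads it ... */
	profiler.remove_state(&state);

	profiler.stop();                   /* sampling must be stopped before querying results */
	uint64_t intersect_samples = profiler.get_event(PROFILING_INTERSECT);
	(void)intersect_samples;
}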

View File

@ -18,6 +18,7 @@
#define __UTIL_STATS_H__
#include "util/util_atomic.h"
#include "util/util_profiling.h"
CCL_NAMESPACE_BEGIN
@ -40,6 +41,8 @@ public:
size_t mem_used;
size_t mem_peak;
Profiler profiler;
};
CCL_NAMESPACE_END

View File

@ -17,7 +17,7 @@
# ***** END GPL LICENSE BLOCK *****
set(INC
./include
include
)
set(INC_SYS