Cycles: code refactor to bake using regular render session and tiles

There should be no user visible change from this, except that tile size
now affects performance. The goal here is to simplify bake denoising in
D3099, letting it reuse more denoising tiles and pass code.

A lot of code is now shared with regular rendering, with the two main
differences being that we read some render result passes from the bake API
when starting to render a tile, and call the bake kernel instead of the
path trace kernel.

With this kind of design where Cycles asks for tiles from the bake API,
it should eventually be easier to reduce memory usage, show tiles as
they are baked, or bake multiple passes at once, though there's still
quite some work needed for that.

Reviewers: #cycles

Subscribers: monio, wmatyjewicz, lukasstockner97, michaelknubben

Differential Revision: https://developer.blender.org/D3108
This commit is contained in:
Brecht Van Lommel 2019-05-10 21:39:58 +02:00 committed by Lukas Stockner
parent 3ff8ca60e9
commit d9773edaa3
Notes: blender-bot 2023-02-14 08:59:10 +01:00
Referenced by issue #82592, Cycles Render engine 2.9
Referenced by issue #77298, Can't bake texture with multiple objects.
31 changed files with 606 additions and 674 deletions

View File

@ -82,8 +82,8 @@ class CyclesRender(bpy.types.RenderEngine):
def render(self, depsgraph):
engine.render(self, depsgraph)
def bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
engine.bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result)
def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)
# viewport render
def view_update(self, context, depsgraph):

View File

@ -168,11 +168,11 @@ def render(engine, depsgraph):
_cycles.render(engine.session, depsgraph.as_pointer())
def bake(engine, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
import _cycles
session = getattr(engine, "session", None)
if session is not None:
_cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, object_id, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer())
_cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, width, height)
def reset(engine, data, depsgraph):

View File

@ -298,22 +298,18 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydepsgraph, *pyobject;
PyObject *pypixel_array, *pyresult;
const char *pass_type;
int num_pixels, depth, object_id, pass_filter;
int pass_filter, width, height;
if (!PyArg_ParseTuple(args,
"OOOsiiOiiO",
"OOOsiii",
&pysession,
&pydepsgraph,
&pyobject,
&pass_type,
&pass_filter,
&object_id,
&pypixel_array,
&num_pixels,
&depth,
&pyresult))
&width,
&height))
return NULL;
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
@ -326,23 +322,9 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
BL::Object b_object(objectptr);
void *b_result = PyLong_AsVoidPtr(pyresult);
PointerRNA bakepixelptr;
RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr);
BL::BakePixel b_bake_pixel(bakepixelptr);
python_thread_state_save(&session->python_thread_state);
session->bake(b_depsgraph,
b_object,
pass_type,
pass_filter,
object_id,
b_bake_pixel,
(size_t)num_pixels,
depth,
(float *)b_result);
session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height);
python_thread_state_restore(&session->python_thread_state);

View File

@ -247,9 +247,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
void BlenderSession::free_session()
{
if (sync)
delete sync;
delete sync;
delete session;
}
@ -317,6 +315,7 @@ static void end_render_result(BL::RenderEngine &b_engine,
void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
bool do_read_only,
bool highlight)
{
int x = rtile.x - session->tile_manager.params.full_x;
@ -342,7 +341,23 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
BL::RenderLayer b_rlay = *b_single_rlay;
if (do_update_only) {
if (do_read_only) {
/* copy each pass */
BL::RenderLayer::passes_iterator b_iter;
for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
BL::RenderPass b_pass(*b_iter);
/* find matching pass type */
PassType pass_type = BlenderSync::get_pass_type(b_pass);
int components = b_pass.channels();
rtile.buffers->set_pass_rect(pass_type, components, (float *)b_pass.rect());
}
end_render_result(b_engine, b_rr, false, false, false);
}
else if (do_update_only) {
/* Sample would be zero at initial tile update, which is only needed
* to tag tile form blender side as IN PROGRESS for proper highlight
* no buffers should be sent to blender yet. For denoise we also
@ -362,9 +377,14 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
}
}
void BlenderSession::read_render_tile(RenderTile &rtile)
{
do_write_update_render_tile(rtile, false, true, false);
}
void BlenderSession::write_render_tile(RenderTile &rtile)
{
do_write_update_render_tile(rtile, false, false);
do_write_update_render_tile(rtile, false, false, false);
}
void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
@ -374,9 +394,9 @@ void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
* would need to be investigated a bit further, but for now shall be fine
*/
if (!b_engine.is_preview())
do_write_update_render_tile(rtile, true, highlight);
do_write_update_render_tile(rtile, true, false, highlight);
else
do_write_update_render_tile(rtile, false, false);
do_write_update_render_tile(rtile, false, false, false);
}
static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@ -593,25 +613,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
#endif
}
static void populate_bake_data(BakeData *data,
const int object_id,
BL::BakePixel &pixel_array,
const int num_pixels)
{
BL::BakePixel bp = pixel_array;
int i;
for (i = 0; i < num_pixels; i++) {
if (bp.object_id() == object_id) {
data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
}
else {
data->set_null(i);
}
bp = bp.next();
}
}
static int bake_pass_filter_get(const int pass_filter)
{
int flag = BAKE_FILTER_NONE;
@ -642,43 +643,26 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
BL::Object &b_object,
const string &pass_type,
const int pass_filter,
const int object_id,
BL::BakePixel &pixel_array,
const size_t num_pixels,
const int /*depth*/,
float result[])
const int bake_width,
const int bake_height)
{
b_depsgraph = b_depsgraph_;
ShaderEvalType shader_type = get_shader_type(pass_type);
/* Set baking flag in advance, so kernel loading can check if we need
* any baking capabilities.
*/
scene->bake_manager->set_baking(true);
/* ensure kernels are loaded before we do any scene updates */
session->load_kernels();
if (shader_type == SHADER_EVAL_UV) {
/* force UV to be available */
Pass::add(PASS_UV, scene->film->passes);
}
int bake_pass_filter = bake_pass_filter_get(pass_filter);
bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
/* force use_light_pass to be true if we bake more than just colors */
if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
Pass::add(PASS_LIGHT, scene->film->passes);
}
/* Initialize bake manager, before we load the baking kernels. */
scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);
/* create device and update scene */
scene->film->tag_update(scene);
scene->integrator->tag_update(scene);
/* Passes are identified by name, so in order to return the combined pass we need to set the
* name. */
Pass::add(PASS_COMBINED, scene->film->passes, "Combined");
session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
if (!session->progress.get_cancel()) {
/* update scene */
/* Sync scene. */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, "");
sync->sync_data(
@ -686,75 +670,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
builtin_images_load();
}
BakeData *bake_data = NULL;
/* Object might have been disabled for rendering or excluded in some
* other way, in that case Blender will report a warning afterwards. */
bool object_found = false;
foreach (Object *ob, scene->objects) {
if (ob->name == b_object.name()) {
object_found = true;
break;
}
}
if (!session->progress.get_cancel()) {
/* get buffer parameters */
if (object_found && !session->progress.get_cancel()) {
/* Get session and buffer parameters. */
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
session_params.progressive_refine = false;
scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
BufferParams buffer_params;
buffer_params.width = bake_width;
buffer_params.height = bake_height;
buffer_params.passes = scene->film->passes;
/* set number of samples */
/* Update session. */
session->tile_manager.set_samples(session_params.samples);
session->reset(buffer_params, session_params.samples);
session->update_scene();
/* find object index. todo: is arbitrary - copied from mesh_displace.cpp */
size_t object_index = OBJECT_NONE;
int tri_offset = 0;
for (size_t i = 0; i < scene->objects.size(); i++) {
const Object *object = scene->objects[i];
const Geometry *geom = object->geometry;
if (object->name == b_object.name() && geom->type == Geometry::MESH) {
const Mesh *mesh = static_cast<const Mesh *>(geom);
object_index = i;
tri_offset = mesh->prim_offset;
break;
}
}
/* Object might have been disabled for rendering or excluded in some
* other way, in that case Blender will report a warning afterwards. */
if (object_index != OBJECT_NONE) {
int object = object_index;
bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
}
/* set number of samples */
session->tile_manager.set_samples(session_params.samples);
session->reset(buffer_params, session_params.samples);
session->update_scene();
session->progress.set_update_callback(
function_bind(&BlenderSession::update_bake_progress, this));
}
/* Perform bake. Check cancel to avoid crash with incomplete scene data. */
if (!session->progress.get_cancel() && bake_data) {
scene->bake_manager->bake(scene->device,
&scene->dscene,
scene,
session->progress,
shader_type,
bake_pass_filter,
bake_data,
result);
if (object_found && !session->progress.get_cancel()) {
session->start();
session->wait();
}
/* free all memory used (host and device), so we wouldn't leave render
* engine with extra memory allocated
*/
session->device_free();
delete sync;
sync = NULL;
session->read_bake_tile_cb = function_null;
session->write_render_tile_cb = function_null;
}
void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,

View File

@ -66,14 +66,12 @@ class BlenderSession {
BL::Object &b_object,
const string &pass_type,
const int custom_flag,
const int object_id,
BL::BakePixel &pixel_array,
const size_t num_pixels,
const int depth,
float pixels[]);
const int bake_width,
const int bake_height);
void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
void read_render_tile(RenderTile &rtile);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
@ -155,7 +153,10 @@ class BlenderSession {
void do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
void do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
bool do_read_only,
bool highlight);
void builtin_images_load();

View File

@ -481,6 +481,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("AO", PASS_AO);
MAP_PASS("Shadow", PASS_SHADOW);
MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
#ifdef __KERNEL_DEBUG__
MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);

View File

@ -223,7 +223,7 @@ class CUDADevice : public Device {
CUdeviceptr d_wtile,
CUstream stream = 0);
void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
void film_convert(DeviceTask &task,
device_ptr buffer,

View File

@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
cuMemGetInfo(&free_before, &total);
/* Get kernel function. */
CUfunction cuPathTrace;
CUfunction cuRender;
if (requested_features.use_integrator_branched) {
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
if (requested_features.use_baking) {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
}
else if (requested_features.use_integrator_branched) {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
int min_blocks, num_threads_per_block;
cuda_assert(cuOccupancyMaxPotentialBlockSize(
&min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
cuda_assert(
cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
/* Launch kernel, using just 1 block appears sufficient to reserve
* memory for all multiprocessors. It would be good to do this in
@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
0));
}
void CUDADevice::path_trace(DeviceTask &task,
RenderTile &rtile,
device_vector<WorkTile> &work_tiles)
void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
{
scoped_timer timer(&rtile.buffers->render_time);
@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task,
return;
CUDAContextScope scope(this);
CUfunction cuPathTrace;
CUfunction cuRender;
/* Get kernel function. */
if (task.integrator_branched) {
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
if (rtile.task == RenderTile::BAKE) {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
}
else if (task.integrator_branched) {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
if (have_error()) {
return;
}
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
work_tiles.alloc(1);
@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task,
* remain conservative for GPUs connected to a display to avoid driver
* timeouts and display freezing. */
int min_blocks, num_threads_per_block;
cuda_assert(cuOccupancyMaxPotentialBlockSize(
&min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
cuda_assert(
cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
if (!info.display_device) {
min_blocks *= 8;
}
@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task,
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(
cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
uint filter_sample = sample + wtile->num_samples - 1;
@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task)
CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
/* get kernel function */
if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
}
else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
}
else {
@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task)
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
path_trace(*task, tile, work_tiles);
render(*task, tile, work_tiles);
}
}
else if (tile.task == RenderTile::BAKE) {
render(*task, tile, work_tiles);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;

View File

@ -188,6 +188,7 @@ class CPUDevice : public Device {
convert_to_byte_kernel;
KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
shader_kernel;
KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
@ -270,6 +271,7 @@ class CPUDevice : public Device {
REGISTER_KERNEL(convert_to_half_float),
REGISTER_KERNEL(convert_to_byte),
REGISTER_KERNEL(shader),
REGISTER_KERNEL(bake),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
REGISTER_KERNEL(filter_write_feature),
@ -895,7 +897,7 @@ class CPUDevice : public Device {
}
}
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@ -919,12 +921,21 @@ class CPUDevice : public Device {
break;
}
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
if (use_coverage) {
coverage.init_pixel(x, y);
if (tile.task == RenderTile::PATH_TRACE) {
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
if (use_coverage) {
coverage.init_pixel(x, y);
}
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
}
else {
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
@ -1019,9 +1030,12 @@ class CPUDevice : public Device {
split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
}
else {
path_trace(task, tile, kg);
render(task, tile, kg);
}
}
else if (tile.task == RenderTile::BAKE) {
render(task, tile, kg);
}
else if (tile.task == RenderTile::DENOISE) {
denoise(denoising, tile);
task.update_progress(&tile, tile.w * tile.h);

View File

@ -451,6 +451,7 @@ class OpenCLDevice : public Device {
device_ptr rgba_half);
void shader(DeviceTask &task);
void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
void bake(DeviceTask &task, RenderTile &tile);
void denoise(RenderTile &tile, DenoisingTask &denoising);

View File

@ -1367,6 +1367,9 @@ void OpenCLDevice::thread_run(DeviceTask *task)
*/
clFinish(cqCommandQueue);
}
else if (tile.task == RenderTile::BAKE) {
bake(*task, tile);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
@ -1858,10 +1861,7 @@ void OpenCLDevice::shader(DeviceTask &task)
cl_int d_offset = task.offset;
OpenCLDevice::OpenCLProgram *program = &background_program;
if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
program = &bake_program;
}
else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
program = &displace_program;
}
program->wait_for_availability();
@ -1892,6 +1892,51 @@ void OpenCLDevice::shader(DeviceTask &task)
}
}
void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
{
scoped_timer timer(&rtile.buffers->render_time);
/* Cast arguments to cl types. */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
cl_int d_x = rtile.x;
cl_int d_y = rtile.y;
cl_int d_w = rtile.w;
cl_int d_h = rtile.h;
cl_int d_offset = rtile.offset;
cl_int d_stride = rtile.stride;
bake_program.wait_for_availability();
cl_kernel kernel = bake_program();
cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
set_kernel_arg_buffers(kernel, &start_arg_index);
start_arg_index += kernel_set_args(
kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
for (int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel()) {
if (task.need_finish_queue == false)
break;
}
kernel_set_args(kernel, start_arg_index, sample);
enqueue_kernel(kernel, d_w, d_h);
rtile.sample = sample + 1;
task.update_progress(&rtile, rtile.w * rtile.h);
}
clFinish(cqCommandQueue);
}
string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";

View File

@ -18,19 +18,33 @@ CCL_NAMESPACE_BEGIN
#ifdef __BAKING__
ccl_device_inline void compute_light_pass(
ccl_device_noinline void compute_light_pass(
KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
{
kernel_assert(kernel_data.film.use_light_pass);
PathRadiance L_sample;
PathState state;
Ray ray;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
/* emission and indirect shader data memory used by various functions */
ShaderData emission_sd, indirect_sd;
/* Emission and indirect shader data memory used by various functions. */
ShaderDataTinyStorage emission_sd_storage;
ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
ShaderData indirect_sd;
/* Init radiance. */
path_radiance_init(kg, L);
/* Init path state. */
PathState state;
path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL);
/* Evaluate surface shader. */
shader_eval_surface(kg, sd, &state, NULL, state.flag);
/* TODO, disable more closures we don't need besides transparent */
shader_bsdf_disable_transparency(kg, sd);
/* Init ray. */
Ray ray;
ray.P = sd->P + sd->Ng;
ray.D = -sd->Ng;
ray.t = FLT_MAX;
@ -38,18 +52,6 @@ ccl_device_inline void compute_light_pass(
ray.time = 0.5f;
# endif
/* init radiance */
path_radiance_init(kg, &L_sample);
/* init path state */
path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
/* evaluate surface shader */
shader_eval_surface(kg, sd, &state, NULL, state.flag);
/* TODO, disable more closures we don't need besides transparent */
shader_bsdf_disable_transparency(kg, sd);
# ifdef __BRANCHED_PATH__
if (!kernel_data.integrator.branched) {
/* regular path tracer */
@ -57,14 +59,13 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
kernel_path_ao(
kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd));
}
/* sample emission */
if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
bool is_sss_sample = false;
@ -77,12 +78,10 @@ ccl_device_inline void compute_light_pass(
SubsurfaceIndirectRays ss_indirect;
kernel_path_subsurface_init_indirect(&ss_indirect);
if (kernel_path_subsurface_scatter(
kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) {
while (ss_indirect.num_rays) {
kernel_path_subsurface_setup_indirect(
kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
kernel_path_indirect(
kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput);
kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
}
is_sss_sample = true;
}
@ -91,18 +90,18 @@ ccl_device_inline void compute_light_pass(
/* sample light and BSDF */
if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L);
if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) {
# ifdef __LAMP_MIS__
state.ray_t = 0.0f;
# endif
/* compute indirect light */
kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
/* sum and reset indirect light pass variables for the next samples */
path_radiance_sum_indirect(&L_sample);
path_radiance_reset_indirect(&L_sample);
path_radiance_sum_indirect(L);
path_radiance_reset_indirect(L);
}
}
# ifdef __BRANCHED_PATH__
@ -112,13 +111,13 @@ ccl_device_inline void compute_light_pass(
/* sample ambient occlusion */
if (pass_filter & BAKE_FILTER_AO) {
kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput);
}
/* sample emission */
if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
path_radiance_accum_emission(kg, L, &state, throughput, emission);
}
# ifdef __SUBSURFACE__
@ -127,7 +126,7 @@ ccl_device_inline void compute_light_pass(
/* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
* if scattering was successful. */
kernel_branched_path_subsurface_scatter(
kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
}
# endif
@ -138,19 +137,16 @@ ccl_device_inline void compute_light_pass(
if (kernel_data.integrator.use_direct_light) {
int all = kernel_data.integrator.sample_all_lights_direct;
kernel_branched_path_surface_connect_light(
kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
kg, sd, emission_sd, &state, throughput, 1.0f, L, all);
}
# endif
/* indirect light */
kernel_branched_path_surface_indirect_light(
kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L);
}
}
# endif
/* accumulate into master L */
path_radiance_accum_sample(L, &L_sample);
}
/* this helps with AA but it's not the real solution as it does not AA the geometry
@ -225,41 +221,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
return out;
}
ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
ccl_global uint4 *input,
ccl_global float4 *output,
ShaderEvalType type,
int pass_filter,
int i,
int offset,
int sample)
ccl_device void kernel_bake_evaluate(
KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
{
ShaderData sd;
PathState state = {0};
uint4 in = input[i * 2];
uint4 diff = input[i * 2 + 1];
/* Setup render buffers. */
const int index = offset + x + y * stride;
const int pass_stride = kernel_data.film.pass_stride;
buffer += index * pass_stride;
float3 out = make_float3(0.0f, 0.0f, 0.0f);
int object = in.x;
int prim = in.y;
ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
ccl_global float *output = buffer + kernel_data.film.pass_combined;
int prim = __float_as_uint(primitive[1]);
if (prim == -1)
return;
float u = __uint_as_float(in.z);
float v = __uint_as_float(in.w);
float dudx = __uint_as_float(diff.x);
float dudy = __uint_as_float(diff.y);
float dvdx = __uint_as_float(diff.z);
float dvdy = __uint_as_float(diff.w);
prim += kernel_data.bake.tri_offset;
/* Random number generator. */
uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed;
int num_samples = kernel_data.integrator.aa_samples;
/* random number generator */
uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
float filter_x, filter_y;
if (sample == 0) {
filter_x = filter_y = 0.5f;
@ -268,23 +251,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
}
/* subpixel u/v offset */
/* Barycentric UV with subpixel offset. */
float u = primitive[2];
float v = primitive[3];
float dudx = differential[0];
float dudy = differential[1];
float dvdx = differential[2];
float dvdy = differential[3];
if (sample > 0) {
u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
1.0f - u);
}
/* triangle */
/* Shader data setup. */
int object = kernel_data.bake.object_index;
int shader;
float3 P, Ng;
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
/* light passes */
PathRadiance L;
path_radiance_init(kg, &L);
ShaderData sd;
shader_setup_from_sample(
kg,
&sd,
@ -302,7 +291,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
LAMP_NONE);
sd.I = sd.N;
/* update differentials */
/* Setup differentials. */
sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
sd.du.dx = dudx;
@ -310,17 +299,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
sd.dv.dx = dvdx;
sd.dv.dy = dvdy;
/* set RNG state for shaders that use sampling */
/* Set RNG state for shaders that use sampling. */
PathState state = {0};
state.rng_hash = rng_hash;
state.rng_offset = 0;
state.sample = sample;
state.num_samples = num_samples;
state.min_ray_pdf = FLT_MAX;
/* light passes if we need more than color */
if (pass_filter & ~BAKE_FILTER_COLOR)
/* Light passes if we need more than color. */
PathRadiance L;
int pass_filter = kernel_data.bake.pass_filter;
if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR)
compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
float3 out = make_float3(0.0f, 0.0f, 0.0f);
ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type;
switch (type) {
/* data passes */
case SHADER_EVAL_NORMAL:
@ -441,10 +437,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
}
/* write output */
const float output_fac = 1.0f / num_samples;
const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
const float4 result = make_float4(out.x, out.y, out.z, 1.0f);
kernel_write_pass_float4(output, result);
}
#endif /* __BAKING__ */

View File

@ -395,6 +395,10 @@ typedef enum PassType {
PASS_VOLUME_INDIRECT,
/* No Scatter color since it's tricky to define what it would even mean. */
PASS_CATEGORY_LIGHT_END = 63,
PASS_BAKE_PRIMITIVE,
PASS_BAKE_DIFFERENTIAL,
PASS_CATEGORY_BAKE_END = 95
} PassType;
#define PASS_ANY (~0)
@ -1248,6 +1252,10 @@ typedef struct KernelFilm {
float4 xyz_to_b;
float4 rgb_to_y;
int pass_bake_primitive;
int pass_bake_differential;
int pad;
#ifdef __KERNEL_DEBUG__
int pass_bvh_traversed_nodes;
int pass_bvh_traversed_instances;
@ -1427,6 +1435,14 @@ typedef struct KernelTables {
} KernelTables;
static_assert_align(KernelTables, 16);
typedef struct KernelBake {
int object_index;
int tri_offset;
int type;
int pass_filter;
} KernelBake;
static_assert_align(KernelBake, 16);
typedef struct KernelData {
KernelCamera cam;
KernelFilm film;
@ -1435,6 +1451,7 @@ typedef struct KernelData {
KernelBVH bvh;
KernelCurves curve;
KernelTables tables;
KernelBake bake;
} KernelData;
static_assert_align(KernelData, 16);

View File

@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample);
void KERNEL_FUNCTION_FULL_NAME(bake)(
KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
/* Split kernels */
void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,

View File

@ -132,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
# endif /* KERNEL_STUB */
}
/* Bake */
void KERNEL_FUNCTION_FULL_NAME(bake)(
KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
{
# ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, bake);
# else
kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
# endif /* KERNEL_STUB */
}
/* Shader Evaluate */
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
@ -146,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
# ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, shader);
# else
if (type >= SHADER_EVAL_BAKE) {
# ifdef __BAKING__
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
# endif
}
else if (type == SHADER_EVAL_DISPLACE) {
if (type == SHADER_EVAL_DISPLACE) {
kernel_displace_evaluate(kg, input, output, i);
}
else {

View File

@ -214,13 +214,16 @@ kernel_cuda_background(uint4 *input,
#ifdef __BAKING__
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample)
kernel_cuda_bake(WorkTile *tile, uint total_work_size)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int work_index = ccl_global_id(0);
if(work_index < total_work_size) {
uint x, y, sample;
get_work_pixel(tile, work_index, &x, &y, &sample);
if(x < sx + sw) {
KernelGlobals kg;
kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
}
}
#endif

View File

@ -15,6 +15,7 @@
*/
#include "render/bake.h"
#include "render/buffers.h"
#include "render/integrator.h"
#include "render/mesh.h"
#include "render/object.h"
@ -24,235 +25,13 @@
CCL_NAMESPACE_BEGIN
BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels)
: m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels)
{
m_primitive.resize(num_pixels);
m_u.resize(num_pixels);
m_v.resize(num_pixels);
m_dudx.resize(num_pixels);
m_dudy.resize(num_pixels);
m_dvdx.resize(num_pixels);
m_dvdy.resize(num_pixels);
}
BakeData::~BakeData()
{
m_primitive.clear();
m_u.clear();
m_v.clear();
m_dudx.clear();
m_dudy.clear();
m_dvdx.clear();
m_dvdy.clear();
}
void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy)
{
m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim);
m_u[i] = uv[0];
m_v[i] = uv[1];
m_dudx[i] = dudx;
m_dudy[i] = dudy;
m_dvdx[i] = dvdx;
m_dvdy[i] = dvdy;
}
void BakeData::set_null(int i)
{
m_primitive[i] = -1;
}
int BakeData::object()
{
return m_object;
}
size_t BakeData::size()
{
return m_num_pixels;
}
bool BakeData::is_valid(int i)
{
return m_primitive[i] != -1;
}
uint4 BakeData::data(int i)
{
return make_uint4(m_object, m_primitive[i], __float_as_int(m_u[i]), __float_as_int(m_v[i]));
}
uint4 BakeData::differentials(int i)
{
return make_uint4(__float_as_int(m_dudx[i]),
__float_as_int(m_dudy[i]),
__float_as_int(m_dvdx[i]),
__float_as_int(m_dvdy[i]));
}
BakeManager::BakeManager()
{
m_bake_data = NULL;
m_is_baking = false;
need_update = true;
m_shader_limit = 512 * 512;
}
BakeManager::~BakeManager()
{
if (m_bake_data)
delete m_bake_data;
}
bool BakeManager::get_baking()
{
return m_is_baking;
}
void BakeManager::set_baking(const bool value)
{
m_is_baking = value;
}
BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels)
{
m_bake_data = new BakeData(object, tri_offset, num_pixels);
return m_bake_data;
}
void BakeManager::set_shader_limit(const size_t x, const size_t y)
{
m_shader_limit = x * y;
m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2)));
}
bool BakeManager::bake(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress,
ShaderEvalType shader_type,
const int pass_filter,
BakeData *bake_data,
float result[])
{
size_t num_pixels = bake_data->size();
int num_samples = aa_samples(scene, bake_data, shader_type);
/* calculate the total pixel samples for the progress bar */
total_pixel_samples = 0;
for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
total_pixel_samples += shader_size * num_samples;
}
progress.reset_sample();
progress.set_total_pixel_samples(total_pixel_samples);
/* needs to be up to date for baking specific AA samples */
dscene->data.integrator.aa_samples = num_samples;
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
/* setup input for device task */
device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY);
uint4 *d_input_data = d_input.alloc(shader_size * 2);
size_t d_input_size = 0;
for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
d_input_data[d_input_size++] = bake_data->data(i);
d_input_data[d_input_size++] = bake_data->differentials(i);
}
if (d_input_size == 0) {
m_is_baking = false;
return false;
}
/* run device task */
device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE);
d_output.alloc(shader_size);
d_output.zero_to_device();
d_input.copy_to_device();
DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer;
task.shader_output = d_output.device_pointer;
task.shader_eval_type = shader_type;
task.shader_filter = pass_filter;
task.shader_x = 0;
task.offset = shader_offset;
task.shader_w = d_output.size();
task.num_samples = num_samples;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
device->task_add(task);
device->task_wait();
if (progress.get_cancel()) {
d_input.free();
d_output.free();
m_is_baking = false;
return false;
}
d_output.copy_from_device(0, 1, d_output.size());
d_input.free();
/* read result */
int k = 0;
float4 *offset = d_output.data();
size_t depth = 4;
for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
size_t index = i * depth;
float4 out = offset[k++];
if (bake_data->is_valid(i)) {
for (size_t j = 0; j < 4; j++) {
result[index + j] = out[j];
}
}
}
d_output.free();
}
m_is_baking = false;
return true;
}
void BakeManager::device_update(Device * /*device*/,
DeviceScene * /*dscene*/,
Scene * /*scene*/,
Progress &progress)
{
if (!need_update)
return;
if (progress.get_cancel())
return;
need_update = false;
}
void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
{
}
int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type)
static int aa_samples(Scene *scene, Object *object, ShaderEvalType type)
{
if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
return 1;
}
else if (type == SHADER_EVAL_NORMAL) {
/* Only antialias normal if mesh has bump mapping. */
Object *object = scene->objects[bake_data->object()];
if (object->geometry) {
foreach (Shader *shader, object->geometry->used_shaders) {
if (shader->has_bump) {
@ -269,7 +48,7 @@ int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType ty
}
/* Keep it synced with kernel_bake.h logic */
int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter)
static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter)
{
const int component_flags = pass_filter &
(BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
@ -292,4 +71,84 @@ int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_
}
}
BakeManager::BakeManager()
{
type = SHADER_EVAL_BAKE;
pass_filter = 0;
need_update = true;
}
BakeManager::~BakeManager()
{
}
bool BakeManager::get_baking()
{
return !object_name.empty();
}
void BakeManager::set(Scene *scene,
const std::string &object_name_,
ShaderEvalType type_,
int pass_filter_)
{
object_name = object_name_;
type = type_;
pass_filter = shader_type_to_pass_filter(type_, pass_filter_);
Pass::add(PASS_BAKE_PRIMITIVE, scene->film->passes);
Pass::add(PASS_BAKE_DIFFERENTIAL, scene->film->passes);
if (type == SHADER_EVAL_UV) {
/* force UV to be available */
Pass::add(PASS_UV, scene->film->passes);
}
/* force use_light_pass to be true if we bake more than just colors */
if (pass_filter & ~BAKE_FILTER_COLOR) {
Pass::add(PASS_LIGHT, scene->film->passes);
}
/* create device and update scene */
scene->film->tag_update(scene);
scene->integrator->tag_update(scene);
need_update = true;
}
void BakeManager::device_update(Device * /*device*/,
DeviceScene *dscene,
Scene *scene,
Progress & /* progress */)
{
if (!need_update)
return;
KernelIntegrator *kintegrator = &dscene->data.integrator;
KernelBake *kbake = &dscene->data.bake;
kbake->type = type;
kbake->pass_filter = pass_filter;
int object_index = 0;
foreach (Object *object, scene->objects) {
const Geometry *geom = object->geometry;
if (object->name == object_name && geom->type == Geometry::MESH) {
kbake->object_index = object_index;
kbake->tri_offset = geom->prim_offset;
kintegrator->aa_samples = aa_samples(scene, object, type);
break;
}
object_index++;
}
need_update = false;
}
void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
{
}
CCL_NAMESPACE_END

View File

@ -25,67 +25,23 @@
CCL_NAMESPACE_BEGIN
class BakeData {
public:
BakeData(const int object, const size_t tri_offset, const size_t num_pixels);
~BakeData();
void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy);
void set_null(int i);
int object();
size_t size();
uint4 data(int i);
uint4 differentials(int i);
bool is_valid(int i);
private:
int m_object;
size_t m_tri_offset;
size_t m_num_pixels;
vector<int> m_primitive;
vector<float> m_u;
vector<float> m_v;
vector<float> m_dudx;
vector<float> m_dudy;
vector<float> m_dvdx;
vector<float> m_dvdy;
};
class BakeManager {
public:
BakeManager();
~BakeManager();
void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter);
bool get_baking();
void set_baking(const bool value);
BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels);
void set_shader_limit(const size_t x, const size_t y);
bool bake(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress,
ShaderEvalType shader_type,
const int pass_filter,
BakeData *bake_data,
float result[]);
void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void device_free(Device *device, DeviceScene *dscene);
static int shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter);
static int aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type);
bool need_update;
size_t total_pixel_samples;
private:
BakeData *m_bake_data;
bool m_is_baking;
size_t m_shader_limit;
ShaderEvalType type;
int pass_filter;
std::string object_name;
};
CCL_NAMESPACE_END

View File

@ -459,6 +459,40 @@ bool RenderBuffers::get_pass_rect(
return false;
}
bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels)
{
if (buffer.data() == NULL) {
return false;
}
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
Pass &pass = params.passes[j];
if (pass.type != type) {
pass_offset += pass.components;
continue;
}
float *out = buffer.data() + pass_offset;
int pass_stride = params.get_passes_size();
int size = params.width * params.height;
assert(pass.components == components);
for (int i = 0; i < size; i++, out += pass_stride, pixels += components) {
for (int j = 0; j < components; j++) {
out[j] = pixels[j];
}
}
return true;
}
return false;
}
/* Display Buffer */
DisplayBuffer::DisplayBuffer(Device *device, bool linear)

View File

@ -92,6 +92,7 @@ class RenderBuffers {
const string &name, float exposure, int sample, int components, float *pixels);
bool get_denoising_pass_rect(
int offset, float exposure, int sample, int components, float *pixels);
bool set_pass_rect(PassType type, int components, float *pixels);
};
/* Display Buffer
@ -130,7 +131,7 @@ class DisplayBuffer {
class RenderTile {
public:
typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task;
Task task;
int x, y, w, h;

View File

@ -196,6 +196,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
case PASS_AOV_VALUE:
pass.components = 1;
break;
case PASS_BAKE_PRIMITIVE:
case PASS_BAKE_DIFFERENTIAL:
pass.components = 4;
break;
default:
assert(false);
break;
@ -386,11 +390,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
if (pass.type <= PASS_CATEGORY_MAIN_END) {
kfilm->pass_flag |= pass_flag;
}
else {
assert(pass.type <= PASS_CATEGORY_LIGHT_END);
else if (pass.type <= PASS_CATEGORY_LIGHT_END) {
kfilm->use_light_pass = 1;
kfilm->light_pass_flag |= pass_flag;
}
else {
assert(pass.type <= PASS_CATEGORY_BAKE_END);
}
switch (pass.type) {
case PASS_COMBINED:
@ -471,6 +477,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_volume_direct = kfilm->pass_stride;
break;
case PASS_BAKE_PRIMITIVE:
kfilm->pass_bake_primitive = kfilm->pass_stride;
break;
case PASS_BAKE_DIFFERENTIAL:
kfilm->pass_bake_differential = kfilm->pass_stride;
break;
#ifdef WITH_CYCLES_DEBUG
case PASS_BVH_TRAVERSED_NODES:
kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;

View File

@ -410,7 +410,16 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
rtile.num_samples = tile_manager.state.num_samples;
rtile.resolution = tile_manager.state.resolution_divider;
rtile.tile_index = tile->index;
rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
if (tile->state == Tile::DENOISE) {
rtile.task = RenderTile::DENOISE;
}
else if (read_bake_tile_cb) {
rtile.task = RenderTile::BAKE;
}
else {
rtile.task = RenderTile::PATH_TRACE;
}
tile_lock.unlock();
@ -451,11 +460,20 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
rtile.buffers = tile->buffers;
rtile.sample = tile_manager.state.sample;
/* this will tag tile as IN PROGRESS in blender-side render pipeline,
* which is needed to highlight currently rendering tile before first
* sample was processed for it
*/
update_tile_sample(rtile);
if (read_bake_tile_cb) {
/* This will read any passes needed as input for baking. */
{
thread_scoped_lock tile_lock(tile_mutex);
read_bake_tile_cb(rtile);
}
rtile.buffers->buffer.copy_to_device();
}
else {
/* This will tag tile as IN PROGRESS in blender-side render pipeline,
* which is needed to highlight currently rendering tile before first
* sample was processed for it. */
update_tile_sample(rtile);
}
return true;
}
@ -484,6 +502,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
bool delete_tile;
if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) {
/* Finished tile pixels write. */
if (write_render_tile_cb && params.progressive_refine == false) {
write_render_tile_cb(rtile);
}
@ -494,6 +513,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
}
}
else {
/* In progress tile pixels update. */
if (update_render_tile_cb && params.progressive_refine == false) {
update_render_tile_cb(rtile, false);
}

View File

@ -148,6 +148,7 @@ class Session {
function<void(RenderTile &)> write_render_tile_cb;
function<void(RenderTile &, bool)> update_render_tile_cb;
function<void(RenderTile &)> read_bake_tile_cb;
explicit Session(const SessionParams &params);
~Session();

View File

@ -1024,7 +1024,7 @@ static int bake(Render *re,
highpoly[i].ob,
i,
pixel_array_high,
num_pixels,
&bake_images,
depth,
pass_type,
pass_filter,
@ -1046,7 +1046,7 @@ static int bake(Render *re,
ob_low_eval,
0,
pixel_array_low,
num_pixels,
&bake_images,
depth,
pass_type,
pass_filter,

View File

@ -181,11 +181,8 @@ static void engine_bake(RenderEngine *engine,
struct Object *object,
const int pass_type,
const int pass_filter,
const int object_id,
const struct BakePixel *pixel_array,
const int num_pixels,
const int depth,
void *result)
const int width,
const int height)
{
extern FunctionRNA rna_RenderEngine_bake_func;
PointerRNA ptr;
@ -200,11 +197,8 @@ static void engine_bake(RenderEngine *engine,
RNA_parameter_set_lookup(&list, "object", &object);
RNA_parameter_set_lookup(&list, "pass_type", &pass_type);
RNA_parameter_set_lookup(&list, "pass_filter", &pass_filter);
RNA_parameter_set_lookup(&list, "object_id", &object_id);
RNA_parameter_set_lookup(&list, "pixel_array", &pixel_array);
RNA_parameter_set_lookup(&list, "num_pixels", &num_pixels);
RNA_parameter_set_lookup(&list, "depth", &depth);
RNA_parameter_set_lookup(&list, "result", &result);
RNA_parameter_set_lookup(&list, "width", &width);
RNA_parameter_set_lookup(&list, "height", &height);
engine->type->rna_ext.call(NULL, &ptr, func, &list);
RNA_parameter_list_free(&list);
@ -461,12 +455,6 @@ void rna_RenderPass_rect_set(PointerRNA *ptr, const float *values)
memcpy(rpass->rect, values, sizeof(float) * rpass->rectx * rpass->recty * rpass->channels);
}
static PointerRNA rna_BakePixel_next_get(PointerRNA *ptr)
{
BakePixel *bp = ptr->data;
return rna_pointer_inherit_refine(ptr, &RNA_BakePixel, bp + 1);
}
static RenderPass *rna_RenderPass_find_by_type(RenderLayer *rl, int passtype, const char *view)
{
return RE_pass_find_by_type(rl, passtype, view);
@ -535,33 +523,9 @@ static void rna_def_render_engine(BlenderRNA *brna)
0,
INT_MAX);
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
parm = RNA_def_int(func,
"object_id",
0,
0,
INT_MAX,
"Object Id",
"Id of the current object being baked in relation to the others",
0,
INT_MAX);
parm = RNA_def_int(func, "width", 0, 0, INT_MAX, "Width", "Image width", 0, INT_MAX);
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
parm = RNA_def_pointer(func, "pixel_array", "BakePixel", "", "");
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
parm = RNA_def_int(func,
"num_pixels",
0,
0,
INT_MAX,
"Number of Pixels",
"Size of the baking batch",
0,
INT_MAX);
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
parm = RNA_def_int(
func, "depth", 0, 0, INT_MAX, "Pixels depth", "Number of channels", 1, INT_MAX);
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
/* TODO, see how array size of 0 works, this shouldnt be used */
parm = RNA_def_pointer(func, "result", "AnyType", "", "");
parm = RNA_def_int(func, "height", 0, 0, INT_MAX, "Height", "Image height", 0, INT_MAX);
RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
/* viewport render callbacks */
@ -1119,53 +1083,6 @@ static void rna_def_render_pass(BlenderRNA *brna)
RNA_define_verify_sdna(1);
}
static void rna_def_render_bake_pixel(BlenderRNA *brna)
{
StructRNA *srna;
PropertyRNA *prop;
srna = RNA_def_struct(brna, "BakePixel", NULL);
RNA_def_struct_ui_text(srna, "Bake Pixel", "");
RNA_define_verify_sdna(0);
prop = RNA_def_property(srna, "primitive_id", PROP_INT, PROP_NONE);
RNA_def_property_int_sdna(prop, NULL, "primitive_id");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "object_id", PROP_INT, PROP_NONE);
RNA_def_property_int_sdna(prop, NULL, "object_id");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "uv", PROP_FLOAT, PROP_NONE);
RNA_def_property_array(prop, 2);
RNA_def_property_float_sdna(prop, NULL, "uv");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "du_dx", PROP_FLOAT, PROP_NONE);
RNA_def_property_float_sdna(prop, NULL, "du_dx");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "du_dy", PROP_FLOAT, PROP_NONE);
RNA_def_property_float_sdna(prop, NULL, "du_dy");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "dv_dx", PROP_FLOAT, PROP_NONE);
RNA_def_property_float_sdna(prop, NULL, "dv_dx");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "dv_dy", PROP_FLOAT, PROP_NONE);
RNA_def_property_float_sdna(prop, NULL, "dv_dy");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop = RNA_def_property(srna, "next", PROP_POINTER, PROP_NONE);
RNA_def_property_struct_type(prop, "BakePixel");
RNA_def_property_pointer_funcs(prop, "rna_BakePixel_next_get", NULL, NULL, NULL);
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
RNA_define_verify_sdna(1);
}
void RNA_def_render(BlenderRNA *brna)
{
rna_def_render_engine(brna);
@ -1173,7 +1090,6 @@ void RNA_def_render(BlenderRNA *brna)
rna_def_render_view(brna);
rna_def_render_layer(brna);
rna_def_render_pass(brna);
rna_def_render_bake_pixel(brna);
}
#endif /* RNA_RUNTIME */

View File

@ -67,7 +67,7 @@ bool RE_bake_engine(struct Render *re,
struct Object *object,
const int object_id,
const BakePixel pixel_array[],
const size_t num_pixels,
const BakeImages *bake_images,
const int depth,
const eScenePassType pass_type,
const int pass_filter,

View File

@ -87,11 +87,8 @@ typedef struct RenderEngineType {
struct Object *object,
const int pass_type,
const int pass_filter,
const int object_id,
const struct BakePixel *pixel_array,
const int num_pixels,
const int depth,
void *result);
const int width,
const int height);
void (*view_update)(struct RenderEngine *engine,
const struct bContext *context,
@ -140,6 +137,13 @@ typedef struct RenderEngine {
struct ReportList *reports;
struct {
const struct BakePixel *pixels;
float *result;
int width, height, depth;
int object_id;
} bake;
/* Depsgraph */
struct Depsgraph *depsgraph;

View File

@ -84,11 +84,12 @@ void render_result_exr_file_begin(struct Render *re, struct RenderEngine *engine
void render_result_exr_file_end(struct Render *re, struct RenderEngine *engine);
/* render pass wrapper for gpencil */
struct RenderPass *gp_add_pass(struct RenderResult *rr,
struct RenderLayer *rl,
int channels,
const char *name,
const char *viewname);
struct RenderPass *render_layer_add_pass(struct RenderResult *rr,
struct RenderLayer *rl,
int channels,
const char *name,
const char *viewname,
const char *chanid);
void render_result_exr_file_merge(struct RenderResult *rr,
struct RenderResult *rrpart,

View File

@ -31,6 +31,7 @@
#include "BLI_ghash.h"
#include "BLI_listbase.h"
#include "BLI_math_bits.h"
#include "BLI_rect.h"
#include "BLI_string.h"
#include "BLI_utildefines.h"
@ -167,6 +168,89 @@ void RE_engine_free(RenderEngine *engine)
MEM_freeN(engine);
}
/* Bake Render Results */
static RenderResult *render_result_from_bake(RenderEngine *engine, int x, int y, int w, int h)
{
/* Create render result with specified size. */
RenderResult *rr = MEM_callocN(sizeof(RenderResult), __func__);
rr->rectx = w;
rr->recty = h;
rr->tilerect.xmin = x;
rr->tilerect.ymin = y;
rr->tilerect.xmax = x + w;
rr->tilerect.ymax = y + h;
/* Add single baking render layer. */
RenderLayer *rl = MEM_callocN(sizeof(RenderLayer), "bake render layer");
rl->rectx = w;
rl->recty = h;
BLI_addtail(&rr->layers, rl);
/* Add render passes. */
render_layer_add_pass(rr, rl, engine->bake.depth, RE_PASSNAME_COMBINED, "", "RGBA");
RenderPass *primitive_pass = render_layer_add_pass(rr, rl, 4, "BakePrimitive", "", "RGBA");
RenderPass *differential_pass = render_layer_add_pass(rr, rl, 4, "BakeDifferential", "", "RGBA");
/* Fill render passes from bake pixel array, to be read by the render engine. */
for (int ty = 0; ty < h; ty++) {
size_t offset = ty * w * 4;
float *primitive = primitive_pass->rect + offset;
float *differential = differential_pass->rect + offset;
size_t bake_offset = (y + ty) * engine->bake.width + x;
const BakePixel *bake_pixel = engine->bake.pixels + bake_offset;
for (int tx = 0; tx < w; tx++) {
if (bake_pixel->object_id != engine->bake.object_id) {
primitive[0] = int_as_float(-1);
primitive[1] = int_as_float(-1);
}
else {
primitive[0] = int_as_float(bake_pixel->object_id);
primitive[1] = int_as_float(bake_pixel->primitive_id);
primitive[2] = bake_pixel->uv[0];
primitive[3] = bake_pixel->uv[1];
differential[0] = bake_pixel->du_dx;
differential[1] = bake_pixel->du_dy;
differential[2] = bake_pixel->dv_dx;
differential[3] = bake_pixel->dv_dy;
}
primitive += 4;
differential += 4;
bake_pixel++;
}
}
return rr;
}
static void render_result_to_bake(RenderEngine *engine, RenderResult *rr)
{
RenderPass *rpass = RE_pass_find_by_name(rr->layers.first, RE_PASSNAME_COMBINED, "");
if (!rpass) {
return;
}
/* Copy from tile render result to full image bake result. */
int x = rr->tilerect.xmin;
int y = rr->tilerect.ymin;
int w = rr->tilerect.xmax - rr->tilerect.xmin;
int h = rr->tilerect.ymax - rr->tilerect.ymin;
for (int ty = 0; ty < h; ty++) {
size_t offset = ty * w * engine->bake.depth;
size_t bake_offset = ((y + ty) * engine->bake.width + x) * engine->bake.depth;
size_t size = w * engine->bake.depth * sizeof(float);
memcpy(engine->bake.result + bake_offset, rpass->rect + offset, size);
}
}
/* Render Results */
static RenderPart *get_part_from_result(Render *re, RenderResult *result)
@ -180,6 +264,12 @@ static RenderPart *get_part_from_result(Render *re, RenderResult *result)
RenderResult *RE_engine_begin_result(
RenderEngine *engine, int x, int y, int w, int h, const char *layername, const char *viewname)
{
if (engine->bake.pixels) {
RenderResult *result = render_result_from_bake(engine, x, y, w, h);
BLI_addtail(&engine->fullresult, result);
return result;
}
Render *re = engine->re;
RenderResult *result;
rcti disprect;
@ -237,6 +327,11 @@ RenderResult *RE_engine_begin_result(
void RE_engine_update_result(RenderEngine *engine, RenderResult *result)
{
if (engine->bake.pixels) {
/* No interactive baking updates for now. */
return;
}
Render *re = engine->re;
if (result) {
@ -270,6 +365,13 @@ void RE_engine_end_result(
return;
}
if (engine->bake.pixels) {
render_result_to_bake(engine, result);
BLI_remlink(&engine->fullresult, result);
render_result_free(result);
return;
}
/* merge. on break, don't merge in result for preview renders, looks nicer */
if (!highlight) {
/* for exr tile render, detect tiles that are done */
@ -574,7 +676,7 @@ bool RE_bake_engine(Render *re,
Object *object,
const int object_id,
const BakePixel pixel_array[],
const size_t num_pixels,
const BakeImages *bake_images,
const int depth,
const eScenePassType pass_type,
const int pass_filter,
@ -619,16 +721,21 @@ bool RE_bake_engine(Render *re,
type->update(engine, re->main, engine->depsgraph);
}
type->bake(engine,
engine->depsgraph,
object,
pass_type,
pass_filter,
object_id,
pixel_array,
num_pixels,
depth,
result);
for (int i = 0; i < bake_images->size; i++) {
const BakeImage *image = bake_images->data + i;
engine->bake.pixels = pixel_array + image->offset;
engine->bake.result = result + image->offset * depth;
engine->bake.width = image->width;
engine->bake.height = image->height;
engine->bake.depth = depth;
engine->bake.object_id = object_id;
type->bake(
engine, engine->depsgraph, object, pass_type, pass_filter, image->width, image->height);
memset(&engine->bake, 0, sizeof(engine->bake));
}
engine->depsgraph = NULL;
}

View File

@ -2955,5 +2955,5 @@ RenderPass *RE_create_gp_pass(RenderResult *rr, const char *layername, const cha
BLI_freelinkN(&rl->passes, rp);
}
/* create a totally new pass */
return gp_add_pass(rr, rl, 4, RE_PASSNAME_COMBINED, viewname);
return render_layer_add_pass(rr, rl, 4, RE_PASSNAME_COMBINED, viewname, "RGBA");
}

View File

@ -213,12 +213,12 @@ static void set_pass_full_name(
/********************************** New **************************************/
static RenderPass *render_layer_add_pass(RenderResult *rr,
RenderLayer *rl,
int channels,
const char *name,
const char *viewname,
const char *chan_id)
RenderPass *render_layer_add_pass(RenderResult *rr,
RenderLayer *rl,
int channels,
const char *name,
const char *viewname,
const char *chan_id)
{
const int view_id = BLI_findstringindex(&rr->views, viewname, offsetof(RenderView, name));
RenderPass *rpass = MEM_callocN(sizeof(RenderPass), name);
@ -280,12 +280,6 @@ static RenderPass *render_layer_add_pass(RenderResult *rr,
return rpass;
}
/* wrapper called from render_opengl */
RenderPass *gp_add_pass(
RenderResult *rr, RenderLayer *rl, int channels, const char *name, const char *viewname)
{
return render_layer_add_pass(rr, rl, channels, name, viewname, "RGBA");
}
/* called by main render as well for parts */
/* will read info from Render *re to define layers */