Cycles: More accurate volume stack size calculation

The idea is to allow having a lot of non-intersecting volumes without
allocating volume stack to its full size.

With the F11285472 file the memory usage goes from 1400 MiB to 1000 MiB
on the RTX6000 card.

The fix makes it so the integrator work memory is allocated after the
scene update, which has the downside of a possibly less efficient update
when some textures don't fit into GPU memory, but has the advantage of
making the proper decision and having a clear and consistent internal API.

Fixes memory part of T92014.

Differential Revision: https://developer.blender.org/D12966
This commit is contained in:
Sergey Sharybin 2021-10-22 14:20:22 +02:00
parent 8733d310e5
commit c4fa17c67a
5 changed files with 31 additions and 36 deletions

View File

@ -114,6 +114,7 @@ Object::Object() : Node(get_node_type())
particle_index = 0;
attr_map_offset = 0;
bounds = BoundBox::empty;
intersects_volume = false;
}
Object::~Object()
@ -367,22 +368,6 @@ float Object::compute_volume_step_size() const
return step_size;
}
/* Report whether this object is treated as a volume.
 *
 * Returns true when the geometry type is `Geometry::VOLUME`, or when at least one of the
 * geometry's used shaders has `has_volume` set. Returns false otherwise. */
bool Object::check_is_volume() const
{
/* Volume geometry qualifies on its own, without looking at the shaders. */
if (geometry->geometry_type == Geometry::VOLUME) {
return true;
}
/* Used shaders are iterated as generic nodes; cast each one to read its volume flag. */
for (Node *node : get_geometry()->get_used_shaders()) {
const Shader *shader = static_cast<const Shader *>(node);
if (shader->has_volume) {
return true;
}
}
/* Neither the geometry type nor any used shader indicates a volume. */
return false;
}
int Object::get_device_index() const
{
return index;
@ -775,12 +760,14 @@ void ObjectManager::device_update_flags(
}
if (bounds_valid) {
object->intersects_volume = false;
foreach (Object *volume_object, volume_objects) {
if (object == volume_object) {
continue;
}
if (object->bounds.intersects(volume_object->bounds)) {
object_flag[object->index] |= SD_OBJECT_INTERSECTS_VOLUME;
object->intersects_volume = true;
break;
}
}

View File

@ -75,6 +75,9 @@ class Object : public Node {
NODE_SOCKET_API(float, ao_distance)
/* Set during device update. */
bool intersects_volume;
Object();
~Object();
@ -109,13 +112,6 @@ class Object : public Node {
/* Compute step size from attributes, shaders, transforms. */
float compute_volume_step_size() const;
/* Check whether this object requires volume sampling (and hence might require space in the
* volume stack).
*
* Note that this is a naive iteration over shaders, which allows to access information prior
* to `scene_update()`. */
bool check_is_volume() const;
protected:
/* Specifies the position of the object in scene->objects and
* in the device vectors. Gets set in device_update. */

View File

@ -360,6 +360,8 @@ void Scene::device_update(Device *device_, Progress &progress)
return;
if (device->have_error() == false) {
dscene.data.volume_stack_size = get_volume_stack_size();
progress.set_status("Updating Device", "Writing constant memory");
device->const_copy_to("__data", &dscene.data, sizeof(dscene.data));
}
@ -527,8 +529,6 @@ void Scene::update_kernel_features()
const uint max_closures = (params.background) ? get_max_closure_count() : MAX_CLOSURE;
dscene.data.max_closures = max_closures;
dscene.data.max_shaders = shaders.size();
dscene.data.volume_stack_size = get_volume_stack_size();
}
bool Scene::update(Progress &progress)
@ -586,6 +586,8 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene)
scene_lock = thread_scoped_lock(mutex);
}
update_kernel_features();
const uint kernel_features = dscene.data.kernel_features;
if (!kernels_loaded || loaded_kernel_features != kernel_features) {
@ -656,10 +658,25 @@ int Scene::get_volume_stack_size() const
/* Quick non-expensive check. Can over-estimate maximum possible nested level, but does not
* require expensive calculation during pre-processing. */
bool has_volume_object = false;
for (const Object *object : objects) {
if (object->check_is_volume()) {
if (!object->get_geometry()->has_volume) {
continue;
}
if (object->intersects_volume) {
/* Object intersects another volume, assume it's possible to go deeper in the stack. */
/* TODO(sergey): This might count nesting twice (A intersects B and B intersects A), but
* can't think of a computationally cheap algorithm. Dividing by 2 doesn't work because of
* Venn diagram example with 3 circles. */
++volume_stack_size;
}
else if (!has_volume_object) {
/* Allocate space for at least one volume object. */
++volume_stack_size;
}
has_volume_object = true;
if (volume_stack_size == MAX_VOLUME_STACK_SIZE) {
break;
@ -668,6 +685,8 @@ int Scene::get_volume_stack_size() const
volume_stack_size = min(volume_stack_size, MAX_VOLUME_STACK_SIZE);
VLOG(3) << "Detected required volume stack size " << volume_stack_size;
return volume_stack_size;
}

View File

@ -270,7 +270,6 @@ class Scene : public NodeOwner {
void enable_update_stats();
void update_kernel_features();
bool update(Progress &progress);
bool has_shadow_catcher();
@ -333,6 +332,7 @@ class Scene : public NodeOwner {
bool kernels_loaded;
uint loaded_kernel_features;
void update_kernel_features();
bool load_kernels(Progress &progress, bool lock_scene = true);
bool has_shadow_catcher_ = false;

View File

@ -539,19 +539,12 @@ bool Session::update_scene(int width, int height)
Camera *cam = scene->camera;
cam->set_screen_size(width, height);
/* First detect which kernel features are used and allocate working memory.
* This helps estimate how much device memory is available for the scene and
* how much we need to allocate on the host instead. */
scene->update_kernel_features();
const bool scene_update_result = scene->update(progress);
path_trace_->load_kernels();
path_trace_->alloc_work_memory();
if (scene->update(progress)) {
return true;
}
return false;
return scene_update_result;
}
static string status_append(const string &status, const string &suffix)