Fix T101787, T102786. Cycles: Improved out-of-memory messaging on Metal

This patch adds a new `max_working_set_exceeded()` check on Metal so that we can display a "System is out of GPU memory" message to the user. Without this, we get opaque "CommandBuffer failed" errors at render time due to exceeding the size limit of resident resources.

Likely fix for T101787 & T102786.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16713
This commit is contained in:
Michael Jones (Apple) 2022-12-07 13:28:59 +00:00
parent 4d05a000cb
commit 2dc51fccb8
Notes: blender-bot 2023-02-14 07:31:32 +01:00
Referenced by issue #102786, Blender crash with Intel Metal rendering on macOS
Referenced by issue #101787, GPU Metal Render fails with error "CommandBuffer Failed: cycles metal integrator_ reset" on M2 MacBook Air
2 changed files with 28 additions and 3 deletions

View File

@ -117,6 +117,8 @@ class MetalDevice : public Device {
/* ------------------------------------------------------------------ */
/* low-level memory management */
/* Returns true when `stats.mem_used` is greater than the Metal device's recommended maximum
 * working set size minus `safety_margin` bytes. The default margin is 8 MiB. */
bool max_working_set_exceeded(size_t safety_margin = 8 * 1024 * 1024) const;
MetalMem *generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);

View File

@ -446,6 +446,14 @@ void MetalDevice::erase_allocation(device_memory &mem)
}
}
/* Report whether the memory tracked in `stats.mem_used` is larger than the Metal device's
 * recommended maximum working set size, less `safety_margin` bytes of headroom. */
bool MetalDevice::max_working_set_exceeded(size_t safety_margin) const
{
  /* We're allowed to allocate beyond the safe working set size, but then if all resources are made
   * resident we will get command buffer failures at render time. */
  const size_t max_working_set = [mtlDevice recommendedMaxWorkingSetSize];

  /* Saturate at zero: a plain unsigned subtraction would wrap around to a huge value whenever the
   * margin is not smaller than the working set size, and the check could then never trigger. */
  const size_t available = (max_working_set > safety_margin) ? max_working_set - safety_margin : 0;
  return (stats.mem_used > available);
}
MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
{
size_t size = mem.memory_size();
@ -523,6 +531,11 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
mmem->use_UMA = false;
}
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
return nullptr;
}
return mmem;
}
@ -921,9 +934,8 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
assert(mtlTexture);
if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}
@ -955,7 +967,10 @@ void MetalDevice::tex_alloc(device_texture &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
assert(mtlTexture);
if (!mtlTexture) {
set_error("System is out of GPU memory");
return;
}
[mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
mipmapLevel:0
@ -1017,6 +1032,10 @@ void MetalDevice::tex_alloc(device_texture &mem)
need_texture_info = true;
texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}
void MetalDevice::tex_free(device_texture &mem)
@ -1077,6 +1096,10 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
}
if (max_working_set_exceeded()) {
set_error("System is out of GPU memory");
}
}
CCL_NAMESPACE_END