Metal: Improve command buffer handling and workload scheduling.

Improve handling for cases where maximum in-flight command buffer count is exceeded. This can occur during light-baking operations. Ensures the application handles this gracefully and also improves workload pipelining by situationally stalling until GPU work has completed, if too much work is queued up. This may have a tangible benefit for T103742 by ensuring Blender does not queue up too much GPU work. Authored by Apple: Michael Parkin-White Ref T96261 Ref T103742 Depends on D17018 Reviewed By: fclem Maniphest Tasks: T103742, T96261 Differential Revision: https://developer.blender.org/D17019
Referenced by issue #103742, MacOS Metal GPU backend: slower viewport performance than OpenGL and flickering window animation when switching applications Referenced by issue #96261, Metal Viewport
2023-01-23 17:47:11 +01:00 · 2023-01-23 17:47:11 +01:00 · 84c25fdcaa · 2023-02-14 11:28:43 +01:00
parent 139fb38d4f
commit 84c25fdcaa
5 changed files with 46 additions and 6 deletions
--- a/intern/ghost/intern/GHOST_ContextCGL.h
+++ b/intern/ghost/intern/GHOST_ContextCGL.h
@ -23,6 +23,22 @@
@class NSView;

 class GHOST_ContextCGL : public GHOST_Context {
+
+ public:
+  /* Defines the number of simultaneous command buffers which can be in flight.
+   * The default limit of `64` is considered to be optimal for Blender. Too many command buffers
+   * will result in workload fragmentation and additional system-level overhead. This limit should
+   * also only be increased if the application is consistently exceeding the limit, and there are
+   * no command buffer leaks.
+   *
+   * If this limit is reached, starting a new command buffer will fail. The Metal backend will
+   * therefore stall until completion and log a warning when this limit is reached in order to
+   * ensure correct function of the app.
+   *
+   * It is generally preferable to reduce the prevalence of GPU_flush or GPU Context switches
+   * (which will both break command submissions), rather than increasing this limit. */
+  static const int max_command_buffer_count = 64;
+
 public:
  /**
   * Constructor.
--- a/intern/ghost/intern/GHOST_ContextCGL.mm
+++ b/intern/ghost/intern/GHOST_ContextCGL.mm
@ -529,7 +529,8 @@ void GHOST_ContextCGL::metalInit()
    id<MTLDevice> device = m_metalLayer.device;

    /* Create a command queue for blit/present operation. */
-    m_metalCmdQueue = (MTLCommandQueue *)[device newCommandQueue];
+    m_metalCmdQueue = (MTLCommandQueue *)[device
+        newCommandQueueWithMaxCommandBufferCount:GHOST_ContextCGL::max_command_buffer_count];
    [m_metalCmdQueue retain];

    /* Create shaders for blit operation. */
--- a/source/blender/editors/screen/glutil.c
+++ b/source/blender/editors/screen/glutil.c
@ -26,6 +26,7 @@
 #include "GPU_texture.h"

 #ifdef __APPLE__
+#  include "GPU_context.h"
 #  include "GPU_state.h"
 #endif

@ -281,7 +282,9 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
       * This doesn't seem to be too slow,
       * but still would be nice to have fast and nice solution. */
 #ifdef __APPLE__
-      GPU_flush();
+      if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+        GPU_flush();
+      }
 #endif
    }
  }
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@ -8,6 +8,8 @@
 #include "mtl_debug.hh"
 #include "mtl_framebuffer.hh"

+#include "intern/GHOST_ContextCGL.h"
+
 #include <fstream>

 using namespace blender;
@ -45,9 +47,15 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
  if (active_command_buffer_ == nil) {

    /* Verify number of active command buffers is below limit.
-     * Exceeding this limit will mean we either have a leak/GPU hang
-     * or we should increase the command buffer limit during MTLQueue creation */
-    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs < MTL_MAX_COMMAND_BUFFERS);
+     * Exceeding this limit will mean we either have a command buffer leak/GPU hang
+     * or we should increase the command buffer limit during MTLQueue creation.
+     * Excessive command buffers can also be caused by frequent GPUContext switches, which cause
+     * the GPU pipeline to flush. This is common during indirect light baking operations.
+     *
+     * NOTE: We currently stall until completion of GPU work upon ::submit if we have reached the
+     * in-flight command buffer limit. */
+    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs <
+               GHOST_ContextCGL::max_command_buffer_count);

    if (G.debug & G_DEBUG_GPU) {
      /* Debug: Enable Advanced Errors for GPU work execution. */
@ -137,6 +145,19 @@ bool MTLCommandBufferManager::submit(bool wait)
  /* Submit command buffer to GPU. */
  [active_command_buffer_ commit];

+  /* If we have too many active command buffers in flight, wait until completed to avoid running
+   * out. If this happens consistently, consider increasing `max_command_buffer_count`. */
+  if (MTLCommandBufferManager::num_active_cmd_bufs >=
+      (GHOST_ContextCGL::max_command_buffer_count - 1)) {
+    wait = true;
+    MTL_LOG_WARNING(
+        "Maximum number of command buffers in flight. Host will wait until GPU work has "
+        "completed. Consider increasing GHOST_ContextCGL::max_command_buffer_count or reducing "
+        "work fragmentation to better utilise system hardware. Command buffers are flushed upon "
+        "GPUContext switches, this is the most common cause of excessive command buffer "
+        "generation.\n");
+  }
+
  if (wait || (G.debug & G_DEBUG_GPU)) {
    /* Wait until current GPU work has finished executing. */
    [active_command_buffer_ waitUntilCompleted];
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@ -9,7 +9,6 @@
 #define MTL_MAX_DRAWABLES 3
 #define MTL_MAX_SET_BYTES_SIZE 4096
 #define MTL_FORCE_WAIT_IDLE 0
-#define MTL_MAX_COMMAND_BUFFERS 64

 /* Number of frames for which we retain in-flight resources such as scratch buffers.
 * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */