Metal: MTLUniformBuffer module implementation

Initial implementation.

Authored by Apple: Michael Parkin-White
Ref T96261

Reviewed By: fclem
Differential Revision: https://developer.blender.org/D15357
Jason Fielder 2022-07-19 17:11:03 +02:00 committed by Clément Foucault
parent 6bba4d864e
commit 9835d5e58b
Notes: blender-bot 2023-02-14 05:22:18 +01:00
Referenced by issue #96261, Metal Viewport
7 changed files with 260 additions and 26 deletions

source/blender/gpu/CMakeLists.txt

@@ -199,6 +199,7 @@ set(METAL_SRC
metal/mtl_state.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
metal/mtl_backend.hh
metal/mtl_capabilities.hh
@@ -210,6 +211,7 @@ set(METAL_SRC
metal/mtl_query.hh
metal/mtl_state.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
)
# Select Backend source based on availability

source/blender/gpu/metal/mtl_backend.mm

@@ -10,6 +10,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_framebuffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_query.hh"
#include "gpu_capabilities_private.hh"
@@ -81,8 +82,7 @@ Texture *MTLBackend::texture_alloc(const char *name)
UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
{
/* TODO(Metal): Implement MTLUniformBuf. */
return nullptr;
return new MTLUniformBuf(size, name);
};
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)

source/blender/gpu/metal/mtl_context.hh

@@ -3,6 +3,7 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
@@ -625,6 +626,11 @@ class MTLContext : public Context {
void memory_statistics_get(int *total_mem, int *free_mem) override;
static MTLContext *get()
{
return static_cast<MTLContext *>(Context::get());
}
void debug_group_begin(const char *name, int index) override;
void debug_group_end() override;

source/blender/gpu/metal/mtl_memory.hh

@@ -78,8 +78,10 @@
* Usage:
* MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager.
*
* gpu::MTLBuffer *allocate_buffer(size, is_cpu_visible, bytes=nullptr)
* gpu::MTLBuffer *allocate_buffer_aligned(size, alignment, is_cpu_visible, bytes=nullptr)
* gpu::MTLBuffer *allocate(size, is_cpu_visible)
* gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visible)
* gpu::MTLBuffer *allocate_with_data(size, is_cpu_visible, data_ptr)
* gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visible, data_ptr)
*/
/* Debug memory statistics: Disabled by Macro rather than guarded for
@@ -389,11 +391,13 @@ class MTLBufferPool {
void init(id<MTLDevice> device);
~MTLBufferPool();
gpu::MTLBuffer *allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes = nullptr);
gpu::MTLBuffer *allocate_buffer_aligned(uint64_t size,
uint alignment,
bool cpu_visible,
const void *bytes = nullptr);
gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size,
uint alignment,
bool cpu_visible,
const void *data = nullptr);
bool free_buffer(gpu::MTLBuffer *buffer);
/* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,

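The usage comment above lists the renamed MTLBufferPool entry points, which now separate plain allocations from allocations that also upload initial data. A minimal caller-side sketch, assuming a valid Metal context and that MTLContext::get_global_memory_manager() yields the MTLBufferPool by reference (illustrative only, error handling omitted):

MTLBufferPool &pool = MTLContext::get_global_memory_manager();

/* CPU-visible buffer with the default 256-byte alignment and no initial contents. */
gpu::MTLBuffer *scratch = pool.allocate(1024, true);

/* GPU-only buffer with an explicit alignment requirement. */
gpu::MTLBuffer *gpu_only = pool.allocate_aligned(4096, 1024, false);

/* CPU-visible buffer initialized from host data (upload and flush happen inside). */
float host_data[64] = {};
gpu::MTLBuffer *initialized = pool.allocate_with_data(sizeof(host_data), true, host_data);

/* Return buffers to the pool; in-flight GPU work is tracked via MTLSafeFreeList. */
pool.free_buffer(scratch);
pool.free_buffer(gpu_only);
pool.free_buffer(initialized);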
source/blender/gpu/metal/mtl_memory.mm

@@ -57,17 +57,23 @@ void MTLBufferPool::free()
buffer_pools_.clear();
}
gpu::MTLBuffer *MTLBufferPool::allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes)
gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
{
/* Allocate buffer with default HW-compatible alignment of 256 bytes.
* See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
return this->allocate_buffer_aligned(size, 256, cpu_visible, bytes);
return this->allocate_aligned(size, 256, cpu_visible);
}
gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
uint alignment,
bool cpu_visible,
const void *bytes)
gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
bool cpu_visible,
const void *data)
{
/* Allocate buffer with default HW-compatible alignment of 256 bytes.
* See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}
gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
{
/* Check not required. Main GPU module usage considered thread-safe. */
// BLI_assert(BLI_thread_is_main());
@@ -153,15 +159,6 @@ gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
/* Flag buffer as actively in-use. */
new_buffer->flag_in_use(true);
/* Upload initial data if provided -- size based on original size param, not aligned size. */
if (bytes) {
BLI_assert(!(options & MTLResourceStorageModePrivate));
BLI_assert(size <= aligned_alloc_size);
BLI_assert(size <= [new_buffer->get_metal_buffer() length]);
memcpy(new_buffer->get_host_ptr(), bytes, size);
new_buffer->flush_range(0, size);
}
#if MTL_DEBUG_MEMORY_STATISTICS == 1
this->per_frame_allocation_count++;
#endif
@@ -169,6 +166,23 @@ gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
return new_buffer;
}
gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
uint alignment,
bool cpu_visible,
const void *data)
{
gpu::MTLBuffer *buf = this->allocate_aligned(size, alignment, cpu_visible);
/* Upload initial data. */
BLI_assert(data != nullptr);
BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size <= buf->get_size());
BLI_assert(size <= [buf->get_metal_buffer() length]);
memcpy(buf->get_host_ptr(), data, size);
buf->flush_range(0, size);
return buf;
}
bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
{
/* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */
@@ -356,7 +370,7 @@ void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTL
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
allocations_in_pool_ += buffer->size;
allocations_in_pool_ += buffer->get_size();
buffers_in_pool_++;
#endif
}
@@ -413,7 +427,7 @@ void MTLSafeFreeList::decrement_reference()
{
lock_.lock();
BLI_assert(in_free_queue_ == false);
int ref_count = reference_count_--;
int ref_count = --reference_count_;
if (ref_count == 0) {
MTLContext::get_global_memory_manager().push_completed_safe_list(this);

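The last hunk above swaps a post-decrement for a pre-decrement. With `reference_count_--`, the returned value is the count before the decrement, so the final release observes 1 rather than 0 and the completed safe-free list is never pushed; `--reference_count_` returns the post-decrement value. A self-contained C++ sketch of the difference:

#include <cassert>

int reference_count = 1;

/* Post-decrement yields the value *before* decrementing:
 * the final releaser sees 1, so a `== 0` check never fires. */
int old_behavior = reference_count--; /* old_behavior == 1, reference_count == 0 */
assert(old_behavior == 1);

reference_count = 1;

/* Pre-decrement yields the value *after* decrementing:
 * the final releaser correctly observes 0 and can push the list. */
int new_behavior = --reference_count; /* new_behavior == 0 */
assert(new_behavior == 0);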
source/blender/gpu/metal/mtl_uniform_buffer.hh

@@ -0,0 +1,48 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_uniform_buffer_private.hh"
#include "mtl_context.hh"
namespace blender::gpu {
/**
* Implementation of Uniform Buffers using Metal.
*/
class MTLUniformBuf : public UniformBuf {
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
/* Whether the buffer has contents. If false, no GPU
 * buffer has been allocated yet. */
bool has_data_ = false;
/* Bindstate tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
id<MTLBuffer> get_metal_buffer(int *r_offset);
int get_size();
const char *get_name()
{
return name_;
}
MEM_CXX_CLASS_ALLOC_FUNCS("MTLUniformBuf");
};
} // namespace blender::gpu

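With the header above in place, the Metal UBO is reached through Blender's backend-agnostic GPU_uniformbuf_* front-end rather than directly. A rough usage sketch, assuming the existing API declared in GPU_uniform_buffer.h; the struct and slot index below are illustrative:

#include "GPU_uniform_buffer.h"

struct SceneData {
  float color[4];
  float time;
  float _pad[3]; /* Keep the layout std140-friendly. */
};

SceneData data = {};

/* With the Metal backend active, this lands in MTLBackend::uniformbuf_alloc()
 * and returns an MTLUniformBuf behind the opaque handle. */
GPUUniformBuf *ubo = GPU_uniformbuf_create_ex(sizeof(SceneData), &data, "scene_data");

/* bind() uploads any deferred data and records the UBO in the context's
 * pipeline_state.ubo_bindings; the slot normally comes from the shader interface. */
GPU_uniformbuf_bind(ubo, 0);

/* update() releases the previous MTLBuffer (kept alive by the memory manager
 * while in-flight GPU work may still reference it) and allocates a new one. */
GPU_uniformbuf_update(ubo, &data);

GPU_uniformbuf_unbind(ubo);
GPU_uniformbuf_free(ubo);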
source/blender/gpu/metal/mtl_uniform_buffer.mm

@@ -0,0 +1,160 @@
/** \file
* \ingroup gpu
*/
#include "BKE_global.h"
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
MTLUniformBuf::MTLUniformBuf(size_t size, const char *name) : UniformBuf(size, name)
{
}
MTLUniformBuf::~MTLUniformBuf()
{
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
/* Ensure the UBO is not bound to the active context.
 * UBO bindings are reset upon context switch, so we do not need
 * to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
if (slot.bound && slot.ubo == this) {
slot.bound = false;
slot.ubo = nullptr;
}
}
}
}
void MTLUniformBuf::update(const void *data)
{
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Free existing allocation.
* The previous UBO resource will be tracked by the memory manager,
* in case dependent GPU work is still executing. */
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
if (data != nullptr) {
metal_buffer_ = MTLContext::get_global_memory_manager().allocate_with_data(
size_in_bytes_, true, data);
has_data_ = true;
metal_buffer_->set_label(@"Uniform Buffer");
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
}
else {
/* If data is not yet present, no buffer will be allocated and MTLContext will use an empty
* null buffer, containing zeroes, if the UBO is bound. */
metal_buffer_ = nullptr;
has_data_ = false;
}
}
void MTLUniformBuf::bind(int slot)
{
if (slot < 0) {
MTL_LOG_WARNING("Failed to bind UBO %p. uniform location %d invalid.\n", this, slot);
return;
}
BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
/* Bind current UBO to active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLUniformBufferBinding &ctx_ubo_bind_slot = ctx->pipeline_state.ubo_bindings[slot];
ctx_ubo_bind_slot.ubo = this;
ctx_ubo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Ensure there is at least an empty dummy buffer. */
if (metal_buffer_ == nullptr) {
this->update(nullptr);
}
}
void MTLUniformBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLUniformBufferBinding &ctx_ubo_bind_slot =
bound_ctx_->pipeline_state.ubo_bindings[bind_slot_];
if (ctx_ubo_bind_slot.bound && ctx_ubo_bind_slot.ubo == this) {
ctx_ubo_bind_slot.bound = false;
ctx_ubo_bind_slot.ubo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
{
BLI_assert(this);
*r_offset = 0;
if (metal_buffer_ != nullptr && has_data_) {
*r_offset = 0;
metal_buffer_->debug_ensure_used();
return metal_buffer_->get_metal_buffer();
}
else {
*r_offset = 0;
return nil;
}
}
int MTLUniformBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu
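get_metal_buffer() returns nil until data has been uploaded, and per the comment in update() the context is then expected to bind a zero-filled dummy buffer in its place. A hypothetical binding-time sketch of that fallback; the encoder, dummy buffer, and index names are illustrative and not part of this commit:

int offset = 0;
id<MTLBuffer> ubo_buffer = mtl_ubo->get_metal_buffer(&offset);

if (ubo_buffer != nil) {
  [encoder setVertexBuffer:ubo_buffer offset:offset atIndex:buffer_index];
}
else {
  /* UBO has no contents yet: bind a zero-filled dummy buffer so the shader
   * still sees a valid resource at this index (see MTLUniformBuf::update). */
  [encoder setVertexBuffer:null_buffer offset:0 atIndex:buffer_index];
}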