Metal: MTLUniformBuffer module implementation

Initial implementation.

Authored by Apple: Michael Parkin-White
Ref T96261

Reviewed By: fclem
Differential Revision: https://developer.blender.org/D15357
Jason Fielder 2022-07-19 17:11:03 +02:00 committed by Clément Foucault
parent 6bba4d864e
commit 9835d5e58b
Notes: blender-bot 2023-02-14 05:22:18 +01:00
Referenced by issue #96261, Metal Viewport
7 changed files with 260 additions and 26 deletions

source/blender/gpu/CMakeLists.txt

@@ -199,6 +199,7 @@ set(METAL_SRC
metal/mtl_state.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
metal/mtl_backend.hh
metal/mtl_capabilities.hh
@@ -210,6 +211,7 @@ set(METAL_SRC
metal/mtl_query.hh
metal/mtl_state.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
)
# Select Backend source based on availability

source/blender/gpu/metal/mtl_backend.mm

@@ -10,6 +10,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_framebuffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_query.hh"
#include "gpu_capabilities_private.hh"
@@ -81,8 +82,7 @@ Texture *MTLBackend::texture_alloc(const char *name)
UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
{
/* TODO(Metal): Implement MTLUniformBuf. */
return nullptr;
return new MTLUniformBuf(size, name);
};
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)

source/blender/gpu/metal/mtl_context.hh

@@ -3,6 +3,7 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
@@ -625,6 +626,11 @@ class MTLContext : public Context {
void memory_statistics_get(int *total_mem, int *free_mem) override;
static MTLContext *get()
{
return static_cast<MTLContext *>(Context::get());
}
void debug_group_begin(const char *name, int index) override;
void debug_group_end() override;

source/blender/gpu/metal/mtl_memory.hh

@@ -78,8 +78,10 @@
* Usage:
* MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager.
*
* gpu::MTLBuffer *allocate_buffer(size, is_cpu_visible, bytes=nullptr)
* gpu::MTLBuffer *allocate_buffer_aligned(size, alignment, is_cpu_visible, bytes=nullptr)
* gpu::MTLBuffer *allocate(size, is_cpu_visible)
* gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visible)
* gpu::MTLBuffer *allocate_with_data(size, is_cpu_visible, data_ptr)
* gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visible, data_ptr)
*/
/* Debug memory statistics: Disabled by Macro rather than guarded for
@@ -389,11 +391,13 @@ class MTLBufferPool {
void init(id<MTLDevice> device);
~MTLBufferPool();
gpu::MTLBuffer *allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes = nullptr);
gpu::MTLBuffer *allocate_buffer_aligned(uint64_t size,
uint alignment,
bool cpu_visible,
const void *bytes = nullptr);
gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size,
uint alignment,
bool cpu_visible,
const void *data = nullptr);
bool free_buffer(gpu::MTLBuffer *buffer);
/* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,

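The usage comment above lists the renamed MTLBufferPool entry points, which now separate plain allocations from allocations that also upload initial data. A minimal caller-side sketch, assuming a valid Metal context and that MTLContext::get_global_memory_manager() yields the MTLBufferPool by reference (illustrative only, error handling omitted):

MTLBufferPool &pool = MTLContext::get_global_memory_manager();

/* CPU-visible buffer with the default 256-byte alignment and no initial contents. */
gpu::MTLBuffer *scratch = pool.allocate(1024, true);

/* GPU-only buffer with an explicit alignment requirement. */
gpu::MTLBuffer *gpu_only = pool.allocate_aligned(4096, 1024, false);

/* CPU-visible buffer initialized from host data (upload and flush happen inside). */
float host_data[64] = {};
gpu::MTLBuffer *initialized = pool.allocate_with_data(sizeof(host_data), true, host_data);

/* Return buffers to the pool; in-flight GPU work is tracked via MTLSafeFreeList. */
pool.free_buffer(scratch);
pool.free_buffer(gpu_only);
pool.free_buffer(initialized);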
source/blender/gpu/metal/mtl_memory.mm

@@ -57,17 +57,23 @@ void MTLBufferPool::free()
buffer_pools_.clear();
}
gpu::MTLBuffer *MTLBufferPool::allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes)
gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
{
/* Allocate buffer with default HW-compatible alignment of 256 bytes.
* See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
return this->allocate_buffer_aligned(size, 256, cpu_visible, bytes);
return this->allocate_aligned(size, 256, cpu_visible);
}
gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
uint alignment,
bool cpu_visible,
const void *bytes)
gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
bool cpu_visible,
const void *data)
{
/* Allocate buffer with default HW-compatible alignment of 256 bytes.
* See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}
gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
{
/* Check not required. Main GPU module usage considered thread-safe. */
// BLI_assert(BLI_thread_is_main());
@@ -153,15 +159,6 @@ gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
/* Flag buffer as actively in-use. */
new_buffer->flag_in_use(true);
/* Upload initial data if provided -- size based on original size param, not aligned size. */
if (bytes) {
BLI_assert(!(options & MTLResourceStorageModePrivate));
BLI_assert(size <= aligned_alloc_size);
BLI_assert(size <= [new_buffer->get_metal_buffer() length]);
memcpy(new_buffer->get_host_ptr(), bytes, size);
new_buffer->flush_range(0, size);
}
#if MTL_DEBUG_MEMORY_STATISTICS == 1
this->per_frame_allocation_count++;
#endif
@@ -169,6 +166,23 @@ gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
return new_buffer;
}
gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
uint alignment,
bool cpu_visible,
const void *data)
{
gpu::MTLBuffer *buf = this->allocate_aligned(size, alignment, cpu_visible);
/* Upload initial data. */
BLI_assert(data != nullptr);
BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size <= buf->get_size());
BLI_assert(size <= [buf->get_metal_buffer() length]);
memcpy(buf->get_host_ptr(), data, size);
buf->flush_range(0, size);
return buf;
}
bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
{
/* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */
@@ -356,7 +370,7 @@ void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTL
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
allocations_in_pool_ += buffer->size;
allocations_in_pool_ += buffer->get_size();
buffers_in_pool_++;
#endif
}
@@ -413,7 +427,7 @@ void MTLSafeFreeList::decrement_reference()
{
lock_.lock();
BLI_assert(in_free_queue_ == false);
int ref_count = reference_count_--;
int ref_count = --reference_count_;
if (ref_count == 0) {
MTLContext::get_global_memory_manager().push_completed_safe_list(this);

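The last hunk above swaps a post-decrement for a pre-decrement. With `reference_count_--`, the returned value is the count before the decrement, so the final release observes 1 rather than 0 and the completed safe-free list is never pushed; `--reference_count_` returns the post-decrement value. A self-contained C++ sketch of the difference:

#include <cassert>

int reference_count = 1;

/* Post-decrement yields the value *before* decrementing:
 * the final releaser sees 1, so a `== 0` check never fires. */
int old_behavior = reference_count--; /* old_behavior == 1, reference_count == 0 */
assert(old_behavior == 1);

reference_count = 1;

/* Pre-decrement yields the value *after* decrementing:
 * the final releaser correctly observes 0 and can push the list. */
int new_behavior = --reference_count; /* new_behavior == 0 */
assert(new_behavior == 0);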
source/blender/gpu/metal/mtl_uniform_buffer.hh

@@ -0,0 +1,48 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_uniform_buffer_private.hh"
#include "mtl_context.hh"
namespace blender::gpu {
/**
* Implementation of Uniform Buffers using Metal.
*/
class MTLUniformBuf : public UniformBuf {
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
/* Whether the buffer has contents. If false, no GPU
 * buffer has been allocated yet. */
bool has_data_ = false;
/* Bindstate tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
id<MTLBuffer> get_metal_buffer(int *r_offset);
int get_size();
const char *get_name()
{
return name_;
}
MEM_CXX_CLASS_ALLOC_FUNCS("MTLUniformBuf");
};
} // namespace blender::gpu

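With the header above in place, the Metal UBO is reached through Blender's backend-agnostic GPU_uniformbuf_* front-end rather than directly. A rough usage sketch, assuming the existing API declared in GPU_uniform_buffer.h; the struct and slot index below are illustrative:

#include "GPU_uniform_buffer.h"

struct SceneData {
  float color[4];
  float time;
  float _pad[3]; /* Keep the layout std140-friendly. */
};

SceneData data = {};

/* With the Metal backend active, this lands in MTLBackend::uniformbuf_alloc()
 * and returns an MTLUniformBuf behind the opaque handle. */
GPUUniformBuf *ubo = GPU_uniformbuf_create_ex(sizeof(SceneData), &data, "scene_data");

/* bind() uploads any deferred data and records the UBO in the context's
 * pipeline_state.ubo_bindings; the slot normally comes from the shader interface. */
GPU_uniformbuf_bind(ubo, 0);

/* update() releases the previous MTLBuffer (kept alive by the memory manager
 * while in-flight GPU work may still reference it) and allocates a new one. */
GPU_uniformbuf_update(ubo, &data);

GPU_uniformbuf_unbind(ubo);
GPU_uniformbuf_free(ubo);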
source/blender/gpu/metal/mtl_uniform_buffer.mm

@@ -0,0 +1,160 @@
/** \file
* \ingroup gpu
*/
#include "BKE_global.h"
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
MTLUniformBuf::MTLUniformBuf(size_t size, const char *name) : UniformBuf(size, name)
{
}
MTLUniformBuf::~MTLUniformBuf()
{
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
/* Ensure the UBO is not bound to the active context.
 * UBO bindings are reset upon context switch, so we do not need
 * to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
if (slot.bound && slot.ubo == this) {
slot.bound = false;
slot.ubo = nullptr;
}
}
}
}
void MTLUniformBuf::update(const void *data)
{
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Free existing allocation.
* The previous UBO resource will be tracked by the memory manager,
* in case dependent GPU work is still executing. */
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
if (data != nullptr) {
metal_buffer_ = MTLContext::get_global_memory_manager().allocate_with_data(
size_in_bytes_, true, data);
has_data_ = true;
metal_buffer_->set_label(@"Uniform Buffer");
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
}
else {
/* If data is not yet present, no buffer will be allocated and MTLContext will use an empty
* null buffer, containing zeroes, if the UBO is bound. */
metal_buffer_ = nullptr;
has_data_ = false;
}
}
void MTLUniformBuf::bind(int slot)
{
if (slot < 0) {
MTL_LOG_WARNING("Failed to bind UBO %p. uniform location %d invalid.\n", this, slot);
return;
}
BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
/* Bind current UBO to active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLUniformBufferBinding &ctx_ubo_bind_slot = ctx->pipeline_state.ubo_bindings[slot];
ctx_ubo_bind_slot.ubo = this;
ctx_ubo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Ensure there is at least an empty dummy buffer. */
if (metal_buffer_ == nullptr) {
this->update(nullptr);
}
}
void MTLUniformBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLUniformBufferBinding &ctx_ubo_bind_slot =
bound_ctx_->pipeline_state.ubo_bindings[bind_slot_];
if (ctx_ubo_bind_slot.bound && ctx_ubo_bind_slot.ubo == this) {
ctx_ubo_bind_slot.bound = false;
ctx_ubo_bind_slot.ubo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
{
BLI_assert(this);
*r_offset = 0;
if (metal_buffer_ != nullptr && has_data_) {
*r_offset = 0;
metal_buffer_->debug_ensure_used();
return metal_buffer_->get_metal_buffer();
}
else {
*r_offset = 0;
return nil;
}
}
int MTLUniformBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu
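get_metal_buffer() returns nil until data has been uploaded, and per the comment in update() the context is then expected to bind a zero-filled dummy buffer in its place. A hypothetical binding-time sketch of that fallback; the encoder, dummy buffer, and index names are illustrative and not part of this commit:

int offset = 0;
id<MTLBuffer> ubo_buffer = mtl_ubo->get_metal_buffer(&offset);

if (ubo_buffer != nil) {
  [encoder setVertexBuffer:ubo_buffer offset:offset atIndex:buffer_index];
}
else {
  /* UBO has no contents yet: bind a zero-filled dummy buffer so the shader
   * still sees a valid resource at this index (see MTLUniformBuf::update). */
  [encoder setVertexBuffer:null_buffer offset:0 atIndex:buffer_index];
}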