Metal: Fix GPencil texture buffer attribute packing issue and cutting tool rendering.

Adds Line Loop topology support for the cutting tool, and adds support for packing multiple vertex attributes across individual pixels within a texture buffer.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D16783
Jason Fielder 2022-12-20 14:08:37 +01:00 committed by Clément Foucault
parent 2712265598
commit df1fe18ed7
Notes: blender-bot 2023-02-14 06:00:51 +01:00
Referenced by issue #96261, Metal Viewport
3 changed files with 113 additions and 59 deletions
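
The line-loop handling below hinges on one idea: Metal exposes no line-loop primitive type, so the loop is drawn as a line strip whose final vertex repeats the first. A minimal standalone C++ sketch of that idea, using illustrative names and types rather than the Blender implementation:

// Standalone sketch (not the Blender code): a line loop over N vertices is
// emulated as a line strip over N + 1 vertices, where the extra final vertex
// is a copy of the first and places the closing line segment.
#include <vector>

struct Vert {
  float x, y;
};

static std::vector<Vert> emulate_line_loop_as_strip(const std::vector<Vert> &loop_verts)
{
  std::vector<Vert> strip_verts = loop_verts;
  if (!loop_verts.empty()) {
    /* One extra vertex at the end, copied from the first, closes the loop. */
    strip_verts.push_back(loop_verts.front());
  }
  return strip_verts;
}

int main()
{
  const std::vector<Vert> quad_loop = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};
  const std::vector<Vert> strip = emulate_line_loop_as_strip(quad_loop);
  /* strip has 5 vertices; drawn as a line strip it produces the closed quad outline. */
  return strip.size() == 5 ? 0 : 1;
}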


@@ -39,8 +39,16 @@ uchar *MTLImmediate::begin()
   metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
   has_begun_ = true;
 
+  /* If prim type is line loop, add an extra vertex at the end for placing the closing line,
+   * as metal does not support this primitive type. We treat this as a Line strip with one
+   * extra value. */
+  int vertex_alloc_length = vertex_len;
+  if (prim_type == GPU_PRIM_LINE_LOOP) {
+    vertex_alloc_length++;
+  }
+
   /* Allocate a range of data and return host-accessible pointer. */
-  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_alloc_length);
   current_allocation_ = context_->get_scratchbuffer_manager()
                             .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
   [current_allocation_.metal_buffer retain];
@@ -266,71 +274,88 @@ void MTLImmediate::end()
    * For immediate mode, generating these is currently very cheap, as we use
    * fast scratch buffer allocations. Though we may benefit from caching of
    * frequently used buffer sizes. */
+  bool rendered = false;
   if (mtl_needs_topology_emulation(this->prim_type)) {
 
-    /* Debug safety check for SSBO FETCH MODE. */
-    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
-      BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
-    }
-
-    /* Emulate Tri-fan. */
-    if (this->prim_type == GPU_PRIM_TRI_FAN) {
-      /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
-       * vertices. */
-      uint32_t base_vert_count = this->vertex_idx;
-      uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
-      uint32_t fan_index_count = num_triangles * 3;
-      BLI_assert(num_triangles > 0);
-
-      uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
-      uint32_t *index_buffer = nullptr;
-
-      MTLTemporaryBuffer allocation =
-          context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
-              alloc_size, 128);
-      index_buffer = (uint32_t *)allocation.data;
-
-      int a = 0;
-      for (int i = 0; i < num_triangles; i++) {
-        index_buffer[a++] = 0;
-        index_buffer[a++] = i + 1;
-        index_buffer[a++] = i + 2;
-      }
-
-      @autoreleasepool {
-
-        id<MTLBuffer> index_buffer_mtl = nil;
-        uint32_t index_buffer_offset = 0;
-
-        /* Region of scratch buffer used for topology emulation element data.
-         * NOTE(Metal): We do not need to manually flush as the entire scratch
-         * buffer for current command buffer is flushed upon submission. */
-        index_buffer_mtl = allocation.metal_buffer;
-        index_buffer_offset = allocation.buffer_offset;
-
-        /* Set depth stencil state (requires knowledge of primitive type). */
-        context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
-
-        /* Bind Vertex Buffer. */
-        rps.bind_vertex_buffer(
-            current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
-
-        /* Draw. */
-        [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
-                        indexCount:fan_index_count
-                         indexType:MTLIndexTypeUInt32
-                       indexBuffer:index_buffer_mtl
-                 indexBufferOffset:index_buffer_offset];
-      }
-    }
-    else {
-      /* TODO(Metal): Topology emulation for line loop.
-       * NOTE(Metal): This is currently not used anywhere and modified at the high
-       * level for efficiency in such cases. */
-      BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
-    }
+    switch (this->prim_type) {
+      case GPU_PRIM_TRI_FAN: {
+        /* Debug safety check for SSBO FETCH MODE. */
+        if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+          BLI_assert(
+              false &&
+              "Topology emulation for TriangleFan not supported with SSBO Vertex Fetch mode");
+        }
+
+        /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+         * vertices. */
+        uint32_t base_vert_count = this->vertex_idx;
+        uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+        uint32_t fan_index_count = num_triangles * 3;
+        BLI_assert(num_triangles > 0);
+
+        uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+        uint32_t *index_buffer = nullptr;
+
+        MTLTemporaryBuffer allocation =
+            context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+                alloc_size, 128);
+        index_buffer = (uint32_t *)allocation.data;
+
+        int a = 0;
+        for (int i = 0; i < num_triangles; i++) {
+          index_buffer[a++] = 0;
+          index_buffer[a++] = i + 1;
+          index_buffer[a++] = i + 2;
+        }
+
+        @autoreleasepool {
+
+          id<MTLBuffer> index_buffer_mtl = nil;
+          uint32_t index_buffer_offset = 0;
+
+          /* Region of scratch buffer used for topology emulation element data.
+           * NOTE(Metal): We do not need to manually flush as the entire scratch
+           * buffer for current command buffer is flushed upon submission. */
+          index_buffer_mtl = allocation.metal_buffer;
+          index_buffer_offset = allocation.buffer_offset;
+
+          /* Set depth stencil state (requires knowledge of primitive type). */
+          context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+          /* Bind Vertex Buffer. */
+          rps.bind_vertex_buffer(
+              current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+          /* Draw. */
+          [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+                          indexCount:fan_index_count
+                           indexType:MTLIndexTypeUInt32
+                         indexBuffer:index_buffer_mtl
+                   indexBufferOffset:index_buffer_offset];
+          context_->main_command_buffer.register_draw_counters(fan_index_count);
+        }
+        rendered = true;
+      } break;
+      case GPU_PRIM_LINE_LOOP: {
+        /* Patch final vertex of line loop to close. Rendered using LineStrip.
+         * Note: vertex_len represents original length, however, allocated Metal
+         * buffer contains space for one extra vertex when LineLoop is used. */
+        uchar *buffer_data = reinterpret_cast<uchar *>(current_allocation_.data);
+        memcpy(buffer_data + (vertex_len)*vertex_format.stride,
+               buffer_data,
+               vertex_format.stride);
+        this->vertex_idx++;
+      } break;
+      default: {
+        BLI_assert_unreachable();
+      } break;
+    }
   }
-  else {
+
+  /* If not yet rendered, run through main render path. LineLoop primitive topology emulation
+   * will simply amend original data passed into default rendering path. */
+  if (!rendered) {
     MTLPrimitiveType primitive_type = metal_primitive_type_;
     int vertex_count = this->vertex_idx;
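
The triangle-fan branch above expands the fan into an indexed triangle list on the CPU using the (0, i + 1, i + 2) pattern. A standalone sketch of that expansion, with an illustrative helper name rather than the MTLImmediate member code:

// Standalone sketch: a triangle fan over vertices [0..N-1] becomes (N - 2)
// triangles, each sharing the fan's first vertex.
#include <cstdint>
#include <vector>

static std::vector<uint32_t> build_tri_fan_indices(uint32_t vertex_count)
{
  std::vector<uint32_t> indices;
  if (vertex_count < 3) {
    return indices; /* A fan needs at least three vertices. */
  }
  const uint32_t num_triangles = vertex_count - 2;
  indices.reserve(num_triangles * 3);
  for (uint32_t i = 0; i < num_triangles; i++) {
    indices.push_back(0); /* Every triangle shares the fan's first vertex. */
    indices.push_back(i + 1);
    indices.push_back(i + 2);
  }
  return indices;
}

int main()
{
  /* Example: 5 fan vertices -> 3 triangles -> {0,1,2, 0,2,3, 0,3,4}. */
  return build_tri_fan_indices(5).size() == 9 ? 0 : 1;
}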


@@ -39,10 +39,10 @@ static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type)
       return MTLPrimitiveTypePoint;
     case GPU_PRIM_LINES:
     case GPU_PRIM_LINES_ADJ:
-    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLine;
     case GPU_PRIM_LINE_STRIP:
     case GPU_PRIM_LINE_STRIP_ADJ:
+    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLineStrip;
     case GPU_PRIM_TRIS:
     case GPU_PRIM_TRI_FAN:


@@ -1621,6 +1621,7 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   }
 
   /* Verify Texture and vertex buffer alignment. */
+  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
   int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
   int bytes_per_row = bytes_per_pixel * w_;
@@ -1628,12 +1629,40 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   uint32_t align_requirement = static_cast<uint32_t>(
       [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
 
-  /* Verify per-vertex size aligns with texture size. */
-  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
-  BLI_assert(bytes_per_pixel == format->stride &&
-             "Pixel format stride MUST match the texture format stride -- These being different "
-             "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
-  UNUSED_VARS_NDEBUG(format);
+  /* If stride is larger than bytes per pixel, but format has multiple attributes,
+   * split attributes across several pixels. */
+  if (format->stride > bytes_per_pixel && format->attr_len > 1) {
+
+    /* We need to increase the number of pixels available to store additional attributes.
+     * First ensure that the total stride of the vertex format fits uniformly into
+     * multiple pixels. If these sizes are different, then attributes are of differing
+     * sizes and this operation is unsupported. */
+    if (bytes_per_pixel * format->attr_len != format->stride) {
+      BLI_assert_msg(false,
+                     "Cannot split attributes across multiple pixels as attribute format sizes "
+                     "do not match.");
+      return false;
+    }
+
+    /* Provide a single pixel per attribute. */
+    /* Increase bytes per row to ensure there are enough bytes for all vertex attribute data. */
+    bytes_per_row *= format->attr_len;
+    BLI_assert(bytes_per_row == format->stride * w_);
+
+    /* Multiply width of image to provide one attribute per pixel. */
+    w_ *= format->attr_len;
+    BLI_assert(bytes_per_row == bytes_per_pixel * w_);
+    BLI_assert_msg(w_ == mtl_vbo->vertex_len * format->attr_len,
+                   "Image should contain one pixel for each attribute in every vertex.");
+  }
+  else {
+    /* Verify per-vertex size aligns with texture size. */
+    BLI_assert(bytes_per_pixel == format->stride &&
+               "Pixel format stride MUST match the texture format stride -- These being different "
+               "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex. "
+               "If multiple attributes are used, each attribute is packed into its own "
+               "individual pixel when the stride length is exceeded.");
+  }
 
   /* Create texture descriptor. */
   BLI_assert(type_ == GPU_TEXTURE_BUFFER);
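
The packing branch above only changes two quantities, bytes_per_row and the texture width, both scaled by format->attr_len once the stride is confirmed to be a whole multiple of the pixel size. A standalone sketch of that arithmetic with hypothetical numbers (two 16-byte attributes, 1000 vertices); the names are illustrative, not the MTLTexture members:

// Standalone sketch of the attribute-splitting arithmetic: a vertex format with
// two float4 attributes has a 32-byte stride, while an RGBA32F texel holds 16
// bytes, so each vertex is spread across two texels.
#include <cassert>

int main()
{
  const int attr_len = 2;         /* Attributes per vertex (assumed). */
  const int stride = 32;          /* Bytes per vertex in the VBO (assumed). */
  const int bytes_per_pixel = 16; /* e.g. a 4 x 32-bit float pixel format. */
  const int vertex_len = 1000;    /* Vertices in the buffer (assumed). */

  int width = vertex_len; /* One texel per vertex initially. */
  int bytes_per_row = bytes_per_pixel * width;

  if (stride > bytes_per_pixel && attr_len > 1) {
    /* Splitting is only valid when attributes are uniformly sized. */
    assert(bytes_per_pixel * attr_len == stride);

    bytes_per_row *= attr_len; /* Room for every attribute's bytes in the row. */
    width *= attr_len;         /* One texel per attribute per vertex. */

    assert(bytes_per_row == stride * vertex_len);
    assert(width == vertex_len * attr_len); /* 2000 texels for 1000 vertices. */
  }
  return 0;
}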