Metal: Fix GPencil texture buffer attribute packing issue and cutting tool rendering.

Adds Line Loop topology support for the cutting tool, and adds support for packing multiple vertex attributes across individual pixels within a texture buffer.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D16783
Jason Fielder 2022-12-20 14:08:37 +01:00 committed by Clément Foucault
parent 2712265598
commit df1fe18ed7
Notes: blender-bot 2023-02-14 06:00:51 +01:00
Referenced by issue #96261, Metal Viewport
3 changed files with 113 additions and 59 deletions
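
The line-loop handling below hinges on one idea: Metal exposes no line-loop primitive type, so the loop is drawn as a line strip whose final vertex repeats the first. A minimal standalone C++ sketch of that idea, using illustrative names and types rather than the Blender implementation:

// Standalone sketch (not the Blender code): a line loop over N vertices is
// emulated as a line strip over N + 1 vertices, where the extra final vertex
// is a copy of the first and places the closing line segment.
#include <vector>

struct Vert {
  float x, y;
};

static std::vector<Vert> emulate_line_loop_as_strip(const std::vector<Vert> &loop_verts)
{
  std::vector<Vert> strip_verts = loop_verts;
  if (!loop_verts.empty()) {
    /* One extra vertex at the end, copied from the first, closes the loop. */
    strip_verts.push_back(loop_verts.front());
  }
  return strip_verts;
}

int main()
{
  const std::vector<Vert> quad_loop = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};
  const std::vector<Vert> strip = emulate_line_loop_as_strip(quad_loop);
  /* strip has 5 vertices; drawn as a line strip it produces the closed quad outline. */
  return strip.size() == 5 ? 0 : 1;
}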


@@ -39,8 +39,16 @@ uchar *MTLImmediate::begin()
   metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
   has_begun_ = true;
 
+  /* If prim type is line loop, add an extra vertex at the end for placing the closing line,
+   * as metal does not support this primitive type. We treat this as a Line strip with one
+   * extra value. */
+  int vertex_alloc_length = vertex_len;
+  if (prim_type == GPU_PRIM_LINE_LOOP) {
+    vertex_alloc_length++;
+  }
+
   /* Allocate a range of data and return host-accessible pointer. */
-  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_alloc_length);
   current_allocation_ = context_->get_scratchbuffer_manager()
                             .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
   [current_allocation_.metal_buffer retain];
@@ -266,71 +274,88 @@ void MTLImmediate::end()
    * For immediate mode, generating these is currently very cheap, as we use
    * fast scratch buffer allocations. Though we may benefit from caching of
    * frequently used buffer sizes. */
+  bool rendered = false;
   if (mtl_needs_topology_emulation(this->prim_type)) {
 
-    /* Debug safety check for SSBO FETCH MODE. */
-    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
-      BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
-    }
-
-    /* Emulate Tri-fan. */
-    if (this->prim_type == GPU_PRIM_TRI_FAN) {
-      /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
-       * vertices. */
-      uint32_t base_vert_count = this->vertex_idx;
-      uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
-      uint32_t fan_index_count = num_triangles * 3;
-      BLI_assert(num_triangles > 0);
-
-      uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
-      uint32_t *index_buffer = nullptr;
-
-      MTLTemporaryBuffer allocation =
-          context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
-              alloc_size, 128);
-      index_buffer = (uint32_t *)allocation.data;
-
-      int a = 0;
-      for (int i = 0; i < num_triangles; i++) {
-        index_buffer[a++] = 0;
-        index_buffer[a++] = i + 1;
-        index_buffer[a++] = i + 2;
-      }
-
-      @autoreleasepool {
-
-        id<MTLBuffer> index_buffer_mtl = nil;
-        uint32_t index_buffer_offset = 0;
-
-        /* Region of scratch buffer used for topology emulation element data.
-         * NOTE(Metal): We do not need to manually flush as the entire scratch
-         * buffer for current command buffer is flushed upon submission. */
-        index_buffer_mtl = allocation.metal_buffer;
-        index_buffer_offset = allocation.buffer_offset;
-
-        /* Set depth stencil state (requires knowledge of primitive type). */
-        context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
-
-        /* Bind Vertex Buffer. */
-        rps.bind_vertex_buffer(
-            current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
-
-        /* Draw. */
-        [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
-                        indexCount:fan_index_count
-                         indexType:MTLIndexTypeUInt32
-                       indexBuffer:index_buffer_mtl
-                 indexBufferOffset:index_buffer_offset];
-      }
-    }
-    else {
-      /* TODO(Metal): Topology emulation for line loop.
-       * NOTE(Metal): This is currently not used anywhere and modified at the high
-       * level for efficiency in such cases. */
-      BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
-    }
+    switch (this->prim_type) {
+      case GPU_PRIM_TRI_FAN: {
+        /* Debug safety check for SSBO FETCH MODE. */
+        if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+          BLI_assert(
+              false &&
+              "Topology emulation for TriangleFan not supported with SSBO Vertex Fetch mode");
+        }
+
+        /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+         * vertices. */
+        uint32_t base_vert_count = this->vertex_idx;
+        uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+        uint32_t fan_index_count = num_triangles * 3;
+        BLI_assert(num_triangles > 0);
+
+        uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+        uint32_t *index_buffer = nullptr;
+
+        MTLTemporaryBuffer allocation =
+            context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+                alloc_size, 128);
+        index_buffer = (uint32_t *)allocation.data;
+
+        int a = 0;
+        for (int i = 0; i < num_triangles; i++) {
+          index_buffer[a++] = 0;
+          index_buffer[a++] = i + 1;
+          index_buffer[a++] = i + 2;
+        }
+
+        @autoreleasepool {
+
+          id<MTLBuffer> index_buffer_mtl = nil;
+          uint32_t index_buffer_offset = 0;
+
+          /* Region of scratch buffer used for topology emulation element data.
+           * NOTE(Metal): We do not need to manually flush as the entire scratch
+           * buffer for current command buffer is flushed upon submission. */
+          index_buffer_mtl = allocation.metal_buffer;
+          index_buffer_offset = allocation.buffer_offset;
+
+          /* Set depth stencil state (requires knowledge of primitive type). */
+          context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+          /* Bind Vertex Buffer. */
+          rps.bind_vertex_buffer(
+              current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+          /* Draw. */
+          [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+                          indexCount:fan_index_count
+                           indexType:MTLIndexTypeUInt32
+                         indexBuffer:index_buffer_mtl
+                   indexBufferOffset:index_buffer_offset];
+          context_->main_command_buffer.register_draw_counters(fan_index_count);
+        }
+        rendered = true;
+      } break;
+      case GPU_PRIM_LINE_LOOP: {
+        /* Patch final vertex of line loop to close. Rendered using LineStrip.
+         * Note: vertex_len represents original length, however, allocated Metal
+         * buffer contains space for one extra vertex when LineLoop is used. */
+        uchar *buffer_data = reinterpret_cast<uchar *>(current_allocation_.data);
+        memcpy(buffer_data + (vertex_len)*vertex_format.stride,
+               buffer_data,
+               vertex_format.stride);
+        this->vertex_idx++;
+      } break;
+      default: {
+        BLI_assert_unreachable();
+      } break;
+    }
   }
-  else {
+
+  /* If not yet rendered, run through main render path. LineLoop primitive topology emulation
+   * will simply amend original data passed into default rendering path. */
+  if (!rendered) {
     MTLPrimitiveType primitive_type = metal_primitive_type_;
     int vertex_count = this->vertex_idx;
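
The triangle-fan branch above expands the fan into an indexed triangle list on the CPU using the (0, i + 1, i + 2) pattern. A standalone sketch of that expansion, with an illustrative helper name rather than the MTLImmediate member code:

// Standalone sketch: a triangle fan over vertices [0..N-1] becomes (N - 2)
// triangles, each sharing the fan's first vertex.
#include <cstdint>
#include <vector>

static std::vector<uint32_t> build_tri_fan_indices(uint32_t vertex_count)
{
  std::vector<uint32_t> indices;
  if (vertex_count < 3) {
    return indices; /* A fan needs at least three vertices. */
  }
  const uint32_t num_triangles = vertex_count - 2;
  indices.reserve(num_triangles * 3);
  for (uint32_t i = 0; i < num_triangles; i++) {
    indices.push_back(0); /* Every triangle shares the fan's first vertex. */
    indices.push_back(i + 1);
    indices.push_back(i + 2);
  }
  return indices;
}

int main()
{
  /* Example: 5 fan vertices -> 3 triangles -> {0,1,2, 0,2,3, 0,3,4}. */
  return build_tri_fan_indices(5).size() == 9 ? 0 : 1;
}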


@@ -39,10 +39,10 @@ static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type)
       return MTLPrimitiveTypePoint;
     case GPU_PRIM_LINES:
     case GPU_PRIM_LINES_ADJ:
-    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLine;
     case GPU_PRIM_LINE_STRIP:
     case GPU_PRIM_LINE_STRIP_ADJ:
+    case GPU_PRIM_LINE_LOOP:
       return MTLPrimitiveTypeLineStrip;
     case GPU_PRIM_TRIS:
     case GPU_PRIM_TRI_FAN:


@@ -1621,6 +1621,7 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   }
 
   /* Verify Texture and vertex buffer alignment. */
+  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
   int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
   int bytes_per_row = bytes_per_pixel * w_;
@@ -1628,12 +1629,40 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   uint32_t align_requirement = static_cast<uint32_t>(
       [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
 
-  /* Verify per-vertex size aligns with texture size. */
-  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
-  BLI_assert(bytes_per_pixel == format->stride &&
-             "Pixel format stride MUST match the texture format stride -- These being different "
-             "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
-  UNUSED_VARS_NDEBUG(format);
+  /* If stride is larger than bytes per pixel, but format has multiple attributes,
+   * split attributes across several pixels. */
+  if (format->stride > bytes_per_pixel && format->attr_len > 1) {
+
+    /* We need to increase the number of pixels available to store additional attributes.
+     * First ensure that the total stride of the vertex format fits uniformly into
+     * multiple pixels. If these sizes are different, then attributes are of differing
+     * sizes and this operation is unsupported. */
+    if (bytes_per_pixel * format->attr_len != format->stride) {
+      BLI_assert_msg(false,
+                     "Cannot split attributes across multiple pixels as attribute format sizes "
+                     "do not match.");
+      return false;
+    }
+
+    /* Provide a single pixel per attribute. */
+    /* Increase bytes per row to ensure there are enough bytes for all vertex attribute data. */
+    bytes_per_row *= format->attr_len;
+    BLI_assert(bytes_per_row == format->stride * w_);
+
+    /* Multiply width of image to provide one attribute per pixel. */
+    w_ *= format->attr_len;
+    BLI_assert(bytes_per_row == bytes_per_pixel * w_);
+    BLI_assert_msg(w_ == mtl_vbo->vertex_len * format->attr_len,
+                   "Image should contain one pixel for each attribute in every vertex.");
+  }
+  else {
+    /* Verify per-vertex size aligns with texture size. */
+    BLI_assert(bytes_per_pixel == format->stride &&
+               "Pixel format stride MUST match the texture format stride -- These being different "
+               "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex. "
+               "If multiple attributes are used, each attribute is packed into its own "
+               "individual pixel when the stride length is exceeded.");
+  }
 
   /* Create texture descriptor. */
   BLI_assert(type_ == GPU_TEXTURE_BUFFER);
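
The packing branch above only changes two quantities, bytes_per_row and the texture width, both scaled by format->attr_len once the stride is confirmed to be a whole multiple of the pixel size. A standalone sketch of that arithmetic with hypothetical numbers (two 16-byte attributes, 1000 vertices); the names are illustrative, not the MTLTexture members:

// Standalone sketch of the attribute-splitting arithmetic: a vertex format with
// two float4 attributes has a 32-byte stride, while an RGBA32F texel holds 16
// bytes, so each vertex is spread across two texels.
#include <cassert>

int main()
{
  const int attr_len = 2;         /* Attributes per vertex (assumed). */
  const int stride = 32;          /* Bytes per vertex in the VBO (assumed). */
  const int bytes_per_pixel = 16; /* e.g. a 4 x 32-bit float pixel format. */
  const int vertex_len = 1000;    /* Vertices in the buffer (assumed). */

  int width = vertex_len; /* One texel per vertex initially. */
  int bytes_per_row = bytes_per_pixel * width;

  if (stride > bytes_per_pixel && attr_len > 1) {
    /* Splitting is only valid when attributes are uniformly sized. */
    assert(bytes_per_pixel * attr_len == stride);

    bytes_per_row *= attr_len; /* Room for every attribute's bytes in the row. */
    width *= attr_len;         /* One texel per attribute per vertex. */

    assert(bytes_per_row == stride * vertex_len);
    assert(width == vertex_len * attr_len); /* 2000 texels for 1000 vertices. */
  }
  return 0;
}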