GPUBatch: Merge phase and ownership flags and clean up their usage

Also add new flags to communicate specific behavior to the future backend.
Clément Foucault 2020-08-10 03:17:35 +02:00
parent e0f5f95e66
commit 9443da6166
Notes: blender-bot 2023-02-14 00:06:52 +01:00
Referenced by commit 7ce764c9ec, Fix T79896 Awful performance with Dyntopo on
Referenced by issue #79896, Awful performance with Dyntopo on
6 changed files with 123 additions and 119 deletions
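
For orientation, an illustrative before/after sketch of the call-site pattern this commit changes (not part of the diff itself): the separate owns_flag bit-field and GPUBatchPhase state on GPUBatch are folded into a single eGPUBatchFlag field, with ownership in the low bits and status flags such as GPU_BATCH_BUILDING and GPU_BATCH_DIRTY_* in the high bits.

/* Before: two bookkeeping fields, one for ownership, one for life-cycle state. */
batch->owns_flag |= GPU_BATCH_OWNS_INDEX;
batch->phase = GPU_BATCH_READY_TO_DRAW;

/* After: a single eGPUBatchFlag bit-field carries both. */
batch->flag |= GPU_BATCH_OWNS_INDEX; /* ownership bits (below bit 16) */
batch->flag &= ~GPU_BATCH_BUILDING;  /* status bits (bit 16 and above) */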


@ -1248,7 +1248,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
saved_elem_ranges[i] = cache->surface_per_mat[i]->elem;
/* Avoid deletion as the batch is owner. */
cache->surface_per_mat[i]->elem = NULL;
cache->surface_per_mat[i]->owns_flag &= ~GPU_BATCH_OWNS_INDEX;
cache->surface_per_mat[i]->flag &= ~GPU_BATCH_OWNS_INDEX;
}
}
/* We can't discard batches at this point as they have been


@ -156,7 +156,7 @@ GPUBatch *DRW_temp_batch_instance_request(DRWInstanceDataList *idatalist,
((batch->inst[0] == instancer->verts[0]) &&
(batch->inst[1] == instancer->verts[1]));
bool is_compatible = (batch->prim_type == geom->prim_type) && instancer_compat &&
(batch->phase == GPU_BATCH_READY_TO_DRAW) && (batch->elem == geom->elem);
(batch->flag & GPU_BATCH_BUILDING) == 0 && (batch->elem == geom->elem);
for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN && is_compatible; i++) {
if (batch->verts[i] != geom->verts[i]) {
is_compatible = false;
@ -167,7 +167,7 @@ GPUBatch *DRW_temp_batch_instance_request(DRWInstanceDataList *idatalist,
instancing_batch_references_remove(batch);
GPU_batch_clear(batch);
/* Save args and init later. */
batch->phase = GPU_BATCH_READY_TO_BUILD;
batch->flag = GPU_BATCH_BUILDING;
handle->buf = buf;
handle->instancer = instancer;
handle->geom = geom;
@ -234,7 +234,7 @@ void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist)
BLI_memblock_iternew(idatalist->pool_instancing, &iter);
while ((handle_inst = BLI_memblock_iterstep(&iter))) {
GPUBatch *batch = handle_inst->batch;
if (batch && batch->phase == GPU_BATCH_READY_TO_BUILD) {
if (batch && batch->flag == GPU_BATCH_BUILDING) {
GPUVertBuf *inst_buf = handle_inst->buf;
GPUBatch *inst_batch = handle_inst->instancer;
GPUBatch *geom = handle_inst->geom;
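
As a reading aid, the reuse test in this file now keys off the GPU_BATCH_BUILDING status bit instead of the removed GPU_BATCH_READY_TO_DRAW phase. A hypothetical helper (not part of the commit) expressing the same condition:

/* Hypothetical helper: a temporary batch requested this frame stays tagged
 * GPU_BATCH_BUILDING until DRW_instance_buffer_finish() rebuilds it from the
 * saved arguments; only then is it safe to reuse as-is. */
static bool drw_temp_batch_is_ready(const GPUBatch *batch)
{
  return (batch->flag & GPU_BATCH_BUILDING) == 0;
}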


@ -1151,8 +1151,6 @@ static void knifetool_draw(const bContext *UNUSED(C), ARegion *UNUSED(region), v
/* draw any snapped verts first */
rgba_uchar_to_float(fcol, kcd->colors.point_a);
GPU_batch_uniform_4fv(batch, "color", fcol);
GPU_matrix_bind(batch->interface);
GPU_shader_set_srgb_uniform(batch->interface);
GPU_point_size(11);
if (snapped_verts_count > 0) {
GPU_batch_draw_range(batch, 0, snapped_verts_count);


@ -26,51 +26,78 @@
#pragma once
#include "BLI_utildefines.h"
#include "GPU_element.h"
#include "GPU_shader.h"
#include "GPU_shader_interface.h"
#include "GPU_vertex_buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
GPU_BATCH_UNUSED,
GPU_BATCH_READY_TO_FORMAT,
GPU_BATCH_READY_TO_BUILD,
GPU_BATCH_BUILDING,
GPU_BATCH_READY_TO_DRAW,
} GPUBatchPhase;
#define GPU_BATCH_VBO_MAX_LEN 6
#define GPU_BATCH_INST_VBO_MAX_LEN 2
#define GPU_BATCH_VAO_STATIC_LEN 3
#define GPU_BATCH_VAO_DYN_ALLOC_COUNT 16
typedef struct GPUBatch {
/* geometry */
typedef enum eGPUBatchFlag {
/** Invalid default state. */
GPU_BATCH_INVALID = 0,
/** GPUVertBuf ownership. (One bit per vbo) */
GPU_BATCH_OWNS_VBO = (1 << 0),
GPU_BATCH_OWNS_VBO_MAX = (GPU_BATCH_OWNS_VBO << (GPU_BATCH_VBO_MAX_LEN - 1)),
GPU_BATCH_OWNS_VBO_ANY = ((GPU_BATCH_OWNS_VBO << GPU_BATCH_VBO_MAX_LEN) - 1),
/** Instance GPUVertBuf ownership. (One bit per vbo) */
GPU_BATCH_OWNS_INST_VBO = (GPU_BATCH_OWNS_VBO_MAX << 1),
GPU_BATCH_OWNS_INST_VBO_MAX = (GPU_BATCH_OWNS_INST_VBO << (GPU_BATCH_INST_VBO_MAX_LEN - 1)),
GPU_BATCH_OWNS_INST_VBO_ANY = ((GPU_BATCH_OWNS_INST_VBO << GPU_BATCH_INST_VBO_MAX_LEN) - 1) &
~GPU_BATCH_OWNS_VBO_ANY,
/** GPUIndexBuf ownership. */
GPU_BATCH_OWNS_INDEX = (GPU_BATCH_OWNS_INST_VBO_MAX << 1),
/** Has been initialized. At least one VBO is set. */
GPU_BATCH_INIT = (1 << 16),
/** Batch is initialized but its VBOs are still being populated. (optional) */
GPU_BATCH_BUILDING = (1 << 16),
/** Cached data needs to be rebuilt. (VAO, PSO, ...) */
GPU_BATCH_DIRTY_BINDINGS = (1 << 17),
GPU_BATCH_DIRTY_INTERFACE = (1 << 18),
GPU_BATCH_DIRTY = (GPU_BATCH_DIRTY_BINDINGS | GPU_BATCH_DIRTY_INTERFACE),
} eGPUBatchFlag;
#define GPU_BATCH_OWNS_NONE GPU_BATCH_INVALID
BLI_STATIC_ASSERT(GPU_BATCH_OWNS_INDEX < GPU_BATCH_INIT,
"eGPUBatchFlag: Error: status flags are shadowed by the ownership bits!")
ENUM_OPERATORS(eGPUBatchFlag)
#ifdef __cplusplus
extern "C" {
#endif
/**
* IMPORTANT: Do not allocate manually as the real struct is bigger (i.e: GLBatch). This is only
* the common and "public" part of the struct. Use the provided allocator.
**/
typedef struct GPUBatch {
/** verts[0] is required, others can be NULL */
GPUVertBuf *verts[GPU_BATCH_VBO_MAX_LEN];
/** Instance attributes. */
GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN];
/** NULL if element list not needed */
GPUIndexBuf *elem;
GPUShader *shader;
/** Bookkeeping. */
eGPUBatchFlag flag;
/** Type of geometry to draw. */
GPUPrimType prim_type;
/* cached values (avoid dereferencing later) */
uint32_t vao_id;
const struct GPUShaderInterface *interface;
/* book-keeping */
uint owns_flag;
/** used to free all vaos. this implies all vaos were created under the same context. */
/** Current assigned shader. */
struct GPUShader *shader;
/** Last context used to draw this batch. */
struct GPUContext *context;
GPUBatchPhase phase;
struct GPUShaderInterface *interface;
GLuint vao_id;
/* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
* for each shader interface. Start with a static number of vaos and fallback to dynamic count
@ -91,16 +118,16 @@ typedef struct GPUBatch {
};
} GPUBatch;
enum {
GPU_BATCH_OWNS_VBO = (1 << 0),
/* each vbo index gets bit-shifted */
GPU_BATCH_OWNS_INSTANCES = (1 << 30),
GPU_BATCH_OWNS_INDEX = (1u << 31u),
};
GPUBatch *GPU_batch_calloc(uint count);
GPUBatch *GPU_batch_create_ex(GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag);
void GPU_batch_init_ex(GPUBatch *, GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag);
GPUBatch *GPU_batch_create_ex(GPUPrimType prim,
GPUVertBuf *vert,
GPUIndexBuf *elem,
eGPUBatchFlag own_flag);
void GPU_batch_init_ex(GPUBatch *batch,
GPUPrimType prim,
GPUVertBuf *vert,
GPUIndexBuf *elem,
eGPUBatchFlag own_flag);
void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src);
#define GPU_batch_create(prim, verts, elem) GPU_batch_create_ex(prim, verts, elem, 0)
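
To make the ownership bit layout above concrete, here is a standalone sketch (not part of the commit) that mirrors the enum arithmetic with the header's constants (GPU_BATCH_VBO_MAX_LEN = 6, GPU_BATCH_INST_VBO_MAX_LEN = 2) and prints the resulting masks. Per-slot ownership is tested with (GPU_BATCH_OWNS_VBO << v), exactly as GPU_batch_clear() does in the next file.

#include <stdio.h>

/* Mirrors eGPUBatchFlag from GPU_batch.h above; names shortened for the demo. */
enum {
  VBO_MAX_LEN = 6,
  INST_VBO_MAX_LEN = 2,
  OWNS_VBO = (1 << 0),
  OWNS_VBO_MAX = (OWNS_VBO << (VBO_MAX_LEN - 1)),                                /* 0x020 */
  OWNS_VBO_ANY = ((OWNS_VBO << VBO_MAX_LEN) - 1),                                /* 0x03F */
  OWNS_INST_VBO = (OWNS_VBO_MAX << 1),                                           /* 0x040 */
  OWNS_INST_VBO_MAX = (OWNS_INST_VBO << (INST_VBO_MAX_LEN - 1)),                 /* 0x080 */
  OWNS_INST_VBO_ANY = ((OWNS_INST_VBO << INST_VBO_MAX_LEN) - 1) & ~OWNS_VBO_ANY, /* 0x0C0 */
  OWNS_INDEX = (OWNS_INST_VBO_MAX << 1),                                         /* 0x100 */
};

int main(void)
{
  for (int v = 0; v < VBO_MAX_LEN; v++) {
    printf("vbo slot %d ownership bit = 0x%03X\n", v, OWNS_VBO << v);
  }
  printf("OWNS_VBO_ANY      = 0x%03X\n", OWNS_VBO_ANY);
  printf("OWNS_INST_VBO_ANY = 0x%03X\n", OWNS_INST_VBO_ANY);
  printf("OWNS_INDEX        = 0x%03X\n", OWNS_INDEX);
  /* All ownership bits stay below bit 16, which is what the BLI_STATIC_ASSERT
   * in the header guarantees so they cannot shadow the status flags. */
  return 0;
}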


@ -94,19 +94,22 @@ GPUBatch *GPU_batch_calloc(uint count)
GPUBatch *GPU_batch_create_ex(GPUPrimType prim_type,
GPUVertBuf *verts,
GPUIndexBuf *elem,
uint owns_flag)
eGPUBatchFlag owns_flag)
{
GPUBatch *batch = GPU_batch_calloc(1);
GPU_batch_init_ex(batch, prim_type, verts, elem, owns_flag);
return batch;
}
void GPU_batch_init_ex(
GPUBatch *batch, GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, uint owns_flag)
void GPU_batch_init_ex(GPUBatch *batch,
GPUPrimType prim_type,
GPUVertBuf *verts,
GPUIndexBuf *elem,
eGPUBatchFlag owns_flag)
{
#if TRUST_NO_ONE
assert(verts != NULL);
#endif
BLI_assert(verts != NULL);
/* Do not pass any other flag */
BLI_assert((owns_flag & ~(GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX)) == 0);
batch->verts[0] = verts;
for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) {
@ -117,15 +120,16 @@ void GPU_batch_init_ex(
}
batch->elem = elem;
batch->prim_type = prim_type;
batch->phase = GPU_BATCH_READY_TO_DRAW;
batch->is_dynamic_vao_count = false;
batch->owns_flag = owns_flag;
batch->flag = owns_flag | GPU_BATCH_INIT | GPU_BATCH_DIRTY;
batch->context = NULL;
batch->shader = NULL;
}
/* This will share the VBOs with the new batch. */
void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src)
{
GPU_batch_init_ex(batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, 0);
GPU_batch_init_ex(
batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, GPU_BATCH_INVALID);
batch_dst->prim_type = batch_src->prim_type;
for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) {
@ -135,25 +139,25 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src)
void GPU_batch_clear(GPUBatch *batch)
{
if (batch->owns_flag & GPU_BATCH_OWNS_INDEX) {
GPU_batch_vao_cache_clear(batch);
if (batch->flag & GPU_BATCH_OWNS_INDEX) {
GPU_indexbuf_discard(batch->elem);
}
if (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) {
GPU_vertbuf_discard(batch->inst[0]);
GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]);
}
if ((batch->owns_flag & ~GPU_BATCH_OWNS_INDEX) != 0) {
for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
if (batch->verts[v] == NULL) {
break;
}
if (batch->owns_flag & (1 << v)) {
GPU_vertbuf_discard(batch->verts[v]);
if (batch->flag & GPU_BATCH_OWNS_VBO_ANY) {
for (int v = 0; (v < GPU_BATCH_VBO_MAX_LEN) && batch->verts[v]; v++) {
if (batch->flag & (GPU_BATCH_OWNS_VBO << v)) {
GPU_VERTBUF_DISCARD_SAFE(batch->verts[v]);
}
}
}
GPU_batch_vao_cache_clear(batch);
batch->phase = GPU_BATCH_UNUSED;
if (batch->flag & GPU_BATCH_OWNS_INST_VBO_ANY) {
for (int v = 0; (v < GPU_BATCH_INST_VBO_MAX_LEN) && batch->inst[v]; v++) {
if (batch->flag & (GPU_BATCH_OWNS_INST_VBO << v)) {
GPU_VERTBUF_DISCARD_SAFE(batch->inst[v]);
}
}
}
batch->flag = GPU_BATCH_INVALID;
}
void GPU_batch_discard(GPUBatch *batch)
@ -162,105 +166,77 @@ void GPU_batch_discard(GPUBatch *batch)
MEM_freeN(batch);
}
/* NOTE: Override ONLY the first instance vbo (and free them if owned). */
void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo)
{
#if TRUST_NO_ONE
assert(inst != NULL);
#endif
/* redo the bindings */
GPU_batch_vao_cache_clear(batch);
BLI_assert(inst);
batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
if (batch->inst[0] != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES)) {
if (batch->inst[0] && (batch->flag & GPU_BATCH_OWNS_INST_VBO)) {
GPU_vertbuf_discard(batch->inst[0]);
GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]);
}
batch->inst[0] = inst;
if (own_vbo) {
batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES;
}
else {
batch->owns_flag &= ~GPU_BATCH_OWNS_INSTANCES;
}
SET_FLAG_FROM_TEST(batch->flag, own_vbo, GPU_BATCH_OWNS_INST_VBO);
}
/* NOTE: Override any previously assigned elem (and free it if owned). */
void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo)
{
BLI_assert(elem != NULL);
/* redo the bindings */
GPU_batch_vao_cache_clear(batch);
BLI_assert(elem);
batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
if (batch->elem != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INDEX)) {
if (batch->elem && (batch->flag & GPU_BATCH_OWNS_INDEX)) {
GPU_indexbuf_discard(batch->elem);
}
batch->elem = elem;
if (own_ibo) {
batch->owns_flag |= GPU_BATCH_OWNS_INDEX;
}
else {
batch->owns_flag &= ~GPU_BATCH_OWNS_INDEX;
}
SET_FLAG_FROM_TEST(batch->flag, own_ibo, GPU_BATCH_OWNS_INDEX);
}
/* A bit of a quick hack. Should be streamlined as the vbos handling */
int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo)
{
/* redo the bindings */
GPU_batch_vao_cache_clear(batch);
BLI_assert(insts);
batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
for (uint v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
if (batch->inst[v] == NULL) {
#if TRUST_NO_ONE
/* for now all VertexBuffers must have same vertex_len */
if (batch->inst[0] != NULL) {
/* Allow for different size of vertex buf (will choose the smallest number of verts). */
// assert(insts->vertex_len == batch->inst[0]->vertex_len);
assert(own_vbo == ((batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) != 0));
if (batch->inst[0]) {
/* Allow for different size of vertex buf (will choose the smallest
* number of verts). */
// BLI_assert(insts->vertex_len == batch->inst[0]->vertex_len);
}
#endif
batch->inst[v] = insts;
if (own_vbo) {
batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES;
}
SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_INST_VBO << v));
return v;
}
}
/* we only make it this far if there is no room for another GPUVertBuf */
#if TRUST_NO_ONE
assert(false);
#endif
BLI_assert(0 && "Not enough Instance VBO slot in batch");
return -1;
}
/* Returns the index of verts in the batch. */
int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo)
{
/* redo the bindings */
GPU_batch_vao_cache_clear(batch);
BLI_assert(verts);
batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
if (batch->verts[v] == NULL) {
#if TRUST_NO_ONE
/* for now all VertexBuffers must have same vertex_len */
if (batch->verts[0] != NULL) {
assert(verts->vertex_len == batch->verts[0]->vertex_len);
BLI_assert(verts->vertex_len == batch->verts[0]->vertex_len);
}
#endif
batch->verts[v] = verts;
/* TODO: mark dirty so we can keep attribute bindings up-to-date */
if (own_vbo) {
batch->owns_flag |= (1 << v);
}
SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_VBO << v));
return v;
}
}
/* we only make it this far if there is no room for another GPUVertBuf */
#if TRUST_NO_ONE
assert(false);
#endif
BLI_assert(0 && "Not enough VBO slot in batch");
return -1;
}
@ -368,6 +344,9 @@ void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader)
{
batch->interface = shader->interface;
batch->shader = shader;
if (batch->flag & GPU_BATCH_DIRTY_BINDINGS) {
GPU_batch_vao_cache_clear(batch);
}
batch->vao_id = batch_vao_get(batch);
GPU_shader_bind(batch->shader);
GPU_matrix_bind(batch->shader->interface);
@ -510,7 +489,7 @@ static void batch_update_program_bindings(GPUBatch *batch, uint i_first)
* \{ */
#define GET_UNIFORM \
const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); \
const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->shader->interface, name); \
BLI_assert(uniform);
void GPU_batch_uniform_1i(GPUBatch *batch, const char *name, int value)
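
The gpu_batch.c changes above replace the eager GPU_batch_vao_cache_clear() calls in the buffer setters with a GPU_BATCH_DIRTY_BINDINGS tag that GPU_batch_set_shader() checks before binding. A hedged call-site sketch (the wrapper function is hypothetical; the GPU_batch_* calls are the ones shown in the diff):

/* Swapping buffers only marks the batch dirty; the VAO cache is rebuilt
 * lazily on the next GPU_batch_set_shader() call. */
static void example_swap_geometry(GPUBatch *batch,
                                  GPUVertBuf *vbo,
                                  GPUIndexBuf *ibo,
                                  GPUShader *shader)
{
  GPU_batch_vertbuf_add_ex(batch, vbo, true); /* sets ownership bit + DIRTY_BINDINGS */
  GPU_batch_elembuf_set(batch, ibo, true);    /* idem for the index buffer */
  GPU_batch_set_shader(batch, shader);        /* clears the VAO cache if dirty */
}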


@ -317,7 +317,7 @@ GPUBatch *immBeginBatch(GPUPrimType prim_type, uint vertex_len)
imm.vertex_data = verts->data;
imm.batch = GPU_batch_create_ex(prim_type, verts, NULL, GPU_BATCH_OWNS_VBO);
imm.batch->phase = GPU_BATCH_BUILDING;
imm.batch->flag |= GPU_BATCH_BUILDING;
return imm.batch;
}
@ -419,7 +419,7 @@ void immEnd(void)
/* TODO: resize only if vertex count is much smaller */
}
GPU_batch_set_shader(imm.batch, imm.bound_program);
imm.batch->phase = GPU_BATCH_READY_TO_DRAW;
imm.batch->flag &= ~GPU_BATCH_BUILDING;
imm.batch = NULL; /* don't free, batch belongs to caller */
}
else {