DRW: Opti: Replace bound tex/ubo tracking array by bitfields

release_texture_slots() and release_ubo_slots() were a hotspot when
drawing, accounting for ~9% of total CPU samples for no good reason.

This was because the loops iterated over all GPU_max_textures() slots,
which was overkill and slow.

Replace them with simple bitwise operations on 64-bit masks.
Clément Foucault 2019-04-05 20:45:32 +02:00
parent 2219f28a68
commit fefc9c95e4
Notes: blender-bot 2023-02-14 03:05:14 +01:00
Referenced by issue #64186, Graphical persistance in EEVEE
Referenced by issue #63404, EEVEE Volume Material Outpouts don't show in viewport or renders.
3 changed files with 100 additions and 83 deletions
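
For context, a minimal standalone sketch of 64-bit bitfield slot tracking. This is illustrative only: the names MAX_SLOTS, slot_flag, slot_find_empty and slots_release are made up for the example, and it uses the GCC/Clang builtin __builtin_ctzll for the bit scan, whereas the commit itself uses Blender's bitscan_forward_uint() from BLI_math_bits.h.

/* Sketch: track up to 64 bind slots in two 64-bit masks. */
#include <stdbool.h>
#include <stdint.h>

#define MAX_SLOTS 64

static uint64_t bound_slots;         /* Bit i set = slot i is currently bound. */
static uint64_t bound_slots_persist; /* Subset that survives a non-persistent release. */

/* Mark a slot as bound; optionally flag it as persistent. */
static void slot_flag(int idx, bool persist)
{
  uint64_t bit = 1llu << idx;
  bound_slots |= bit;
  if (persist) {
    bound_slots_persist |= bit;
  }
}

/* Return the first free slot, or MAX_SLOTS if every slot is taken. */
static int slot_find_empty(void)
{
  uint64_t empty = ~bound_slots;
  return (empty != 0) ? __builtin_ctzll(empty) : MAX_SLOTS;
}

/* Releasing slots is a single bitwise operation instead of a loop
 * over GPU_max_textures() entries. */
static void slots_release(bool with_persist)
{
  if (with_persist) {
    bound_slots = 0;
    bound_slots_persist = 0;
  }
  else {
    bound_slots &= bound_slots_persist;
  }
}

Releasing every non-persistent slot collapses to the single AND in slots_release(), which is what removes the per-slot loop this commit was profiling against.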


@@ -644,18 +644,7 @@ static void drw_viewport_var_init(void)
   }
-  /* Alloc array of texture reference. */
-  if (DST.RST.bound_texs == NULL) {
-    DST.RST.bound_texs = MEM_callocN(sizeof(GPUTexture *) * GPU_max_textures(), "Bound GPUTexture refs");
-  }
-  if (DST.RST.bound_tex_slots == NULL) {
-    DST.RST.bound_tex_slots = MEM_callocN(sizeof(char) * GPU_max_textures(), "Bound Texture Slots");
-  }
-  if (DST.RST.bound_ubos == NULL) {
-    DST.RST.bound_ubos = MEM_callocN(sizeof(GPUUniformBuffer *) * GPU_max_ubo_binds(), "Bound GPUUniformBuffer refs");
-  }
-  if (DST.RST.bound_ubo_slots == NULL) {
-    DST.RST.bound_ubo_slots = MEM_callocN(sizeof(char) * GPU_max_ubo_binds(), "Bound Ubo Slots");
-  }
+  memset(&DST.RST, 0x0, sizeof(DST.RST));
   if (G_draw.view_ubo == NULL) {
     G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(ViewUboStorage), NULL);
@@ -2796,11 +2785,6 @@ void DRW_engines_free(void)
   DRW_TEXTURE_FREE_SAFE(G_draw.weight_ramp);
   MEM_SAFE_FREE(g_pos_format);
-  MEM_SAFE_FREE(DST.RST.bound_texs);
-  MEM_SAFE_FREE(DST.RST.bound_tex_slots);
-  MEM_SAFE_FREE(DST.RST.bound_ubos);
-  MEM_SAFE_FREE(DST.RST.bound_ubo_slots);
   MEM_SAFE_FREE(DST.uniform_names.buffer);
   DRW_opengl_context_disable();


@@ -313,6 +313,7 @@ typedef struct DRWDebugSphere {
 /* ------------- DRAW MANAGER ------------ */
+#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
 #define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
 #define STENCIL_UNDEFINED 256
 typedef struct DRWManager {
@@ -394,12 +395,16 @@ typedef struct DRWManager {
   /** GPU Resource State: Memory storage between drawing. */
   struct {
-    GPUTexture **bound_texs;
-    char *bound_tex_slots;
-    int bind_tex_inc;
-    GPUUniformBuffer **bound_ubos;
-    char *bound_ubo_slots;
-    int bind_ubo_inc;
+    /* High end GPUs support up to 32 binds per shader stage.
+     * We only use textures during the vertex and fragment stages,
+     * so 2 * 32 slots is a nice limit. */
+    GPUTexture *bound_texs[DST_MAX_SLOTS];
+    uint64_t bound_tex_slots;
+    uint64_t bound_tex_slots_persist;
+    GPUUniformBuffer *bound_ubos[DST_MAX_SLOTS];
+    uint64_t bound_ubo_slots;
+    uint64_t bound_ubo_slots_persist;
   } RST;
   struct {


@@ -22,9 +22,9 @@
 #include "draw_manager.h"
+#include "BLI_math_bits.h"
 #include "BLI_mempool.h"
 #include "BKE_global.h"
 #include "GPU_draw.h"
@@ -892,55 +892,97 @@ enum {
   BIND_PERSIST = 2, /* Release slot only after the next shader change. */
 };
 
+static void set_bound_flags(uint64_t *slots, uint64_t *persist_slots, int slot_idx, char bind_type)
+{
+  uint64_t slot = 1llu << slot_idx;
+  *slots |= slot;
+  if (bind_type == BIND_PERSIST) {
+    *persist_slots |= slot;
+  }
+}
+
+static int get_empty_slot_index(uint64_t slots)
+{
+  uint64_t empty_slots = ~slots;
+  /* Find first empty slot using bitscan. */
+  if (empty_slots != 0) {
+    if ((empty_slots & 0xFFFFFFFFlu) != 0) {
+      return (int)bitscan_forward_uint(empty_slots);
+    }
+    else {
+      return (int)bitscan_forward_uint(empty_slots >> 32) + 32;
+    }
+  }
+  else {
+    /* Greater than GPU_max_textures() */
+    return 99999;
+  }
+}
+
 static void bind_texture(GPUTexture *tex, char bind_type)
 {
-  int index;
-  char *slot_flags = DST.RST.bound_tex_slots;
-  int bind_num = GPU_texture_bound_number(tex);
-  if (bind_num == -1) {
-    for (int i = 0; i < GPU_max_textures(); ++i) {
-      index = DST.RST.bind_tex_inc = (DST.RST.bind_tex_inc + 1) % GPU_max_textures();
-      if (slot_flags[index] == BIND_NONE) {
-        if (DST.RST.bound_texs[index] != NULL) {
-          GPU_texture_unbind(DST.RST.bound_texs[index]);
-        }
-        GPU_texture_bind(tex, index);
-        DST.RST.bound_texs[index] = tex;
-        slot_flags[index] = bind_type;
-        // printf("Binds Texture %d %p\n", DST.RST.bind_tex_inc, tex);
-        return;
-      }
-    }
-    printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
-  }
-  slot_flags[bind_num] = bind_type;
+  int idx = GPU_texture_bound_number(tex);
+  if (idx == -1) {
+    /* Texture isn't bound yet. Find an empty slot and bind it. */
+    idx = get_empty_slot_index(DST.RST.bound_tex_slots);
+    if (idx < GPU_max_textures()) {
+      GPUTexture **gpu_tex_slot = &DST.RST.bound_texs[idx];
+      /* Unbind any previous texture. */
+      if (*gpu_tex_slot != NULL) {
+        GPU_texture_unbind(*gpu_tex_slot);
+      }
+      GPU_texture_bind(tex, idx);
+      *gpu_tex_slot = tex;
+    }
+    else {
+      printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
+      return;
+    }
+  }
+  else {
+    /* This texture slot was released but the tex
+     * is still bound. Just flag the slot again. */
+    BLI_assert(DST.RST.bound_texs[idx] == tex);
+  }
+  set_bound_flags(&DST.RST.bound_tex_slots,
+                  &DST.RST.bound_tex_slots_persist,
+                  idx, bind_type);
 }
 
 static void bind_ubo(GPUUniformBuffer *ubo, char bind_type)
 {
-  int index;
-  char *slot_flags = DST.RST.bound_ubo_slots;
-  int bind_num = GPU_uniformbuffer_bindpoint(ubo);
-  if (bind_num == -1) {
-    for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
-      index = DST.RST.bind_ubo_inc = (DST.RST.bind_ubo_inc + 1) % GPU_max_ubo_binds();
-      if (slot_flags[index] == BIND_NONE) {
-        if (DST.RST.bound_ubos[index] != NULL) {
-          GPU_uniformbuffer_unbind(DST.RST.bound_ubos[index]);
-        }
-        GPU_uniformbuffer_bind(ubo, index);
-        DST.RST.bound_ubos[index] = ubo;
-        slot_flags[index] = bind_type;
-        return;
-      }
-    }
-    /* printf so user can report bad behavior */
-    printf("Not enough ubo slots! This should not happen!\n");
-    /* This is not depending on user input.
-     * It is our responsibility to make sure there is enough slots. */
-    BLI_assert(0);
-  }
-  slot_flags[bind_num] = bind_type;
+  int idx = GPU_uniformbuffer_bindpoint(ubo);
+  if (idx == -1) {
+    /* UBO isn't bound yet. Find an empty slot and bind it. */
+    idx = get_empty_slot_index(DST.RST.bound_ubo_slots);
+    if (idx < GPU_max_ubo_binds()) {
+      GPUUniformBuffer **gpu_ubo_slot = &DST.RST.bound_ubos[idx];
+      /* Unbind any previous UBO. */
+      if (*gpu_ubo_slot != NULL) {
+        GPU_uniformbuffer_unbind(*gpu_ubo_slot);
+      }
+      GPU_uniformbuffer_bind(ubo, idx);
+      *gpu_ubo_slot = ubo;
+    }
+    else {
+      /* printf so user can report bad behavior */
+      printf("Not enough ubo slots! This should not happen!\n");
+      /* This is not depending on user input.
+       * It is our responsibility to make sure there is enough slots. */
+      BLI_assert(0);
+      return;
+    }
+  }
+  else {
+    /* This UBO slot was released but the UBO is
+     * still bound here. Just flag the slot again. */
+    BLI_assert(DST.RST.bound_ubos[idx] == ubo);
+  }
+  set_bound_flags(&DST.RST.bound_ubo_slots,
+                  &DST.RST.bound_ubo_slots_persist,
+                  idx, bind_type);
 }
 
 #ifndef NDEBUG
@@ -994,37 +1036,23 @@ static bool ubo_bindings_validate(DRWShadingGroup *shgroup)
 static void release_texture_slots(bool with_persist)
 {
   if (with_persist) {
-    memset(DST.RST.bound_tex_slots, 0x0, sizeof(*DST.RST.bound_tex_slots) * GPU_max_textures());
+    DST.RST.bound_tex_slots = 0;
+    DST.RST.bound_tex_slots_persist = 0;
   }
   else {
-    for (int i = 0; i < GPU_max_textures(); ++i) {
-      if (DST.RST.bound_tex_slots[i] != BIND_PERSIST) {
-        DST.RST.bound_tex_slots[i] = BIND_NONE;
-      }
-    }
+    DST.RST.bound_tex_slots &= DST.RST.bound_tex_slots_persist;
   }
-  /* Reset so that slots are consistently assigned for different shader
-   * draw calls, to avoid shader specialization/patching by the driver. */
-  DST.RST.bind_tex_inc = 0;
 }
 
 static void release_ubo_slots(bool with_persist)
 {
   if (with_persist) {
-    memset(DST.RST.bound_ubo_slots, 0x0, sizeof(*DST.RST.bound_ubo_slots) * GPU_max_ubo_binds());
+    DST.RST.bound_ubo_slots = 0;
+    DST.RST.bound_ubo_slots_persist = 0;
   }
   else {
-    for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
-      if (DST.RST.bound_ubo_slots[i] != BIND_PERSIST) {
-        DST.RST.bound_ubo_slots[i] = BIND_NONE;
-      }
-    }
+    DST.RST.bound_ubo_slots &= DST.RST.bound_ubo_slots_persist;
  }
-  /* Reset so that slots are consistently assigned for different shader
-   * draw calls, to avoid shader specialization/patching by the driver. */
-  DST.RST.bind_ubo_inc = 0;
 }
 
 static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
@@ -1331,7 +1359,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
   }
 
   /* Clear Bound textures */
-  for (int i = 0; i < GPU_max_textures(); i++) {
+  for (int i = 0; i < DST_MAX_SLOTS; i++) {
     if (DST.RST.bound_texs[i] != NULL) {
       GPU_texture_unbind(DST.RST.bound_texs[i]);
       DST.RST.bound_texs[i] = NULL;
@@ -1339,7 +1367,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
   }
 
   /* Clear Bound Ubos */
-  for (int i = 0; i < GPU_max_ubo_binds(); i++) {
+  for (int i = 0; i < DST_MAX_SLOTS; i++) {
     if (DST.RST.bound_ubos[i] != NULL) {
       GPU_uniformbuffer_unbind(DST.RST.bound_ubos[i]);
       DST.RST.bound_ubos[i] = NULL;