GPUPass: Refactor gpupass caching system to allow fast gpumaterial creation.

This is part of the work needed to refactor the material parameters update.

The gpupass cache is now queried before a gpumaterial is added to the
deferred compilation queue, so materials whose pass is already compiled
skip deferred compilation entirely.

We store gpupasses in a single linked list, grouped by their hashes
(sketched after the commit metadata below). This is not the most efficient
structure, but it can be improved later.
Clément Foucault committed on 2018-06-07 11:58:15 +02:00
commit cdbda1c3d8, parent 366ac88d68
Notes (blender-bot, 2023-02-14 09:48:25 +01:00): Referenced by issue #55327, "Skip deferred rendering when material is cached"
7 changed files with 214 additions and 111 deletions
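
The hash-grouped linked list can be illustrated with a small, self-contained C sketch. The names here (Pass, pass_cache_get, pass_hash) are hypothetical and simplified, not the actual Blender code: the real implementation hashes the generated fragment code and defines with BLI_hash_mm2a, and only builds and compares the full shader strings when a hash collision is possible.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* One cache entry; entries with equal hashes are kept adjacent in the list. */
typedef struct Pass {
	struct Pass *next;
	uint32_t hash;
	char *code;            /* stands in for the concatenated shader sources */
	unsigned int refcount; /* entries with refcount 0 can be garbage collected */
} Pass;

static Pass *pass_cache = NULL;

/* FNV-1a, standing in for BLI_hash_mm2a. */
static uint32_t pass_hash(const char *code)
{
	uint32_t h = 2166136261u;
	for (; *code; code++) {
		h = (h ^ (uint8_t)*code) * 16777619u;
	}
	return h;
}

/* Return the first cached pass with this hash, or NULL. */
static Pass *pass_cache_lookup(uint32_t hash)
{
	for (Pass *p = pass_cache; p; p = p->next) {
		if (p->hash == hash) {
			return p;
		}
	}
	return NULL;
}

/* Walk only the group of equal-hash entries and compare the full code. */
static Pass *pass_cache_resolve_collision(Pass *first, const char *code, uint32_t hash)
{
	for (Pass *p = first; p && p->hash == hash; p = p->next) {
		if (strcmp(p->code, code) == 0) {
			return p;
		}
	}
	return NULL;
}

/* Find or create a pass, preserving the "equal hashes are adjacent" invariant. */
static Pass *pass_cache_get(const char *code)
{
	uint32_t hash = pass_hash(code);
	Pass *first = pass_cache_lookup(hash);
	Pass *found = first ? pass_cache_resolve_collision(first, code, hash) : NULL;
	if (found) {
		found->refcount++;
		return found;
	}
	Pass *pass = calloc(1, sizeof(Pass));
	pass->hash = hash;
	pass->code = strdup(code);
	pass->refcount = 1;
	if (first) {
		/* Insert right after the first entry with the same hash. */
		pass->next = first->next;
		first->next = pass;
	}
	else {
		/* No entry with this hash yet: prepend to the list. */
		pass->next = pass_cache;
		pass_cache = pass;
	}
	return pass;
}

Keeping equal-hash entries adjacent means a lookup can return as soon as a lone hash match is found, and only a genuine collision forces the full string comparison; a new entry is spliced in right after its hash group so the invariant holds without a secondary table.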


@@ -732,7 +732,14 @@ static DRWShadingGroup *drw_shgroup_material_create_ex(GPUPass *gpupass, DRWPass
return NULL;
}
DRWShadingGroup *grp = drw_shgroup_create_ex(GPU_pass_shader(gpupass), pass);
GPUShader *sh = GPU_pass_shader_get(gpupass);
if (!sh) {
/* Shader not yet compiled */
return NULL;
}
DRWShadingGroup *grp = drw_shgroup_create_ex(sh, pass);
return grp;
}
@@ -808,7 +815,7 @@ DRWShadingGroup *DRW_shgroup_material_create(
DRWShadingGroup *shgroup = drw_shgroup_material_create_ex(gpupass, pass);
if (shgroup) {
drw_shgroup_init(shgroup, GPU_pass_shader(gpupass));
drw_shgroup_init(shgroup, GPU_pass_shader_get(gpupass));
drw_shgroup_material_inputs(shgroup, material);
}
@@ -825,7 +832,7 @@ DRWShadingGroup *DRW_shgroup_material_instance_create(
shgroup->type = DRW_SHG_INSTANCE;
shgroup->instance_geom = geom;
drw_call_calc_orco(ob, shgroup->instance_orcofac);
drw_shgroup_instance_init(shgroup, GPU_pass_shader(gpupass), geom, format);
drw_shgroup_instance_init(shgroup, GPU_pass_shader_get(gpupass), geom, format);
drw_shgroup_material_inputs(shgroup, material);
}
@@ -843,7 +850,7 @@ DRWShadingGroup *DRW_shgroup_material_empty_tri_batch_create(
if (shgroup) {
/* Calling drw_shgroup_init will cause it to call GWN_draw_primitive(). */
drw_shgroup_init(shgroup, GPU_pass_shader(gpupass));
drw_shgroup_init(shgroup, GPU_pass_shader_get(gpupass));
shgroup->type = DRW_SHG_TRIANGLE_BATCH;
shgroup->instance_count = tri_count * 3;
drw_shgroup_material_inputs(shgroup, material);


@@ -62,7 +62,6 @@ typedef struct DRWDeferredShader {
struct DRWDeferredShader *prev, *next;
GPUMaterial *mat;
char *vert, *geom, *frag, *defs;
} DRWDeferredShader;
typedef struct DRWShaderCompiler {
@@ -80,11 +79,6 @@ typedef struct DRWShaderCompiler {
static void drw_deferred_shader_free(DRWDeferredShader *dsh)
{
/* Make sure it is not queued before freeing. */
MEM_SAFE_FREE(dsh->vert);
MEM_SAFE_FREE(dsh->geom);
MEM_SAFE_FREE(dsh->frag);
MEM_SAFE_FREE(dsh->defs);
MEM_freeN(dsh);
}
@@ -129,12 +123,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data, short *stop,
BLI_spin_unlock(&comp->list_lock);
/* Do the compilation. */
GPU_material_generate_pass(
comp->mat_compiling->mat,
comp->mat_compiling->vert,
comp->mat_compiling->geom,
comp->mat_compiling->frag,
comp->mat_compiling->defs);
GPU_material_compile(comp->mat_compiling->mat);
*progress = (float)comp->shaders_done / (float)total;
*do_update = true;
@@ -165,25 +154,21 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
MEM_freeN(comp);
}
static void drw_deferred_shader_add(
GPUMaterial *mat, const char *vert, const char *geom, const char *frag_lib, const char *defines)
static void drw_deferred_shader_add(GPUMaterial *mat)
{
/* Do not defer the compilation if we are rendering for an image. */
if (DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
/* Double checking that this GPUMaterial is not going to be
* compiled by another thread. */
DRW_deferred_shader_remove(mat);
GPU_material_generate_pass(mat, vert, geom, frag_lib, defines);
printf("%s GPUMaterial %p\n", __func__, mat);
GPU_material_compile(mat);
return;
}
DRWDeferredShader *dsh = MEM_callocN(sizeof(DRWDeferredShader), "Deferred Shader");
dsh->mat = mat;
if (vert) dsh->vert = BLI_strdup(vert);
if (geom) dsh->geom = BLI_strdup(geom);
if (frag_lib) dsh->frag = BLI_strdup(frag_lib);
if (defines) dsh->defs = BLI_strdup(defines);
BLI_assert(DST.draw_ctx.evil_C);
wmWindowManager *wm = CTX_wm_manager(DST.draw_ctx.evil_C);
@@ -361,10 +346,13 @@ GPUMaterial *DRW_shader_create_from_world(
if (mat == NULL) {
mat = GPU_material_from_nodetree(
scene, wo->nodetree, &wo->gpumaterial, engine_type, options);
scene, wo->nodetree, &wo->gpumaterial, engine_type, options,
vert, geom, frag_lib, defines);
}
drw_deferred_shader_add(mat, vert, geom, frag_lib, defines);
if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
drw_deferred_shader_add(mat);
}
return mat;
}
@@ -380,10 +368,13 @@ GPUMaterial *DRW_shader_create_from_material(
if (mat == NULL) {
mat = GPU_material_from_nodetree(
scene, ma->nodetree, &ma->gpumaterial, engine_type, options);
scene, ma->nodetree, &ma->gpumaterial, engine_type, options,
vert, geom, frag_lib, defines);
}
drw_deferred_shader_add(mat, vert, geom, frag_lib, defines);
if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
drw_deferred_shader_add(mat);
}
return mat;
}


@@ -246,9 +246,9 @@ struct GPUUniformBuffer *GPU_material_sss_profile_get(
GPUMaterial *GPU_material_from_nodetree_find(
struct ListBase *gpumaterials, const void *engine_type, int options);
GPUMaterial *GPU_material_from_nodetree(
struct Scene *scene, struct bNodeTree *ntree, struct ListBase *gpumaterials, const void *engine_type, int options);
void GPU_material_generate_pass(
GPUMaterial *mat, const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines);
struct Scene *scene, struct bNodeTree *ntree, struct ListBase *gpumaterials, const void *engine_type, int options,
const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines);
void GPU_material_compile(GPUMaterial *mat);
void GPU_material_free(struct ListBase *gpumaterial);
void GPU_materials_free(void);
@@ -270,6 +270,7 @@ bool GPU_material_do_color_management(GPUMaterial *mat);
bool GPU_material_use_domain_surface(GPUMaterial *mat);
bool GPU_material_use_domain_volume(GPUMaterial *mat);
void GPU_pass_cache_init(void);
void GPU_pass_cache_garbage_collect(void);
void GPU_pass_cache_free(void);


@@ -40,10 +40,11 @@
#include "BLI_blenlib.h"
#include "BLI_hash_mm2a.h"
#include "BLI_linklist.h"
#include "BLI_link_utils.h"
#include "BLI_utildefines.h"
#include "BLI_dynstr.h"
#include "BLI_ghash.h"
#include "BLI_threads.h"
#include "PIL_time.h"
@@ -75,39 +76,54 @@ static char *glsl_material_library = NULL;
* same for 2 different Materials. Unused GPUPasses are freed by garbage collection.
**/
static LinkNode *pass_cache = NULL; /* GPUPass */
/* Only use one linklist that contains the GPUPasses grouped by hash. */
static GPUPass *pass_cache = NULL;
static SpinLock pass_cache_spin;
static uint32_t gpu_pass_hash(const char *vert, const char *geom, const char *frag, const char *defs)
static uint32_t gpu_pass_hash(const char *frag_gen, const char *defs)
{
BLI_HashMurmur2A hm2a;
BLI_hash_mm2a_init(&hm2a, 0);
BLI_hash_mm2a_add(&hm2a, (unsigned char *)frag, strlen(frag));
BLI_hash_mm2a_add(&hm2a, (unsigned char *)vert, strlen(vert));
BLI_hash_mm2a_add(&hm2a, (unsigned char *)frag_gen, strlen(frag_gen));
if (defs)
BLI_hash_mm2a_add(&hm2a, (unsigned char *)defs, strlen(defs));
if (geom)
BLI_hash_mm2a_add(&hm2a, (unsigned char *)geom, strlen(geom));
return BLI_hash_mm2a_end(&hm2a);
}
/* Search by hash then by exact string match. */
static GPUPass *gpu_pass_cache_lookup(
const char *vert, const char *geom, const char *frag, const char *defs, uint32_t hash)
/* Search by hash only. Return first pass with the same hash.
* There is hash collision if (pass->next && pass->next->hash == hash) */
static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
{
for (LinkNode *ln = pass_cache; ln; ln = ln->next) {
GPUPass *pass = (GPUPass *)ln->link;
BLI_spin_lock(&pass_cache_spin);
/* Could be optimized with a Lookup table. */
for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
if (pass->hash == hash) {
/* Note: Could be made faster if that becomes a real bottleneck. */
if ((defs != NULL) && (strcmp(pass->defines, defs) != 0)) { /* Pass */ }
else if ((geom != NULL) && (strcmp(pass->geometrycode, geom) != 0)) { /* Pass */ }
else if ((strcmp(pass->fragmentcode, frag) == 0) &&
(strcmp(pass->vertexcode, vert) == 0))
{
return pass;
}
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return NULL;
}
/* Check all possible passes with the same hash. */
static GPUPass *gpu_pass_cache_resolve_collision(
GPUPass *pass, const char *vert, const char *geom, const char *frag, const char *defs, uint32_t hash)
{
BLI_spin_lock(&pass_cache_spin);
/* Collision, need to strcmp the whole shader. */
for (; pass && (pass->hash == hash); pass = pass->next) {
if ((defs != NULL) && (strcmp(pass->defines, defs) != 0)) { /* Pass */ }
else if ((geom != NULL) && (strcmp(pass->geometrycode, geom) != 0)) { /* Pass */ }
else if ((strcmp(pass->fragmentcode, frag) == 0) &&
(strcmp(pass->vertexcode, vert) == 0))
{
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return NULL;
}
@@ -1099,12 +1115,12 @@ void GPU_code_generate_glsl_lib(void)
/* GPU pass binding/unbinding */
GPUShader *GPU_pass_shader(GPUPass *pass)
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
static void gpu_nodes_extract_dynamic_inputs(GPUShader *shader, ListBase *inputs, ListBase *nodes)
void GPU_nodes_extract_dynamic_inputs(GPUShader *shader, ListBase *inputs, ListBase *nodes)
{
GPUNode *node;
GPUInput *next, *input;
@@ -1896,16 +1912,25 @@ void GPU_nodes_prune(ListBase *nodes, GPUNodeLink *outlink)
}
}
static bool gpu_pass_is_valid(GPUPass *pass)
{
/* Shader is not null if compilation is successful,
* refcount is positive if compilation has not yet been done. */
return (pass->shader != NULL || pass->refcount > 0);
}
GPUPass *GPU_generate_pass_new(
GPUMaterial *material,
GPUNodeLink *frag_outlink, struct GPUVertexAttribs *attribs,
ListBase *nodes, ListBase *inputs,
const char *vert_code, const char *geom_code,
const char *frag_lib, const char *defines)
GPUNodeLink *frag_outlink,
struct GPUVertexAttribs *attribs,
ListBase *nodes,
const char *vert_code,
const char *geom_code,
const char *frag_lib,
const char *defines)
{
char *vertexcode, *geometrycode, *fragmentcode;
GPUShader *shader;
GPUPass *pass;
GPUPass *pass = NULL, *pass_hash = NULL;
/* prune unused nodes */
GPU_nodes_prune(nodes, frag_outlink);
@@ -1914,6 +1939,24 @@ GPUPass *GPU_generate_pass_new(
/* generate code */
char *fragmentgen = code_generate_fragment(material, nodes, frag_outlink->output);
/* Cache lookup: Reuse shaders already compiled */
uint32_t hash = gpu_pass_hash(fragmentgen, defines);
pass_hash = gpu_pass_cache_lookup(hash);
if (pass_hash && (pass_hash->next == NULL || pass_hash->next->hash != hash)) {
/* No collision, just return the pass. */
MEM_freeN(fragmentgen);
if (!gpu_pass_is_valid(pass_hash)) {
/* Shader has already been created but failed to compile. */
return NULL;
}
pass_hash->refcount += 1;
return pass_hash;
}
/* Either the shader is not compiled or there is a hash collision...
* continue generating the shader strings. */
char *tmp = BLI_strdupcat(frag_lib, glsl_material_library);
vertexcode = code_generate_vertex(nodes, vert_code, (geom_code != NULL));
@@ -1923,51 +1966,62 @@ GPUPass *GPU_generate_pass_new(
MEM_freeN(fragmentgen);
MEM_freeN(tmp);
/* Cache lookup: Reuse shaders already compiled */
uint32_t hash = gpu_pass_hash(vertexcode, geometrycode, fragmentcode, defines);
pass = gpu_pass_cache_lookup(vertexcode, geometrycode, fragmentcode, defines, hash);
if (pass_hash) {
/* Cache lookup: Reuse shaders already compiled */
pass = gpu_pass_cache_resolve_collision(pass_hash, vertexcode, geometrycode, fragmentcode, defines, hash);
}
if (pass) {
/* Cache hit. Reuse the same GPUPass and GPUShader. */
shader = pass->shader;
pass->refcount += 1;
if (!gpu_pass_is_valid(pass)) {
/* Shader has already been created but failed to compile. */
return NULL;
}
MEM_SAFE_FREE(vertexcode);
MEM_SAFE_FREE(fragmentcode);
MEM_SAFE_FREE(geometrycode);
pass->refcount += 1;
}
else {
/* Cache miss. (Re)compile the shader. */
shader = GPU_shader_create(vertexcode,
fragmentcode,
geometrycode,
NULL,
defines);
/* We still create a pass even if shader compilation
* fails to avoid trying to compile again and again. */
pass = MEM_callocN(sizeof(GPUPass), "GPUPass");
pass->shader = shader;
pass->shader = NULL;
pass->refcount = 1;
pass->hash = hash;
pass->vertexcode = vertexcode;
pass->fragmentcode = fragmentcode;
pass->geometrycode = geometrycode;
pass->libcode = glsl_material_library;
pass->defines = (defines) ? BLI_strdup(defines) : NULL;
pass->compiled = false;
BLI_linklist_prepend(&pass_cache, pass);
BLI_spin_lock(&pass_cache_spin);
if (pass_hash != NULL) {
/* Add after the first pass having the same hash. */
pass->next = pass_hash->next;
pass_hash->next = pass;
}
else {
/* No other pass has the same hash, just prepend to the list. */
BLI_LINKS_PREPEND(pass_cache, pass);
}
BLI_spin_unlock(&pass_cache_spin);
}
/* did compilation failed ? */
if (!shader) {
gpu_nodes_free(nodes);
/* Pass will not be used. Don't increment refcount. */
pass->refcount--;
return NULL;
}
else {
gpu_nodes_extract_dynamic_inputs(shader, inputs, nodes);
return pass;
return pass;
}
void GPU_pass_compile(GPUPass *pass)
{
if (!pass->compiled) {
pass->shader = GPU_shader_create(pass->vertexcode,
pass->fragmentcode,
pass->geometrycode,
NULL,
pass->defines);
pass->compiled = true;
}
}
@@ -2006,23 +2060,36 @@ void GPU_pass_cache_garbage_collect(void)
lasttime = ctime;
LinkNode *next, **prev_ln = &pass_cache;
for (LinkNode *ln = pass_cache; ln; ln = next) {
GPUPass *pass = (GPUPass *)ln->link;
next = ln->next;
BLI_spin_lock(&pass_cache_spin);
GPUPass *next, **prev_pass = &pass_cache;
for (GPUPass *pass = pass_cache; pass; pass = next) {
next = pass->next;
if (pass->refcount == 0) {
gpu_pass_free(pass);
/* Remove from list */
MEM_freeN(ln);
*prev_ln = next;
*prev_pass = next;
gpu_pass_free(pass);
}
else {
prev_ln = &ln->next;
prev_pass = &pass->next;
}
}
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_cache_init(void)
{
BLI_spin_init(&pass_cache_spin);
}
void GPU_pass_cache_free(void)
{
BLI_linklist_free(pass_cache, (LinkNodeFreeFP)gpu_pass_free);
BLI_spin_lock(&pass_cache_spin);
while (pass_cache) {
GPUPass *next = pass_cache->next;
gpu_pass_free(pass_cache);
pass_cache = next;
}
BLI_spin_unlock(&pass_cache_spin);
BLI_spin_end(&pass_cache_spin);
}


@@ -157,33 +157,30 @@ typedef struct GPUInput {
} GPUInput;
struct GPUPass {
struct GPUPass *next;
struct GPUShader *shader;
char *fragmentcode;
char *geometrycode;
char *vertexcode;
char *defines;
const char *libcode;
unsigned int refcount; /* Orphaned GPUPasses gets freed by the garbage collector. */
uint32_t hash; /* Identity hash generated from all GLSL code. */
bool compiled; /* Did we already try to compile the attached GPUShader? */
};
typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass_new(
GPUMaterial *material,
GPUNodeLink *frag_outlink, struct GPUVertexAttribs *attribs,
ListBase *nodes, ListBase *inputs,
ListBase *nodes,
const char *vert_code, const char *geom_code,
const char *frag_lib, const char *defines);
GPUPass *GPU_generate_pass(
ListBase *nodes, ListBase *inputs, struct GPUNodeLink *outlink,
struct GPUVertexAttribs *attribs, int *builtin,
const GPUMatType type, const char *name,
const bool use_opensubdiv);
struct GPUShader *GPU_pass_shader(GPUPass *pass);
struct GPUShader *GPU_pass_shader_get(GPUPass *pass);
void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs, ListBase *nodes);
void GPU_nodes_get_vertex_attributes(ListBase *nodes, struct GPUVertexAttribs *attribs);
void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);
@@ -191,6 +188,7 @@ void GPU_pass_bind(GPUPass *pass, ListBase *inputs, double time, int mipmap);
void GPU_pass_update_uniforms(GPUPass *pass, ListBase *inputs);
void GPU_pass_unbind(GPUPass *pass, ListBase *inputs);
void GPU_pass_compile(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
void GPU_pass_free_nodes(ListBase *nodes);


@@ -602,7 +602,8 @@ GPUMaterial *GPU_material_from_nodetree_find(
* so only do this when they are needed.
*/
GPUMaterial *GPU_material_from_nodetree(
Scene *scene, struct bNodeTree *ntree, ListBase *gpumaterials, const void *engine_type, int options)
Scene *scene, struct bNodeTree *ntree, ListBase *gpumaterials, const void *engine_type, int options,
const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines)
{
LinkData *link;
bool has_volume_output, has_surface_output;
@@ -631,11 +632,38 @@ GPUMaterial *GPU_material_from_nodetree(
* generated VBOs are ready to accept the future shader. */
GPU_nodes_prune(&mat->nodes, mat->outlink);
GPU_nodes_get_vertex_attributes(&mat->nodes, &mat->attribs);
mat->status = GPU_MAT_QUEUED;
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass_new(mat,
mat->outlink,
&mat->attribs,
&mat->nodes,
vert_code,
geom_code,
frag_lib,
defines);
if (mat->pass == NULL) {
/* We had a cache hit and the shader has already failed to compile. */
mat->status = GPU_MAT_FAILED;
}
else {
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
}
else {
mat->status = GPU_MAT_QUEUED;
}
}
}
else {
mat->status = GPU_MAT_FAILED;
}
/* note that even if building the shader fails in some way, we still keep
* it to avoid trying to compile again and again, and simple do not use
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing */
link = MEM_callocN(sizeof(LinkData), "GPUMaterialLink");
@@ -645,17 +673,26 @@ GPUMaterial *GPU_material_from_nodetree(
return mat;
}
void GPU_material_generate_pass(
GPUMaterial *mat, const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines)
void GPU_material_compile(GPUMaterial *mat)
{
BLI_assert(mat->pass == NULL); /* Only run once! */
if (mat->outlink) {
mat->pass = GPU_generate_pass_new(
mat, mat->outlink, &mat->attribs, &mat->nodes, &mat->inputs, vert_code, geom_code, frag_lib, defines);
mat->status = (mat->pass) ? GPU_MAT_SUCCESS : GPU_MAT_FAILED;
/* Only run once! */
BLI_assert(mat->status == GPU_MAT_QUEUED);
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
GPU_pass_compile(mat->pass);
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
mat->status = GPU_MAT_SUCCESS;
GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
}
else {
mat->status = GPU_MAT_FAILED;
GPU_pass_free_nodes(&mat->nodes);
GPU_pass_release(mat->pass);
mat->pass = NULL;
}
}


@@ -186,6 +186,8 @@ void WM_init_opengl(void)
GPU_set_anisotropic(U.anisotropic_filter);
GPU_set_gpu_mipmapping(U.use_gpu_mipmap);
GPU_pass_cache_init();
#ifdef WITH_OPENSUBDIV
BKE_subsurf_osd_init();
#endif