GPUMaterial: Add Material shader cache.

This is mostly to avoid re-compilation when using undo/redo operators.
This also has the benefit to reuse the same GPUShader for multiple materials using the same nodetree configuration.

The cache stores GPUPasses that already contain the shader code and a hash to test for matches.
We use refcounts to know when a GPUPass is not used anymore.

I had to move the GPUInput list from GPUPass to GPUMaterial because it's containing references to the material nodetree and cannot be reused.

A garbage collection is hardcoded to run every 60 seconds to free every unused GPUPass.
This commit is contained in:
Clément Foucault 2018-03-11 23:43:09 +01:00
parent 7194259fb1
commit 765d7242d5
7 changed files with 204 additions and 103 deletions

View File

@ -582,15 +582,14 @@ static DRWShadingGroup *drw_shgroup_material_create_ex(GPUPass *gpupass, DRWPass
return grp;
}
static DRWShadingGroup *drw_shgroup_material_inputs(
DRWShadingGroup *grp, struct GPUMaterial *material, GPUPass *gpupass)
static DRWShadingGroup *drw_shgroup_material_inputs(DRWShadingGroup *grp, struct GPUMaterial *material)
{
/* TODO : Ideally we should not convert. But since the whole codegen
* is relying on GPUPass we keep it as is for now. */
/* Converting dynamic GPUInput to DRWUniform */
ListBase *inputs = &gpupass->inputs;
ListBase *inputs = GPU_material_get_inputs(material);
/* Converting dynamic GPUInput to DRWUniform */
for (GPUInput *input = inputs->first; input; input = input->next) {
/* Textures */
if (input->ima) {
@ -656,7 +655,7 @@ DRWShadingGroup *DRW_shgroup_material_create(
if (shgroup) {
drw_interface_init(shgroup, GPU_pass_shader(gpupass));
drw_shgroup_material_inputs(shgroup, material, gpupass);
drw_shgroup_material_inputs(shgroup, material);
}
return shgroup;
@ -673,7 +672,7 @@ DRWShadingGroup *DRW_shgroup_material_instance_create(
shgroup->instance_geom = geom;
drw_call_calc_orco(ob->data, shgroup->instance_orcofac);
drw_interface_instance_init(shgroup, GPU_pass_shader(gpupass), geom, format);
drw_shgroup_material_inputs(shgroup, material, gpupass);
drw_shgroup_material_inputs(shgroup, material);
}
return shgroup;
@ -693,7 +692,7 @@ DRWShadingGroup *DRW_shgroup_material_empty_tri_batch_create(
drw_interface_init(shgroup, GPU_pass_shader(gpupass));
shgroup->type = DRW_SHG_TRIANGLE_BATCH;
shgroup->instance_count = tri_count * 3;
drw_shgroup_material_inputs(shgroup, material, gpupass);
drw_shgroup_material_inputs(shgroup, material);
}
return shgroup;

View File

@ -1922,6 +1922,8 @@ void view3d_main_region_draw(const bContext *C, ARegion *ar)
BLI_rcti_translate(&rect, -ar->winrct.xmin, -ar->winrct.ymin);
GPU_viewport_draw_to_screen(rv3d->viewport, &rect);
GPU_pass_cache_garbage_collect();
v3d->flag |= V3D_INVALID_BACKBUF;
}

View File

@ -270,6 +270,7 @@ bool GPU_material_bound(GPUMaterial *material);
struct Scene *GPU_material_scene(GPUMaterial *material);
GPUMatType GPU_Material_get_type(GPUMaterial *material);
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct ListBase *GPU_material_get_inputs(GPUMaterial *material);
GPUMaterialStatus GPU_material_status(GPUMaterial *mat);
struct GPUUniformBuffer *GPU_material_get_uniform_buffer(GPUMaterial *material);
@ -377,6 +378,9 @@ void GPU_material_update_fvar_offset(GPUMaterial *gpu_material,
struct DerivedMesh *dm);
#endif
void GPU_pass_cache_garbage_collect(void);
void GPU_pass_cache_free(void);
#ifdef __cplusplus
}
#endif

View File

@ -39,10 +39,14 @@
#include "DNA_node_types.h"
#include "BLI_blenlib.h"
#include "BLI_hash_mm2a.h"
#include "BLI_linklist.h"
#include "BLI_utildefines.h"
#include "BLI_dynstr.h"
#include "BLI_ghash.h"
#include "PIL_time.h"
#include "GPU_extensions.h"
#include "GPU_glew.h"
#include "GPU_material.h"
@ -64,6 +68,50 @@ extern char datatoc_gpu_shader_geometry_glsl[];
static char *glsl_material_library = NULL;
/* -------------------- GPUPass Cache ------------------ */
/**
 * Internal shader cache: This prevents shader recompilation / stalls when
 * using undo/redo AND also allows GPUPass reuse if the shader code is the
 * same for 2 different materials. Unused GPUPasses are freed by garbage collection.
 **/
static LinkNode *pass_cache = NULL; /* GPUPass */
/* Compute the identity hash of a pass from all of its GLSL sources.
 * NOTE: the hashing order (frag, vert, defs, geom) is part of the hash
 * identity — changing it would invalidate every cached pass. */
static uint32_t gpu_pass_hash(const char *vert, const char *geom, const char *frag, const char *defs)
{
	BLI_HashMurmur2A state;

	BLI_hash_mm2a_init(&state, 0);
	BLI_hash_mm2a_add(&state, (unsigned char *)frag, strlen(frag));
	BLI_hash_mm2a_add(&state, (unsigned char *)vert, strlen(vert));
	if (defs != NULL) {
		BLI_hash_mm2a_add(&state, (unsigned char *)defs, strlen(defs));
	}
	if (geom != NULL) {
		BLI_hash_mm2a_add(&state, (unsigned char *)geom, strlen(geom));
	}
	return BLI_hash_mm2a_end(&state);
}
/* Search by hash then by exact string match. */
/* Search the pass cache by hash first, then confirm with exact string compares.
 * Returns the matching cached GPUPass, or NULL when none matches.
 *
 * Fix: the optional sources (defines, geometry) may be NULL on either side.
 * The previous logic only compared them when the *query* side was non-NULL:
 * - query non-NULL, cached NULL  -> strcmp(NULL, ...) is undefined behavior;
 * - query NULL, cached non-NULL  -> compare skipped, so a pass built with
 *   different defines/geometry could be wrongly reused.
 * A NULL/non-NULL mismatch now rejects the candidate outright. */
static GPUPass *gpu_pass_cache_lookup(
        const char *vert, const char *geom, const char *frag, const char *defs, uint32_t hash)
{
	for (LinkNode *ln = pass_cache; ln; ln = ln->next) {
		GPUPass *pass = (GPUPass *)ln->link;
		if (pass->hash != hash) {
			continue;
		}
		/* Hash matched: verify against collisions with full compares.
		 * Note: Could be made faster if that becomes a real bottleneck. */
		if ((defs == NULL) != (pass->defines == NULL)) {
			continue;
		}
		if ((geom == NULL) != (pass->geometrycode == NULL)) {
			continue;
		}
		if ((defs != NULL) && (strcmp(pass->defines, defs) != 0)) {
			continue;
		}
		if ((geom != NULL) && (strcmp(pass->geometrycode, geom) != 0)) {
			continue;
		}
		if ((strcmp(pass->fragmentcode, frag) == 0) &&
		    (strcmp(pass->vertexcode, vert) == 0))
		{
			return pass;
		}
	}
	return NULL;
}
/* -------------------- GPU Codegen ------------------ */
/* type definitions and constants */
@ -1175,15 +1223,13 @@ GPUShader *GPU_pass_shader(GPUPass *pass)
return pass->shader;
}
static void gpu_nodes_extract_dynamic_inputs_new(GPUPass *pass, ListBase *nodes)
static void gpu_nodes_extract_dynamic_inputs_new(GPUShader *shader, ListBase *inputs, ListBase *nodes)
{
GPUShader *shader = pass->shader;
GPUNode *node;
GPUInput *next, *input;
ListBase *inputs = &pass->inputs;
int extract, z;
memset(inputs, 0, sizeof(*inputs));
BLI_listbase_clear(inputs);
if (!shader)
return;
@ -1236,15 +1282,13 @@ static void gpu_nodes_extract_dynamic_inputs_new(GPUPass *pass, ListBase *nodes)
GPU_shader_unbind();
}
static void gpu_nodes_extract_dynamic_inputs(GPUPass *pass, ListBase *nodes)
static void gpu_nodes_extract_dynamic_inputs(GPUShader *shader, ListBase *inputs, ListBase *nodes)
{
GPUShader *shader = pass->shader;
GPUNode *node;
GPUInput *next, *input;
ListBase *inputs = &pass->inputs;
int extract, z;
memset(inputs, 0, sizeof(*inputs));
BLI_listbase_clear(inputs);
if (!shader)
return;
@ -1322,11 +1366,10 @@ static void gpu_nodes_extract_dynamic_inputs(GPUPass *pass, ListBase *nodes)
GPU_shader_unbind();
}
void GPU_pass_bind(GPUPass *pass, double time, int mipmap)
void GPU_pass_bind(GPUPass *pass, ListBase *inputs, double time, int mipmap)
{
GPUInput *input;
GPUShader *shader = pass->shader;
ListBase *inputs = &pass->inputs;
if (!shader)
return;
@ -1351,11 +1394,10 @@ void GPU_pass_bind(GPUPass *pass, double time, int mipmap)
}
}
void GPU_pass_update_uniforms(GPUPass *pass)
void GPU_pass_update_uniforms(GPUPass *pass, ListBase *inputs)
{
GPUInput *input;
GPUShader *shader = pass->shader;
ListBase *inputs = &pass->inputs;
if (!shader)
return;
@ -1376,11 +1418,10 @@ void GPU_pass_update_uniforms(GPUPass *pass)
}
}
void GPU_pass_unbind(GPUPass *pass)
void GPU_pass_unbind(GPUPass *pass, ListBase *inputs)
{
GPUInput *input;
GPUShader *shader = pass->shader;
ListBase *inputs = &pass->inputs;
if (!shader)
return;
@ -1679,7 +1720,7 @@ static void gpu_node_output(GPUNode *node, const GPUType type, GPUNodeLink **lin
BLI_addtail(&node->outputs, output);
}
static void gpu_inputs_free(ListBase *inputs)
void GPU_inputs_free(ListBase *inputs)
{
GPUInput *input;
@ -1697,7 +1738,7 @@ static void gpu_node_free(GPUNode *node)
{
GPUOutput *output;
gpu_inputs_free(&node->inputs);
GPU_inputs_free(&node->inputs);
for (output = node->outputs.first; output; output = output->next)
if (output->link) {
@ -2072,78 +2113,83 @@ void GPU_nodes_prune(ListBase *nodes, GPUNodeLink *outlink)
}
GPUPass *GPU_generate_pass_new(
struct GPUMaterial *material,
ListBase *nodes, struct GPUNodeLink *frag_outlink,
GPUVertexAttribs *attribs,
GPUMaterial *material,
GPUNodeLink *frag_outlink, struct GPUVertexAttribs *attribs,
ListBase *nodes, ListBase *inputs,
const char *vert_code, const char *geom_code,
const char *frag_lib, const char *defines)
{
char *vertexcode, *geometrycode, *fragmentcode;
GPUShader *shader;
GPUPass *pass;
char *vertexgen, *fragmentgen, *tmp;
char *vertexcode, *geometrycode, *fragmentcode;
/* prune unused nodes */
GPU_nodes_prune(nodes, frag_outlink);
GPU_nodes_get_vertex_attributes(nodes, attribs);
/* generate code and compile with opengl */
fragmentgen = code_generate_fragment(material, nodes, frag_outlink->output, true);
vertexgen = code_generate_vertex_new(nodes, vert_code, (geom_code != NULL));
/* generate code */
char *fragmentgen = code_generate_fragment(material, nodes, frag_outlink->output, true);
char *tmp = BLI_strdupcat(frag_lib, glsl_material_library);
tmp = BLI_strdupcat(frag_lib, glsl_material_library);
vertexcode = code_generate_vertex_new(nodes, vert_code, (geom_code != NULL));
geometrycode = (geom_code) ? code_generate_geometry_new(nodes, geom_code) : NULL;
fragmentcode = BLI_strdupcat(tmp, fragmentgen);
vertexcode = BLI_strdup(vertexgen);
if (geom_code) {
geometrycode = code_generate_geometry_new(nodes, geom_code);
}
else {
geometrycode = NULL;
}
shader = GPU_shader_create(vertexcode,
fragmentcode,
geometrycode,
NULL,
defines);
MEM_freeN(tmp);
/* failed? */
if (!shader) {
if (fragmentcode)
MEM_freeN(fragmentcode);
if (vertexcode)
MEM_freeN(vertexcode);
if (geometrycode)
MEM_freeN(geometrycode);
MEM_freeN(fragmentgen);
MEM_freeN(vertexgen);
gpu_nodes_free(nodes);
return NULL;
}
/* create pass */
pass = MEM_callocN(sizeof(GPUPass), "GPUPass");
pass->shader = shader;
pass->fragmentcode = fragmentcode;
pass->geometrycode = geometrycode;
pass->vertexcode = vertexcode;
pass->libcode = glsl_material_library;
/* extract dynamic inputs and throw away nodes */
gpu_nodes_extract_dynamic_inputs_new(pass, nodes);
MEM_freeN(fragmentgen);
MEM_freeN(vertexgen);
MEM_freeN(tmp);
return pass;
/* Cache lookup: Reuse shaders already compiled */
uint32_t hash = gpu_pass_hash(vertexcode, geometrycode, fragmentcode, defines);
pass = gpu_pass_cache_lookup(vertexcode, geometrycode, fragmentcode, defines, hash);
if (pass) {
/* Cache hit. Reuse the same GPUPass and GPUShader. */
shader = pass->shader;
pass->refcount += 1;
MEM_SAFE_FREE(vertexcode);
MEM_SAFE_FREE(fragmentcode);
MEM_SAFE_FREE(geometrycode);
}
else {
/* Cache miss. (Re)compile the shader. */
shader = GPU_shader_create(vertexcode,
fragmentcode,
geometrycode,
NULL,
defines);
/* We still create a pass even if shader compilation
* fails to avoid trying to compile again and again. */
pass = MEM_callocN(sizeof(GPUPass), "GPUPass");
pass->shader = shader;
pass->refcount = 1;
pass->hash = hash;
pass->vertexcode = vertexcode;
pass->fragmentcode = fragmentcode;
pass->geometrycode = geometrycode;
pass->libcode = glsl_material_library;
pass->defines = (defines) ? BLI_strdup(defines) : NULL;
BLI_linklist_prepend(&pass_cache, pass);
}
/* Did compilation fail? */
if (!shader) {
gpu_nodes_free(nodes);
/* Pass will not be used. Don't increment refcount. */
pass->refcount--;
return NULL;
}
else {
gpu_nodes_extract_dynamic_inputs_new(shader, inputs, nodes);
return pass;
}
}
/* TODO(fclem) Remove for 2.8 */
GPUPass *GPU_generate_pass(
ListBase *nodes, GPUNodeLink *outlink,
ListBase *nodes, ListBase *inputs, GPUNodeLink *outlink,
GPUVertexAttribs *attribs, int *builtins,
const GPUMatType type, const char *UNUSED(name),
const bool use_opensubdiv,
@ -2199,30 +2245,36 @@ GPUPass *GPU_generate_pass(
/* create pass */
pass = MEM_callocN(sizeof(GPUPass), "GPUPass");
pass->refcount = 1;
pass->shader = shader;
pass->fragmentcode = fragmentcode;
pass->geometrycode = geometrycode;
pass->vertexcode = vertexcode;
pass->libcode = glsl_material_library;
BLI_linklist_prepend(&pass_cache, pass);
/* extract dynamic inputs and throw away nodes */
gpu_nodes_extract_dynamic_inputs(pass, nodes);
gpu_nodes_extract_dynamic_inputs(shader, inputs, nodes);
gpu_nodes_free(nodes);
return pass;
}
void GPU_pass_free(GPUPass *pass)
void GPU_pass_release(GPUPass *pass)
{
BLI_assert(pass->refcount > 0);
pass->refcount--;
}
static void gpu_pass_free(GPUPass *pass)
{
BLI_assert(pass->refcount == 0);
GPU_shader_free(pass->shader);
gpu_inputs_free(&pass->inputs);
if (pass->fragmentcode)
MEM_freeN(pass->fragmentcode);
if (pass->geometrycode)
MEM_freeN(pass->geometrycode);
if (pass->vertexcode)
MEM_freeN(pass->vertexcode);
MEM_SAFE_FREE(pass->fragmentcode);
MEM_SAFE_FREE(pass->geometrycode);
MEM_SAFE_FREE(pass->vertexcode);
MEM_SAFE_FREE(pass->defines);
MEM_freeN(pass);
}
@ -2231,3 +2283,34 @@ void GPU_pass_free_nodes(ListBase *nodes)
gpu_nodes_free(nodes);
}
void GPU_pass_cache_garbage_collect(void)
{
static int lasttime = 0;
const int shadercollectrate = 60; /* hardcoded for now. */
int ctime = (int)PIL_check_seconds_timer();
if (ctime < shadercollectrate + lasttime)
return;
lasttime = ctime;
LinkNode *next, **prev_ln = &pass_cache;
for (LinkNode *ln = pass_cache; ln; ln = next) {
GPUPass *pass = (GPUPass *)ln->link;
next = ln->next;
if (pass->refcount == 0) {
gpu_pass_free(pass);
/* Remove from list */
MEM_freeN(ln);
*prev_ln = next;
}
else {
prev_ln = &ln->next;
}
}
}
/* Free the entire pass cache, shaders included (called once on exit).
 * NOTE(review): gpu_pass_free() asserts refcount == 0, so this assumes every
 * material has already released its pass before exit — confirm the call
 * order in WM_exit_ext(). */
void GPU_pass_cache_free(void)
{
	BLI_linklist_free(pass_cache, (LinkNodeFreeFP)gpu_pass_free);
}

View File

@ -157,25 +157,27 @@ typedef struct GPUInput {
} GPUInput;
struct GPUPass {
ListBase inputs;
struct GPUShader *shader;
char *fragmentcode;
char *geometrycode;
char *vertexcode;
char *defines;
const char *libcode;
unsigned int refcount; /* Orphaned GPUPasses gets freed by the garbage collector. */
uint32_t hash; /* Identity hash generated from all GLSL code. */
};
typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass_new(
struct GPUMaterial *material,
ListBase *nodes, struct GPUNodeLink *frag_outlink,
struct GPUVertexAttribs *attribs,
GPUMaterial *material,
GPUNodeLink *frag_outlink, struct GPUVertexAttribs *attribs,
ListBase *nodes, ListBase *inputs,
const char *vert_code, const char *geom_code,
const char *frag_lib, const char *defines);
GPUPass *GPU_generate_pass(
ListBase *nodes, struct GPUNodeLink *outlink,
ListBase *nodes, ListBase *inputs, struct GPUNodeLink *outlink,
struct GPUVertexAttribs *attribs, int *builtin,
const GPUMatType type, const char *name,
const bool use_opensubdiv,
@ -186,13 +188,15 @@ struct GPUShader *GPU_pass_shader(GPUPass *pass);
void GPU_nodes_get_vertex_attributes(ListBase *nodes, struct GPUVertexAttribs *attribs);
void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);
void GPU_pass_bind(GPUPass *pass, double time, int mipmap);
void GPU_pass_update_uniforms(GPUPass *pass);
void GPU_pass_unbind(GPUPass *pass);
void GPU_pass_bind(GPUPass *pass, ListBase *inputs, double time, int mipmap);
void GPU_pass_update_uniforms(GPUPass *pass, ListBase *inputs);
void GPU_pass_unbind(GPUPass *pass, ListBase *inputs);
void GPU_pass_free(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
void GPU_pass_free_nodes(ListBase *nodes);
void GPU_inputs_free(ListBase *inputs);
void gpu_codegen_init(void);
void gpu_codegen_exit(void);

View File

@ -116,6 +116,7 @@ struct GPUMaterial {
/* for binding the material */
GPUPass *pass;
ListBase inputs; /* GPUInput */
GPUVertexAttribs attribs;
int builtins;
int alpha, obcolalpha;
@ -221,7 +222,7 @@ static int gpu_material_construct_end(GPUMaterial *material, const char *passnam
{
if (material->outlink) {
GPUNodeLink *outlink = material->outlink;
material->pass = GPU_generate_pass(&material->nodes, outlink,
material->pass = GPU_generate_pass(&material->nodes, &material->inputs, outlink,
&material->attribs, &material->builtins, material->type,
passname,
material->is_opensubdiv,
@ -282,9 +283,10 @@ void GPU_material_free(ListBase *gpumaterial)
DRW_deferred_shader_remove(material);
GPU_pass_free_nodes(&material->nodes);
GPU_inputs_free(&material->inputs);
if (material->pass)
GPU_pass_free(material->pass);
GPU_pass_release(material->pass);
if (material->ubo != NULL) {
GPU_uniformbuffer_free(material->ubo);
@ -356,7 +358,7 @@ void GPU_material_bind(
}
/* note material must be bound before setting uniforms */
GPU_pass_bind(material->pass, time, mipmap);
GPU_pass_bind(material->pass, &material->inputs, time, mipmap);
/* handle per material built-ins */
if (material->builtins & GPU_VIEW_MATRIX) {
@ -376,7 +378,7 @@ void GPU_material_bind(
}
}
GPU_pass_update_uniforms(material->pass);
GPU_pass_update_uniforms(material->pass, &material->inputs);
material->bound = 1;
}
@ -449,7 +451,7 @@ void GPU_material_unbind(GPUMaterial *material)
{
if (material->pass) {
material->bound = 0;
GPU_pass_unbind(material->pass);
GPU_pass_unbind(material->pass, &material->inputs);
}
}
@ -473,6 +475,11 @@ GPUPass *GPU_material_get_pass(GPUMaterial *material)
return material->pass;
}
/* Return the material-owned list of dynamic GPUInputs.
 * The inputs live on GPUMaterial rather than GPUPass because they hold
 * references into the material's nodetree and so cannot be shared between
 * materials that reuse the same cached pass. */
ListBase *GPU_material_get_inputs(GPUMaterial *material)
{
	return &material->inputs;
}
GPUUniformBuffer *GPU_material_get_uniform_buffer(GPUMaterial *material)
{
return material->ubo;
@ -2562,7 +2569,7 @@ void GPU_material_generate_pass(
BLI_assert(mat->pass == NULL); /* Only run once! */
if (mat->outlink) {
mat->pass = GPU_generate_pass_new(
mat, &mat->nodes, mat->outlink, &mat->attribs, vert_code, geom_code, frag_lib, defines);
mat, mat->outlink, &mat->attribs, &mat->nodes, &mat->inputs, vert_code, geom_code, frag_lib, defines);
mat->status = (mat->pass) ? GPU_MAT_SUCCESS : GPU_MAT_FAILED;
}
}
@ -2729,7 +2736,7 @@ GPUShaderExport *GPU_shader_export(struct Scene *scene, struct Material *ma)
if (pass && pass->fragmentcode && pass->vertexcode) {
shader = MEM_callocN(sizeof(GPUShaderExport), "GPUShaderExport");
for (input = pass->inputs.first; input; input = input->next) {
for (input = mat->inputs.first; input; input = input->next) {
GPUInputUniform *uniform = MEM_callocN(sizeof(GPUInputUniform), "GPUInputUniform");
if (input->ima) {
@ -2906,7 +2913,7 @@ void GPU_material_update_fvar_offset(GPUMaterial *gpu_material,
{
GPUPass *pass = gpu_material->pass;
GPUShader *shader = (pass != NULL ? pass->shader : NULL);
ListBase *inputs = (pass != NULL ? &pass->inputs : NULL);
ListBase *inputs = (pass != NULL ? &gpu_material->inputs : NULL);
GPUInput *input;
if (shader == NULL) {

View File

@ -116,6 +116,7 @@
#include "BLF_api.h"
#include "BLT_lang.h"
#include "GPU_material.h"
#include "GPU_buffers.h"
#include "GPU_draw.h"
#include "GPU_init_exit.h"
@ -553,6 +554,7 @@ void WM_exit_ext(bContext *C, const bool do_python)
BLF_exit();
if (!G.background) {
GPU_pass_cache_free();
DRW_opengl_context_destroy();
}