Metal: Optimize shader local memory usage.
Due to shader global scope being emulated via a class interface, global constant arrays in shaders are allocated in per-thread shader local memory. To reduce memory pressure, these constant arrays are now declared inside function scope, which ensures they reside only within device constant memory. This results in a tangible 1.5-2x performance uplift for the specific shaders affected.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D17089
This commit is contained in:
parent
dea924a91f
commit
596ee79a9f
Notes:
blender-bot
2023-02-14 06:42:54 +01:00
Referenced by issue #96261, Metal Viewport
|
@ -6,16 +6,17 @@
|
|||
|
||||
#define M_4PI 12.5663706143591729
|
||||
|
||||
const mat3 CUBE_ROTATIONS[6] = mat3[](
|
||||
mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
|
||||
mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
|
||||
mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
|
||||
|
||||
vec3 get_cubemap_vector(vec2 co, int face)
|
||||
{
|
||||
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
|
||||
* memory pressure. */
|
||||
const mat3 CUBE_ROTATIONS[6] = mat3[](
|
||||
mat3(vec3(0.0, 0.0, -1.0), vec3(0.0, -1.0, 0.0), vec3(-1.0, 0.0, 0.0)),
|
||||
mat3(vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0), vec3(1.0, 0.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), vec3(0.0, -1.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, -1.0), vec3(0.0, 1.0, 0.0)),
|
||||
mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, -1.0)),
|
||||
mat3(vec3(-1.0, 0.0, 0.0), vec3(0.0, -1.0, 0.0), vec3(0.0, 0.0, 1.0)));
|
||||
return normalize(CUBE_ROTATIONS[face] * vec3(co * 2.0 - 1.0, 1.0));
|
||||
}
|
||||
|
||||
|
|
|
@ -4,13 +4,16 @@
|
|||
|
||||
/* 4x4 bayer matrix prepared for 8bit UNORM precision error. */
|
||||
#define P(x) (((x + 0.5) * (1.0 / 16.0) - 0.5) * (1.0 / 255.0))
|
||||
const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
|
||||
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
|
||||
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
|
||||
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
|
||||
|
||||
float dither(void)
|
||||
{
|
||||
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
|
||||
* memory pressure. */
|
||||
const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
|
||||
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
|
||||
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
|
||||
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
|
||||
|
||||
ivec2 co = ivec2(gl_FragCoord.xy) % 4;
|
||||
return dither_mat4x4[co.x][co.y];
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
|
||||
|
||||
/* TODO: Theme? */
|
||||
const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
|
||||
|
||||
void main()
|
||||
{
|
||||
/* TODO: Theme? */
|
||||
const vec4 pinned_col = vec4(1.0, 0.0, 0.0, 1.0);
|
||||
|
||||
bool is_selected = (flag & (VERT_UV_SELECT | FACE_UV_SELECT)) != 0u;
|
||||
bool is_pinned = (flag & VERT_UV_PINNED) != 0u;
|
||||
vec4 deselect_col = (is_pinned) ? pinned_col : vec4(color.rgb, 1.0);
|
||||
|
|
|
@ -1,15 +1,6 @@
|
|||
|
||||
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
|
||||
|
||||
/* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
|
||||
* overlapping of the outlines. */
|
||||
const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
|
||||
vec3(0.45, 0.45, 0.0),
|
||||
vec3(0.45, -0.45, 0.0),
|
||||
vec3(-0.45, -0.45, 0.0));
|
||||
|
||||
const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
|
||||
|
||||
vec4 flag_to_color(uint flag)
|
||||
{
|
||||
/* Color mapping for flags */
|
||||
|
@ -88,6 +79,16 @@ void main()
|
|||
}
|
||||
}
|
||||
#endif
|
||||
/* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
|
||||
* memory pressure. */
|
||||
const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
|
||||
|
||||
/* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
|
||||
* overlapping of the outlines. */
|
||||
const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
|
||||
vec3(0.45, 0.45, 0.0),
|
||||
vec3(0.45, -0.45, 0.0),
|
||||
vec3(-0.45, -0.45, 0.0));
|
||||
|
||||
vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset);
|
||||
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]];
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
|
||||
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
|
||||
|
||||
const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
|
||||
vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
|
||||
vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
|
||||
vec3(0.0, 0.0, 0.5));
|
||||
|
||||
const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
|
||||
|
||||
/* Straight Port from BKE_defvert_weight_to_rgb()
|
||||
* TODO: port this to a color ramp. */
|
||||
vec3 weight_to_color(float weight)
|
||||
|
@ -177,6 +170,15 @@ void main()
|
|||
mat3 rot_mat = rotation_from_vector(vector);
|
||||
|
||||
# ifdef USE_NEEDLE
|
||||
/* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
|
||||
* memory pressure. */
|
||||
const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
|
||||
vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
|
||||
vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
|
||||
vec3(0.0, 0.0, 0.5));
|
||||
|
||||
const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
|
||||
|
||||
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]];
|
||||
pos += rotated_pos * vector_length * displaySize * cellSize;
|
||||
# else
|
||||
|
|
|
@ -12,42 +12,6 @@
|
|||
/* 4bits for corner id */
|
||||
#define CORNER_VEC_OFS 2u
|
||||
#define CORNER_VEC_RANGE BIT_RANGE(4)
|
||||
const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
|
||||
vec2(0.02, 0.805),
|
||||
vec2(0.067, 0.617),
|
||||
vec2(0.169, 0.45),
|
||||
vec2(0.293, 0.293),
|
||||
vec2(0.45, 0.169),
|
||||
vec2(0.617, 0.076),
|
||||
vec2(0.805, 0.02),
|
||||
vec2(1.0, 0.0),
|
||||
vec2(-1.0, 0.0),
|
||||
vec2(-0.805, 0.02),
|
||||
vec2(-0.617, 0.067),
|
||||
vec2(-0.45, 0.169),
|
||||
vec2(-0.293, 0.293),
|
||||
vec2(-0.169, 0.45),
|
||||
vec2(-0.076, 0.617),
|
||||
vec2(-0.02, 0.805),
|
||||
vec2(0.0, 1.0),
|
||||
vec2(0.0, -1.0),
|
||||
vec2(-0.02, -0.805),
|
||||
vec2(-0.067, -0.617),
|
||||
vec2(-0.169, -0.45),
|
||||
vec2(-0.293, -0.293),
|
||||
vec2(-0.45, -0.169),
|
||||
vec2(-0.617, -0.076),
|
||||
vec2(-0.805, -0.02),
|
||||
vec2(-1.0, 0.0),
|
||||
vec2(1.0, 0.0),
|
||||
vec2(0.805, -0.02),
|
||||
vec2(0.617, -0.067),
|
||||
vec2(0.45, -0.169),
|
||||
vec2(0.293, -0.293),
|
||||
vec2(0.169, -0.45),
|
||||
vec2(0.076, -0.617),
|
||||
vec2(0.02, -0.805),
|
||||
vec2(0.0, -1.0));
|
||||
|
||||
#define INNER_FLAG uint(1 << 10) /* is inner vert */
|
||||
|
||||
|
@ -60,6 +24,45 @@ const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
|
|||
|
||||
void main()
|
||||
{
|
||||
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
|
||||
* memory pressure.*/
|
||||
const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
|
||||
vec2(0.02, 0.805),
|
||||
vec2(0.067, 0.617),
|
||||
vec2(0.169, 0.45),
|
||||
vec2(0.293, 0.293),
|
||||
vec2(0.45, 0.169),
|
||||
vec2(0.617, 0.076),
|
||||
vec2(0.805, 0.02),
|
||||
vec2(1.0, 0.0),
|
||||
vec2(-1.0, 0.0),
|
||||
vec2(-0.805, 0.02),
|
||||
vec2(-0.617, 0.067),
|
||||
vec2(-0.45, 0.169),
|
||||
vec2(-0.293, 0.293),
|
||||
vec2(-0.169, 0.45),
|
||||
vec2(-0.076, 0.617),
|
||||
vec2(-0.02, 0.805),
|
||||
vec2(0.0, 1.0),
|
||||
vec2(0.0, -1.0),
|
||||
vec2(-0.02, -0.805),
|
||||
vec2(-0.067, -0.617),
|
||||
vec2(-0.169, -0.45),
|
||||
vec2(-0.293, -0.293),
|
||||
vec2(-0.45, -0.169),
|
||||
vec2(-0.617, -0.076),
|
||||
vec2(-0.805, -0.02),
|
||||
vec2(-1.0, 0.0),
|
||||
vec2(1.0, 0.0),
|
||||
vec2(0.805, -0.02),
|
||||
vec2(0.617, -0.067),
|
||||
vec2(0.45, -0.169),
|
||||
vec2(0.293, -0.293),
|
||||
vec2(0.169, -0.45),
|
||||
vec2(0.076, -0.617),
|
||||
vec2(0.02, -0.805),
|
||||
vec2(0.0, -1.0));
|
||||
|
||||
uint cflag = vflag & CNR_FLAG_RANGE;
|
||||
uint vofs = (vflag >> CORNER_VEC_OFS) & CORNER_VEC_RANGE;
|
||||
|
||||
|
|
|
@ -1,25 +1,5 @@
|
|||
#pragma BLENDER_REQUIRE(gpu_shader_colorspace_lib.glsl)
|
||||
|
||||
const vec2 offsets4[4] = vec2[4](
|
||||
vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(-0.5, -0.5), vec2(-0.5, -0.5));
|
||||
|
||||
const vec2 offsets16[16] = vec2[16](vec2(-1.5, 1.5),
|
||||
vec2(-0.5, 1.5),
|
||||
vec2(0.5, 1.5),
|
||||
vec2(1.5, 1.5),
|
||||
vec2(-1.5, 0.5),
|
||||
vec2(-0.5, 0.5),
|
||||
vec2(0.5, 0.5),
|
||||
vec2(1.5, 0.5),
|
||||
vec2(-1.5, -0.5),
|
||||
vec2(-0.5, -0.5),
|
||||
vec2(0.5, -0.5),
|
||||
vec2(1.5, -0.5),
|
||||
vec2(-1.5, -1.5),
|
||||
vec2(-0.5, -1.5),
|
||||
vec2(0.5, -1.5),
|
||||
vec2(1.5, -1.5));
|
||||
|
||||
//#define GPU_NEAREST
|
||||
#define sample_glyph_offset(texel, ofs) \
|
||||
texture_1D_custom_bilinear_filter(texCoord_interp + ofs * texel)
|
||||
|
@ -92,6 +72,11 @@ void main()
|
|||
fragColor.a = 0.0;
|
||||
|
||||
if (interp_size == 1) {
|
||||
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
|
||||
* memory pressure.*/
|
||||
const vec2 offsets4[4] = vec2[4](
|
||||
vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(-0.5, -0.5), vec2(-0.5, -0.5));
|
||||
|
||||
/* 3x3 blur */
|
||||
/* Manual unroll for perf. (stupid glsl compiler) */
|
||||
fragColor.a += sample_glyph_offset(texel, offsets4[0]);
|
||||
|
@ -101,6 +86,25 @@ void main()
|
|||
fragColor.a *= (1.0 / 4.0);
|
||||
}
|
||||
else {
|
||||
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
|
||||
* memory pressure.*/
|
||||
const vec2 offsets16[16] = vec2[16](vec2(-1.5, 1.5),
|
||||
vec2(-0.5, 1.5),
|
||||
vec2(0.5, 1.5),
|
||||
vec2(1.5, 1.5),
|
||||
vec2(-1.5, 0.5),
|
||||
vec2(-0.5, 0.5),
|
||||
vec2(0.5, 0.5),
|
||||
vec2(1.5, 0.5),
|
||||
vec2(-1.5, -0.5),
|
||||
vec2(-0.5, -0.5),
|
||||
vec2(0.5, -0.5),
|
||||
vec2(1.5, -0.5),
|
||||
vec2(-1.5, -1.5),
|
||||
vec2(-0.5, -1.5),
|
||||
vec2(0.5, -1.5),
|
||||
vec2(1.5, -1.5));
|
||||
|
||||
/* 5x5 blur */
|
||||
/* Manual unroll for perf. (stupid glsl compiler) */
|
||||
fragColor.a += sample_glyph_offset(texel, offsets16[0]);
|
||||
|
|
Loading…
Reference in New Issue