Workbench: Use non-negative lighting evaluation

This makes the lighting a bit more diffuse but doesn't produce negative
values.

Add a bias of 1.5f to make the lighting a bit more directional.

The implementation is based on:
https://github.com/kayru/Probulator/blob/master/Source/Probulator/SphericalHarmonics.h#L136
which is derived from:
http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf

The shader implementation is optimized and has the same runtime cost
as the previous method:
* no sh eval : 0.13ms
* prev sh eval : 0.14ms
* new sh eval : 0.22ms
* new sh eval opti : 0.14ms
This commit is contained in:
Clément Foucault 2018-11-19 18:03:06 +01:00
parent 2b56d21839
commit 7bb512594c
Notes: blender-bot 2023-02-14 04:59:00 +01:00
Referenced by issue #57952, Workbench: stack-buffer-overflow after recent lighting evaluation changes
6 changed files with 122 additions and 15 deletions

View File

@ -62,7 +62,7 @@
#define STUDIOLIGHT_ICON_SIZE 96
/* Only 1 - 5 is supported */
#define STUDIOLIGHT_SH_BANDS 3
#define STUDIOLIGHT_SH_BANDS 2
#define STUDIOLIGHT_SH_COEFS_LEN (STUDIOLIGHT_SH_BANDS * STUDIOLIGHT_SH_BANDS)

View File

@ -558,19 +558,45 @@ static void studiolight_spherical_harmonics_apply_windowing(float (*sh)[3], floa
}
}
BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float color[3], float normal[3])
/**
 * Evaluate one color channel of the Geomerics non-linear L1 spherical harmonics model.
 *
 * \param normal: Direction to evaluate. It is dotted with the normalized L1 vector,
 * so it is presumably expected to be unit length (confirm with callers).
 * \param sh0: L0 coefficient of the channel.
 * \param sh1, sh2, sh3: L1 coefficients of the channel.
 * \return The reconstructed value for this channel. Degenerate inputs return a finite
 * value instead of NaN/Inf: 0 when the L0 term is not positive, and the plain
 * L0 term when the L1 vector has zero length.
 */
static float studiolight_spherical_harmonics_geomerics_eval(const float normal[3], float sh0, float sh1, float sh2, float sh3)
{
	/* Use Geomerics non-linear SH. */
	/* http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf */
	const float R0 = sh0 * M_1_PI;

	/* The model divides by R0, so it is undefined without ambient energy.
	 * The negated comparison also rejects NaN. */
	if (!(R0 > 0.0f)) {
		return 0.0f;
	}

	float R1[3] = {-sh3, sh2, -sh1};
	mul_v3_fl(R1, 0.5f * M_1_PI * 1.5f); /* 1.5f is to improve the contrast a bit. */
	const float lenR1 = len_v3(R1);

	/* No directional component: R1 cannot be normalized. With lenR1 == 0 the formula
	 * below has a == 1 and collapses to R0, so return that directly. */
	if (!(lenR1 > 0.0f)) {
		return R0;
	}
	mul_v3_fl(R1, 1.0f / lenR1);

	/* Remap the cosine from [-1..1] to [0..1]. Rounding (or a non unit normal) can push
	 * it slightly below zero, and powf() of a negative base with a fractional exponent
	 * is NaN, so clamp. */
	float q = 0.5f * (1.0f + dot_v3v3(R1, normal));
	if (q < 0.0f) {
		q = 0.0f;
	}

	const float lr1_r0 = lenR1 / R0;
	const float p = 1.0f + 2.0f * lr1_r0;
	const float a = (1.0f - lr1_r0) / (1.0f + lr1_r0);

	return R0 * (a + (1.0f - a) * (p + 1.0f) * powf(q, p));
}
BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float color[3], const float normal[3])
{
#if STUDIOLIGHT_SH_BANDS == 2
float (*sh)[3] = (float (*)[3])sl->spherical_harmonics_coefs;
for (int i = 0; i < 3; ++i) {
color[i] = studiolight_spherical_harmonics_geomerics_eval(normal, sh[0][i], sh[1][i], sh[2][i], sh[3][i]);
}
return;
#else
/* L0 */
mul_v3_v3fl(color, sl->spherical_harmonics_coefs[0], 0.282095f);
#if STUDIOLIGHT_SH_BANDS > 1 /* L1 */
# if STUDIOLIGHT_SH_BANDS > 1 /* L1 */
const float nx = normal[0];
const float ny = normal[1];
const float nz = normal[2];
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[1], -0.488603f * nz);
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[2], 0.488603f * ny);
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[3], -0.488603f * nx);
#endif
#if STUDIOLIGHT_SH_BANDS > 2 /* L2 */
# endif
# if STUDIOLIGHT_SH_BANDS > 2 /* L2 */
const float nx2 = SQUARE(nx);
const float ny2 = SQUARE(ny);
const float nz2 = SQUARE(nz);
@ -579,9 +605,9 @@ BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float colo
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[6], 0.315392f * (3.0f * ny2 - 1.0f));
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[7], -1.092548 * nx * ny);
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[8], 0.546274 * (nx2 - nz2));
#endif
# endif
/* L3 coefs are 0 */
#if STUDIOLIGHT_SH_BANDS > 4 /* L4 */
# if STUDIOLIGHT_SH_BANDS > 4 /* L4 */
const float nx4 = SQUARE(nx2);
const float ny4 = SQUARE(ny2);
const float nz4 = SQUARE(nz2);
@ -594,6 +620,7 @@ BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float colo
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[15], 0.9461746957575601f * (nx2 - nz2) * (-1.0f + 7.0f * ny2));
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[16], -1.7701307697799304f * nx * ny * (nx2 - 3.0f * nz2));
madd_v3_v3fl(color, sl->spherical_harmonics_coefs[17], 0.6258357354491761f * (nx4 - 6.0f * nz2 * nx2 + nz4));
# endif
#endif
}

View File

@ -1,18 +1,50 @@
#define BLINN
#if STUDIOLIGHT_SH_BANDS == 2
vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
{
	/* Geomerics non-linear L1 spherical harmonics, evaluated for R, G and B at once. */
	/* http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf */
	/* Highly optimized form, precompute as much as we can. */
	/**
	 * With R1 the (scaled) L1 vector of one color channel:
	 * sh_coefs[1..3] = R1 / length(R1) of the red, green and blue channel.
	 * Each component of q is the cosine between N and that channel's direction,
	 * remapped from [-1..1] to [0..1].
	 **/
	vec3 q;
	q.x = dot(sh_coefs[1], N);
	q.y = dot(sh_coefs[2], N);
	q.z = dot(sh_coefs[3], N);
	q = 0.5 * q + 0.5;
	/* pow() is undefined for a negative base. Rounding (or a non unit N) can push q
	 * slightly below zero, so clamp before raising it to the power p. */
	q = max(q, 0.0);

	/**
	 * Per channel, with R0 the L0 term:
	 * lr1_r0 = lenR1 / R0;
	 * p = 1.0 + 2.0 * lr1_r0;
	 * a = (1.0 - lr1_r0) / (1.0 + lr1_r0);
	 * return R0 * (a + (1.0 - a) * (p + 1.0) * pow(q, p));
	 *
	 * The terms that do not depend on N are expected to be already folded into the inputs:
	 * sh_coefs[4] = p;
	 * sh_coefs[5] = R0 * a;
	 * sh_coefs[0] = R0 * (1.0 - a) * (p + 1.0);
	 **/
	q = pow(q, sh_coefs[4]);
	return sh_coefs[0] * q + sh_coefs[5];
}
#else
vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
{
vec3 sh = 0.282095 * sh_coefs[0];
#if STUDIOLIGHT_SH_BANDS > 1
# if STUDIOLIGHT_SH_BANDS > 1
float nx = N.x;
float ny = N.y;
float nz = N.z;
sh += -0.488603 * nz * sh_coefs[1];
sh += 0.488603 * ny * sh_coefs[2];
sh += -0.488603 * nx * sh_coefs[3];
#endif
#if STUDIOLIGHT_SH_BANDS > 2
# endif
# if STUDIOLIGHT_SH_BANDS > 2
float nx2 = nx * nx;
float ny2 = ny * ny;
float nz2 = nz * nz;
@ -21,8 +53,8 @@ vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
sh += 0.315392 * (3.0 * ny2 - 1.0) * sh_coefs[6];
sh += -1.092548 * nx * ny * sh_coefs[7];
sh += 0.546274 * (nx2 - nz2) * sh_coefs[8];
#endif
#if STUDIOLIGHT_SH_BANDS > 4
# endif
# if STUDIOLIGHT_SH_BANDS > 4
float nx4 = nx2 * nx2;
float ny4 = ny2 * ny2;
float nz4 = nz2 * nz2;
@ -35,9 +67,10 @@ vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
sh += (0.9461746957575601 * (nx2 - nz2) * (-1.0 + 7.0 * ny2)) * sh_coefs[15];
sh += (-1.7701307697799304 * nx * ny * (nx2 - 3.0 * nz2)) * sh_coefs[16];
sh += (0.6258357354491761 * (nx4 - 6.0 * nz2 * nx2 + nz4)) * sh_coefs[17];
#endif
# endif
return sh;
}
#endif
vec3 get_world_diffuse_light(WorldData world_data, vec3 N)
{

View File

@ -92,7 +92,7 @@ char *workbench_material_build_defines(WORKBENCH_PrivateData *wpd, bool use_text
}
BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_BANDS %d\n", STUDIOLIGHT_SH_BANDS);
BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_MAX_COMPONENTS %d\n", STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN);
BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_MAX_COMPONENTS %d\n", WORKBENCH_SH_DATA_LEN);
str = BLI_dynstr_get_cstring(ds);
BLI_dynstr_free(ds);

View File

@ -138,8 +138,10 @@ typedef struct WORKBENCH_UBO_Light {
float energy;
} WORKBENCH_UBO_Light;
#define WORKBENCH_SH_DATA_LEN ((STUDIOLIGHT_SH_BANDS == 2) ? 6 : STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN)
typedef struct WORKBENCH_UBO_World {
float spherical_harmonics_coefs[STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN][4];
float spherical_harmonics_coefs[WORKBENCH_SH_DATA_LEN][4];
float background_color_low[4];
float background_color_high[4];
float object_outline_color[4];

View File

@ -36,10 +36,55 @@ void studiolight_update_world(StudioLight *sl, WORKBENCH_UBO_World *wd)
{
	/* Fill the spherical harmonics rows of the world UBO `wd` from the studio light `sl`.
	 * The coefficients of `sl` are ensured to be calculated first. */
	BKE_studiolight_ensure_flag(sl, STUDIOLIGHT_SPHERICAL_HARMONICS_COEFFICIENTS_CALCULATED);

#if STUDIOLIGHT_SH_BANDS == 2
	/* Use Geomerics non-linear SH.
	 * Precompute as much as we can, one color channel at a time. Rows written here
	 * (for rows 0, 4 and 5 the xyz components are the R, G, B channels):
	 * [0] = R0 * (1.0 - a) * (p + 1.0)
	 * [1..3] = normalized R1 direction of the red, green and blue channel
	 * [4] = p
	 * [5] = R0 * a
	 * with lr1_r0 = lenR1 / R0, p = 1.0 + 2.0 * lr1_r0, a = (1.0 - lr1_r0) / (1.0 + lr1_r0). */
	for (int i = 0; i < 3; ++i) {
		float *r1 = wd->spherical_harmonics_coefs[i + 1];
		const float r0 = sl->spherical_harmonics_coefs[0][i] * (float)M_1_PI;

		if (!(r0 > 0.0f)) {
			/* No ambient energy in this channel (or NaN): lenR1 / R0 would divide by
			 * zero. Store finite values that evaluate to black instead. */
			copy_v3_fl3(r1, 0.0f, 0.0f, 0.0f);
			wd->spherical_harmonics_coefs[0][i] = 0.0f;
			wd->spherical_harmonics_coefs[4][i] = 1.0f;
			wd->spherical_harmonics_coefs[5][i] = 0.0f;
			continue;
		}

		/* Swizzle to make shader code simpler. */
		copy_v3_fl3(r1, -sl->spherical_harmonics_coefs[3][i],
		            sl->spherical_harmonics_coefs[2][i],
		            -sl->spherical_harmonics_coefs[1][i]);
		mul_v3_fl(r1, M_1_PI * 1.5f); /* 1.5f is to improve the contrast a bit. */
		mul_v3_fl(r1, 0.5f);

		const float len_r1 = len_v3(r1);
		float lr1_r0 = 0.0f;
		if (len_r1 > 0.0f) {
			mul_v3_fl(r1, 1.0f / len_r1);
			lr1_r0 = len_r1 / r0;
		}
		else {
			/* No directional component: normalizing would divide by zero. A zero
			 * direction with lr1_r0 == 0 gives p == 1 and a == 1, so the stored
			 * terms reduce to the constant R0. */
			copy_v3_fl3(r1, 0.0f, 0.0f, 0.0f);
		}

		const float p = 1.0f + 2.0f * lr1_r0;
		const float a = (1.0f - lr1_r0) / (1.0f + lr1_r0);

		wd->spherical_harmonics_coefs[4][i] = p;
		wd->spherical_harmonics_coefs[5][i] = r0 * a;
		wd->spherical_harmonics_coefs[0][i] = r0 * (1.0f - a) * (p + 1.0f);
	}
#else
	for (int i = 0; i < STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN; i++) {
		/* Can't memcpy because of alignment */
		copy_v3_v3(wd->spherical_harmonics_coefs[i], sl->spherical_harmonics_coefs[i]);
	}
#endif
}
static void compute_parallel_lines_nor_and_dist(const float v1[2], const float v2[2], const float v3[2], float r_line[2])