Workbench: Use non-negative lighting evaluation

This makes the lighting a bit more diffuse but does not produce negative values. A bias of 1.5f is added to make the lighting a bit more directional. The implementation is based on: https://github.com/kayru/Probulator/blob/master/Source/Probulator/SphericalHarmonics.h#L136 which is derived from: http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf The shader implementation is optimized and has the same runtime cost as the previous method: * no sh eval : 0.13ms * prev sh eval : 0.14ms * new sh eval : 0.22ms * new sh eval opti : 0.14ms
Referenced by issue #57952, Workbench: stack-buffer-overflow after recent lighting evaluation changes
2018-11-19 18:03:06 +01:00 · 2018-11-19 18:03:06 +01:00 · 7bb512594c · 2023-02-14 04:59:00 +01:00
parent 2b56d21839
commit 7bb512594c
6 changed files with 122 additions and 15 deletions
--- a/source/blender/blenkernel/BKE_studiolight.h
+++ b/source/blender/blenkernel/BKE_studiolight.h
@ -62,7 +62,7 @@
 #define STUDIOLIGHT_ICON_SIZE 96

 /* Only 1 - 5 is supported */
-#define STUDIOLIGHT_SH_BANDS 3
+#define STUDIOLIGHT_SH_BANDS 2

 #define STUDIOLIGHT_SH_COEFS_LEN (STUDIOLIGHT_SH_BANDS * STUDIOLIGHT_SH_BANDS)

--- a/source/blender/blenkernel/intern/studiolight.c
+++ b/source/blender/blenkernel/intern/studiolight.c
@ -558,19 +558,45 @@ static void studiolight_spherical_harmonics_apply_windowing(float (*sh)[3], floa
 	}
 }

-BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float color[3], float normal[3])
+static float studiolight_spherical_harmonics_geomerics_eval(const float normal[3], float sh0, float sh1, float sh2, float sh3)
 {
+	/* Use Geomerics non-linear SH: evaluates a single color channel from its L0 coef (sh0) and its three L1 coefs (sh1..sh3) for the given normal. */
+	/* Reference: http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf */
+	float R0 = sh0 * M_1_PI; /* L0 term scaled by 1 / PI. */
+	/* R1: the L1 coefs reordered and sign-flipped into a vector, scaled, then normalized below. */
+	float R1[3] = {-sh3, sh2, -sh1};
+	mul_v3_fl(R1, 0.5f * M_1_PI * 1.5f); /* 1.5f is to improve the contrast a bit. */
+	float lenR1 = len_v3(R1); /* Length is taken before normalizing; it is reused for p and a. */
+	mul_v3_fl(R1, 1.0f / lenR1); /* NOTE(review): divides by zero when all three L1 coefs are 0 -- confirm this cannot happen. */
+	float q = 0.5f * (1.0f + dot_v3v3(R1, normal)); /* Maps the dot product to [0..1], assuming `normal` is unit length -- TODO confirm. */
+	/* p is the exponent applied to q, a is the normal-independent term. NOTE(review): both divide by R0, which is 0 when sh0 is 0. */
+	float p = 1.0f + 2.0f * lenR1 / R0;
+	float a = (1.0f - lenR1 / R0) / (1.0f + lenR1 / R0);
+	/* Final reconstruction: R0 * (a + (1 - a) * (p + 1) * q^p). */
+	return R0 * (a + (1.0f - a) * (p + 1.0f) * powf(q, p));
+}
+
+BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float color[3], const float normal[3])
+{
+#if STUDIOLIGHT_SH_BANDS == 2
+	float (*sh)[3] = (float (*)[3])sl->spherical_harmonics_coefs;
+	for (int i = 0; i < 3; ++i) {
+		color[i] = studiolight_spherical_harmonics_geomerics_eval(normal, sh[0][i], sh[1][i], sh[2][i], sh[3][i]);
+	}
+	return;
+#else
+
 	/* L0 */
 	mul_v3_v3fl(color, sl->spherical_harmonics_coefs[0], 0.282095f);
-#if STUDIOLIGHT_SH_BANDS > 1 /* L1 */
+#  if STUDIOLIGHT_SH_BANDS > 1 /* L1 */
 	const float nx = normal[0];
 	const float ny = normal[1];
 	const float nz = normal[2];
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[1], -0.488603f * nz);
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[2],  0.488603f * ny);
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[3], -0.488603f * nx);
-#endif
-#if STUDIOLIGHT_SH_BANDS > 2 /* L2 */
+#  endif
+#  if STUDIOLIGHT_SH_BANDS > 2 /* L2 */
 	const float nx2 = SQUARE(nx);
 	const float ny2 = SQUARE(ny);
 	const float nz2 = SQUARE(nz);
@ -579,9 +605,9 @@ BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float colo
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[6], 0.315392f * (3.0f * ny2 - 1.0f));
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[7], -1.092548 * nx * ny);
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[8], 0.546274 * (nx2 - nz2));
-#endif
+#  endif
 	/* L3 coefs are 0 */
-#if STUDIOLIGHT_SH_BANDS > 4 /* L4 */
+#  if STUDIOLIGHT_SH_BANDS > 4 /* L4 */
 	const float nx4 = SQUARE(nx2);
 	const float ny4 = SQUARE(ny2);
 	const float nz4 = SQUARE(nz2);
@ -594,6 +620,7 @@ BLI_INLINE void studiolight_spherical_harmonics_eval(StudioLight *sl, float colo
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[15],  0.9461746957575601f * (nx2 - nz2) * (-1.0f + 7.0f * ny2));
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[16], -1.7701307697799304f * nx * ny * (nx2 - 3.0f * nz2));
 	madd_v3_v3fl(color, sl->spherical_harmonics_coefs[17],  0.6258357354491761f * (nx4 - 6.0f * nz2 * nx2 + nz4));
+#  endif
 #endif
 }

--- a/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_world_light_lib.glsl
@ -1,18 +1,50 @@
 #define BLINN

+#if STUDIOLIGHT_SH_BANDS == 2
+vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
+{
+	/* Geomerics non-linear SH: http://www.geomerics.com/wp-content/uploads/2015/08/CEDEC_Geomerics_ReconstructingDiffuseLighting1.pdf */
+	/* Highly optimized form, precompute as much as we can: sh_coefs holds the values prepared by studiolight_update_world(), not raw SH coefs. */
+	/**
+	 * For each color channel c: R1 = 0.5 * (1.5 / PI) * vec3(-L3[c], L2[c], -L1[c]);
+	 * sh_coefs[1..3] = R1 / length(R1); (slot 1 + c holds the normalized R1 of channel c)
+	 **/
+	vec3 q;
+	q.x = dot(sh_coefs[1], N); /* Channel 0. */
+	q.y = dot(sh_coefs[2], N); /* Channel 1. */
+	q.z = dot(sh_coefs[3], N); /* Channel 2. */
+	q = 0.5 * q + 0.5; /* Maps each dot product to [0..1], assuming N is unit length -- TODO confirm. */
+
+	/**
+	 * R0 = L0 / PI; (one value per color channel, same for every line below)
+	 * lr1_r0 = lenR1 / R0;
+	 * p = 1.0 + 2.0 * lr1_r0;
+	 * a = (1.0 - lr1_r0) / (1.0 + lr1_r0);
+	 * return R0 * (a + (1.0 - a) * (p + 1.0) * pow(q, p));
+	 *
+	 * sh_coefs[4] = p;
+	 * sh_coefs[5] = R0 * a;
+	 * sh_coefs[0] = R0 * (1.0 - a) * (p + 1.0);
+	 **/
+	q = pow(q, sh_coefs[4]); /* Component-wise: each channel uses its own exponent p. */
+	return sh_coefs[0] * q + sh_coefs[5]; /* Same expression as the `return` in the comment above, with the N-independent factors folded in. */
+}
+
+#else
+
 vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
 {
 	vec3 sh = 0.282095 * sh_coefs[0];

-#if STUDIOLIGHT_SH_BANDS > 1
+#  if STUDIOLIGHT_SH_BANDS > 1
 	float nx = N.x;
 	float ny = N.y;
 	float nz = N.z;
 	sh += -0.488603 * nz * sh_coefs[1];
 	sh += 0.488603 * ny * sh_coefs[2];
 	sh += -0.488603 * nx * sh_coefs[3];
-#endif
-#if STUDIOLIGHT_SH_BANDS > 2
+#  endif
+#  if STUDIOLIGHT_SH_BANDS > 2
 	float nx2 = nx * nx;
 	float ny2 = ny * ny;
 	float nz2 = nz * nz;
@ -21,8 +53,8 @@ vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
 	sh += 0.315392 * (3.0 * ny2 - 1.0) * sh_coefs[6];
 	sh += -1.092548 * nx * ny * sh_coefs[7];
 	sh += 0.546274 * (nx2 - nz2) * sh_coefs[8];
-#endif
-#if STUDIOLIGHT_SH_BANDS > 4
+#  endif
+#  if STUDIOLIGHT_SH_BANDS > 4
 	float nx4 = nx2 * nx2;
 	float ny4 = ny2 * ny2;
 	float nz4 = nz2 * nz2;
@ -35,9 +67,10 @@ vec3 spherical_harmonics(vec3 N, vec3 sh_coefs[STUDIOLIGHT_SH_MAX_COMPONENTS])
 	sh += (0.9461746957575601 * (nx2 - nz2) * (-1.0 + 7.0 * ny2)) * sh_coefs[15];
 	sh += (-1.7701307697799304 * nx * ny * (nx2 - 3.0 * nz2)) * sh_coefs[16];
 	sh += (0.6258357354491761 * (nx4 - 6.0 * nz2 * nx2 + nz4)) * sh_coefs[17];
-#endif
+#  endif
 	return sh;
 }
+#endif

 vec3 get_world_diffuse_light(WorldData world_data, vec3 N)
 {
--- a/source/blender/draw/engines/workbench/workbench_materials.c
+++ b/source/blender/draw/engines/workbench/workbench_materials.c
@ -92,7 +92,7 @@ char *workbench_material_build_defines(WORKBENCH_PrivateData *wpd, bool use_text
 	}

 	BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_BANDS %d\n", STUDIOLIGHT_SH_BANDS);
-	BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_MAX_COMPONENTS %d\n", STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN);
+	BLI_dynstr_appendf(ds, "#define STUDIOLIGHT_SH_MAX_COMPONENTS %d\n", WORKBENCH_SH_DATA_LEN);

 	str = BLI_dynstr_get_cstring(ds);
 	BLI_dynstr_free(ds);
--- a/source/blender/draw/engines/workbench/workbench_private.h
+++ b/source/blender/draw/engines/workbench/workbench_private.h
@ -138,8 +138,10 @@ typedef struct WORKBENCH_UBO_Light {
 	float energy;
 } WORKBENCH_UBO_Light;

+#define WORKBENCH_SH_DATA_LEN ((STUDIOLIGHT_SH_BANDS == 2) ? 6 : STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN) /* Row count of WORKBENCH_UBO_World.spherical_harmonics_coefs: 6 precomputed vectors with 2 bands, the raw coefs otherwise. */
+
 typedef struct WORKBENCH_UBO_World {
-	float spherical_harmonics_coefs[STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN][4];
+	float spherical_harmonics_coefs[WORKBENCH_SH_DATA_LEN][4];
 	float background_color_low[4];
 	float background_color_high[4];
 	float object_outline_color[4];
--- a/source/blender/draw/engines/workbench/workbench_studiolight.c
+++ b/source/blender/draw/engines/workbench/workbench_studiolight.c
@ -36,10 +36,55 @@ void studiolight_update_world(StudioLight *sl, WORKBENCH_UBO_World *wd)
 {
 	BKE_studiolight_ensure_flag(sl, STUDIOLIGHT_SPHERICAL_HARMONICS_COEFFICIENTS_CALCULATED);
 
+#if STUDIOLIGHT_SH_BANDS == 2
+	/* Use Geomerics non-linear SH: fill wd->spherical_harmonics_coefs[0..5] with the precomputed terms read by the shader's spherical_harmonics(). */
+	mul_v3_v3fl(wd->spherical_harmonics_coefs[0], sl->spherical_harmonics_coefs[0], M_1_PI); /* Slot 0 temporarily holds R0 = L0 / PI (one value per color channel). */
+	/* Swizzle to make shader code simpler: slot i+1 receives the L1 vector (-L3, L2, -L1) of color channel i. */
+	for (int i = 0; i < 3; ++i) {
+		copy_v3_fl3(wd->spherical_harmonics_coefs[i+1], -sl->spherical_harmonics_coefs[3][i],
+		                                                 sl->spherical_harmonics_coefs[2][i],
+		                                                -sl->spherical_harmonics_coefs[1][i]);
+		mul_v3_fl(wd->spherical_harmonics_coefs[i+1], M_1_PI * 1.5f); /* 1.5f is to improve the contrast a bit. */
+	}
+
+	/* Precompute as much as we can. See shader code for derivation. */
+	float len_r1[3], lr1_r0[3], p[3], a[3]; /* Each array holds one value per color channel. */
+	for (int i = 0; i < 3; ++i) {
+		mul_v3_fl(wd->spherical_harmonics_coefs[i+1], 0.5f); /* Slot i+1 is now R1 of channel i. */
+		len_r1[i] = len_v3(wd->spherical_harmonics_coefs[i+1]);
+		mul_v3_fl(wd->spherical_harmonics_coefs[i+1], 1.0f / len_r1[i]); /* Normalize R1. NOTE(review): divides by zero when the channel's L1 coefs are all 0 -- confirm. */
+	}
+	/* lr1_r0 = lenR1 / R0; */
+	copy_v3_v3(lr1_r0, wd->spherical_harmonics_coefs[0]);
+	invert_v3(lr1_r0); /* Presumably the component-wise reciprocal 1 / R0 -- verify invert_v3's behavior for zero components. */
+	mul_v3_v3(lr1_r0, len_r1);
+	/* p = 1.0 + 2.0 * lr1_r0; */
+	copy_v3_v3(p, lr1_r0);
+	mul_v3_fl(p, 2.0f);
+	add_v3_fl(p, 1.0f);
+	/* a = (1.0 - lr1_r0) / (1.0 + lr1_r0); */
+	copy_v3_v3(a, lr1_r0);
+	add_v3_fl(a, 1.0f);
+	invert_v3(a); /* a = 1 / (1 + lr1_r0). */
+	negate_v3(lr1_r0); /* lr1_r0 is overwritten from here on; p was already derived from it above. */
+	add_v3_fl(lr1_r0, 1.0f); /* lr1_r0 now holds (1 - lenR1 / R0). */
+	mul_v3_v3(a, lr1_r0);
+	/* sh_coefs[4] = p; */
+	copy_v3_v3(wd->spherical_harmonics_coefs[4], p);
+	/* sh_coefs[5] = R0 * a; */
+	mul_v3_v3v3(wd->spherical_harmonics_coefs[5], wd->spherical_harmonics_coefs[0], a);
+	/* sh_coefs[0] = R0 * (1.0 - a) * (p + 1.0); */
+	negate_v3(a);
+	add_v3_fl(a, 1.0f); /* a now holds (1 - a). */
+	add_v3_fl(p, 1.0f); /* p now holds (p + 1); the unmodified p was already stored in slot 4. */
+	mul_v3_v3(a, p);
+	mul_v3_v3(wd->spherical_harmonics_coefs[0], a); /* Slot 0 still held R0, so it becomes the final scale factor. */
+#else
 	for (int i = 0; i < STUDIOLIGHT_SH_EFFECTIVE_COEFS_LEN; i++) {
 		/* Can't memcpy because of alignment */
 		copy_v3_v3(wd->spherical_harmonics_coefs[i], sl->spherical_harmonics_coefs[i]);
 	}
+#endif
 }

 static void compute_parallel_lines_nor_and_dist(const float v1[2], const float v2[2], const float v3[2], float r_line[2])