Cycles: Overhaul ensure_valid_reflection to fix issues with normal- and bumpmapping

This function is supposed to prevent the black artifacts caused by strong normal or bump mapping, but failed in some cases. Now the code correctly handles all test files and previous issues I am aware of and also has extensive comments describing the algorithm and the math behind it. Basically, the main problem was that there can be multiple valid solutions that fulfil the reflection angle criterion, but I had assumed that only one would exist and therefore simply picked the first solution with a positive term in sqrt(). Now, the code uses additional validity checks and a simple heuristic to pick the best valid solution. Additionally, the code messed up very shallow reflections even if the normal map strength was zero due to the constant limit for the outgoing ray angle, which caused shallow incoming rays to fail the initial test even when reflected directly on Ng. Now, the code accounts for this by reducing the threshold in the case of a shallow incoming ray, ensuring that at least N=Ng is always a valid solution. Reviewers: brecht Differential Revision: https://developer.blender.org/D3816
Referenced by issue #60806, CUDA error: Launch failed in cuCtxSynchronize(), line 1679
2018-10-21 03:41:31 +02:00 · 2018-10-21 03:41:31 +02:00 · 65b25df801 · 2023-02-14 08:38:11 +01:00
parent 2046817c08
commit 65b25df801
2 changed files with 126 additions and 29 deletions
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@ -187,7 +187,10 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u,
 ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
 {
 	float3 R = 2*dot(N, I)*N - I;
-	if(dot(Ng, R) >= 0.05f) {
+
+	/* Reflection rays may always be at least as shallow as the incoming ray. */
+	float threshold = min(0.9f*dot(Ng, I), 0.01f);
+	if(dot(Ng, R) >= threshold) {
 		return N;
 	}

@ -195,22 +198,86 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
 	 * The X axis is found by normalizing the component of N that's orthogonal to Ng.
 	 * The Y axis isn't actually needed.
 	 */
-	float3 X = normalize(N - dot(N, Ng)*Ng);
+	float NdotNg = dot(N, Ng);
+	float3 X = normalize(N - NdotNg*Ng);

-	/* Calculate N.z and N.x in the local coordinate system. */
-	float Iz = dot(I, Ng);
-	float Ix2 = sqr(dot(I, X)), Iz2 = sqr(Iz);
-	float Ix2Iz2 = Ix2 + Iz2;
+	/* Calculate N.z and N.x in the local coordinate system.
+	 *
+	 * The goal of this computation is to find a N' that is rotated towards Ng just enough
+	 * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
+	 *
+	 * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+	 *
+	 * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t.
+	 *
+	 * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that
+	 * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+	 *
+	 * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
+	 *
+	 * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+	 *
+	 * The only unknown here is N'.z, so we can solve for that.
+	 *
+	 * The equation has four solutions in general:
+	 *
+	 * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
+	 * We can simplify this expression a bit by grouping terms:
+	 *
+	 * a = I.x^2 + I.z^2
+	 * b = sqrt(I.x^2 * (a - t^2))
+	 * c = I.z*t + a
+	 * N'.z = +-sqrt(0.5*(+-b + c)/a)
+	 *
+	 * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere.
+	 */
+	float Ix = dot(I, X), Iz = dot(I, Ng);
+	float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+	float a = Ix2 + Iz2;

-	float a = safe_sqrtf(Ix2*(Ix2Iz2 - sqr(0.05f)));
-	float b = Iz*0.05f + Ix2Iz2;
-	float c = (a + b > 0.0f)? (a + b) : (-a + b);
+	float b = safe_sqrtf(Ix2*(a - sqr(threshold)));
+	float c = Iz*threshold + a;

-	float Nz = safe_sqrtf(0.5f * c * (1.0f / Ix2Iz2));
-	float Nx = safe_sqrtf(1.0f - sqr(Nz));
+	/* Evaluate both solutions.
+	 * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first.
+	 * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */
+	float fac = 0.5f/a;
+	float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
+	bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
+	bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));

-	/* Transform back into global coordinates. */
-	return Nx*X + Nz*Ng;
+	float2 N_new;
+	if(valid1 && valid2) {
+		/* If both are possible, do the expensive reflection-based check. */
+		float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
+		float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
+
+		float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz;
+		float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz;
+
+		valid1 = (R1 >= 1e-5f);
+		valid2 = (R2 >= 1e-5f);
+		if(valid1 && valid2) {
+			/* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input
+			 * (if the original reflection wasn't shallow, we would not be in this part of the function). */
+			N_new = (R1 < R2)? N1 : N2;
+		}
+		else {
+			/* If only one reflection is valid (= positive), pick that one. */
+			N_new = (R1 > R2)? N1 : N2;
+		}
+
+	}
+	else if(valid1 || valid2) {
+		/* Only one solution passes the N'.z criterium, so pick that one. */
+		float Nz2 = valid1? N1_z2 : N2_z2;
+		N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
+	}
+	else {
+		return Ng;
+	}
+
+	return N_new.x*X + N_new.y*Ng;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@ -284,33 +284,63 @@ point rotate (point p, float angle, point a, point b)

 normal ensure_valid_reflection(normal Ng, vector I, normal N)
 {
+    /* The implementation here mirrors the one in kernel_montecarlo.h,
+     * check there for an explanation of the algorithm. */
+
    float sqr(float x) { return x*x; }

    vector R = 2*dot(N, I)*N - I;
-    if (dot(Ng, R) >= 0.05) {
+
+    float threshold = min(0.9*dot(Ng, I), 0.01);
+    if(dot(Ng, R) >= threshold) {
        return N;
    }

-    /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
-     * The X axis is found by normalizing the component of N that's orthogonal to Ng.
-     * The Y axis isn't actually needed.
-     */
-    vector X = normalize(N - dot(N, Ng)*Ng);
+    float NdotNg = dot(N, Ng);
+    vector X = normalize(N - NdotNg*Ng);

-    /* Calculate N.z and N.x in the local coordinate system. */
    float Ix = dot(I, X), Iz = dot(I, Ng);
-    float Ix2 = sqr(dot(I, X)), Iz2 = sqr(dot(I, Ng));
-    float Ix2Iz2 = Ix2 + Iz2;
+    float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+    float a = Ix2 + Iz2;

-    float a = sqrt(Ix2*(Ix2Iz2 - sqr(0.05)));
-    float b = Iz*0.05 + Ix2Iz2;
-    float c = (a + b > 0.0)? (a + b) : (-a + b);
+    float b = sqrt(Ix2*(a - sqr(threshold)));
+    float c = Iz*threshold + a;

-    float Nz = sqrt(0.5 * c * (1.0 / Ix2Iz2));
-    float Nx = sqrt(1.0 - sqr(Nz));
+    float fac = 0.5/a;
+    float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c);
+    int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+    int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));

-    /* Transform back into global coordinates. */
-    return Nx*X + Nz*Ng;
+    float N_new_x, N_new_z;
+    if(valid1 && valid2) {
+        float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+        float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+
+        float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz;
+        float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz;
+
+        valid1 = (R1 >= 1e-5);
+        valid2 = (R2 >= 1e-5);
+        if(valid1 && valid2) {
+            N_new_x = (R1 < R2)? N1_x : N2_x;
+            N_new_z = (R1 < R2)? N1_z : N2_z;
+        }
+        else {
+            N_new_x = (R1 > R2)? N1_x : N2_x;
+            N_new_z = (R1 > R2)? N1_z : N2_z;
+        }
+
+    }
+    else if(valid1 || valid2) {
+        float Nz2 = valid1? N1_z2 : N2_z2;
+        N_new_x = sqrt(1.0 - Nz2);
+        N_new_z = sqrt(Nz2);
+    }
+    else {
+        return Ng;
+    }
+
+    return N_new_x*X + N_new_z*Ng;
 }