Cycles: optimize ensure_valid_reflection(), reduces render time by about 1%

This is an implementation that is about 1.5-2.1 times faster. It gives a result
that is on average 6° different from the old implementation. The difference is
because normals (Ng, N, N') are not selected to be coplanar, but instead
reflection R is lifted the least amount and the N' is computed as a bisector.

Differential Revision: https://developer.blender.org/D10084
This commit is contained in:
Mikhail 2021-03-15 17:59:38 +01:00 committed by Brecht Van Lommel
parent fbe0165aad
commit 21bc1a99ba
Notes: blender-bot 2023-02-14 06:17:14 +01:00
Referenced by commit c07c7957c6, Revert "Cycles: optimize ensure_valid_reflection(), reduces render time by about 1%"
2 changed files with 33 additions and 147 deletions

View File

@ -195,108 +195,31 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u,
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
{
float3 R = 2 * dot(N, I) * N - I;
float3 R;
float NI = dot(N, I);
float NgR, threshold;
/* Reflection rays may always be at least as shallow as the incoming ray. */
float threshold = min(0.9f * dot(Ng, I), 0.01f);
if (dot(Ng, R) >= threshold) {
return N;
}
/* Check if the incident ray is coming from behind normal N. */
if (NI > 0) {
/* Normal reflection */
R = (2 * NI) * N - I;
NgR = dot(Ng, R);
/* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
* The X axis is found by normalizing the component of N that's orthogonal to Ng.
* The Y axis isn't actually needed.
*/
float NdotNg = dot(N, Ng);
float3 X = normalize(N - NdotNg * Ng);
/* Keep math expressions. */
/* clang-format off */
/* Calculate N.z and N.x in the local coordinate system.
*
* The goal of this computation is to find a N' that is rotated towards Ng just enough
* to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
*
* According to the standard reflection equation,
* this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
*
* Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get
* 2*dot(N', I)*N'.z - I.z = t.
*
* The rotation is simple to express in the coordinate system we formed -
* since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane,
* so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
*
* Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
*
* With these simplifications,
* we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
*
* The only unknown here is N'.z, so we can solve for that.
*
* The equation has four solutions in general:
*
* N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
* We can simplify this expression a bit by grouping terms:
*
* a = I.x^2 + I.z^2
* b = sqrt(I.x^2 * (a - t^2))
* c = I.z*t + a
* N'.z = +-sqrt(0.5*(+-b + c)/a)
*
* Two solutions can immediately be discarded because they're negative so N' would lie in the
* lower hemisphere.
*/
/* clang-format on */
float Ix = dot(I, X), Iz = dot(I, Ng);
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
float a = Ix2 + Iz2;
float b = safe_sqrtf(Ix2 * (a - sqr(threshold)));
float c = Iz * threshold + a;
/* Evaluate both solutions.
* In many cases one can be immediately discarded (if N'.z would be imaginary or larger than
* one), so check for that first. If no option is viable (might happen in extreme cases like N
* being in the wrong hemisphere), give up and return Ng. */
float fac = 0.5f / a;
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
float2 N_new;
if (valid1 && valid2) {
/* If both are possible, do the expensive reflection-based check. */
float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
valid1 = (R1 >= 1e-5f);
valid2 = (R2 >= 1e-5f);
if (valid1 && valid2) {
/* If both solutions are valid, return the one with the shallower reflection since it will be
* closer to the input (if the original reflection wasn't shallow, we would not be in this
* part of the function). */
N_new = (R1 < R2) ? N1 : N2;
/* Reflection rays may always be at least as shallow as the incoming ray. */
threshold = min(0.9f * dot(Ng, I), 0.01f);
if (NgR >= threshold) {
return N;
}
else {
/* If only one reflection is valid (= positive), pick that one. */
N_new = (R1 > R2) ? N1 : N2;
}
}
else if (valid1 || valid2) {
/* Only one solution passes the N'.z criterium, so pick that one. */
float Nz2 = valid1 ? N1_z2 : N2_z2;
N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
}
else {
return Ng;
/* Bad incident */
R = -I;
NgR = dot(Ng, R);
threshold = 0.01f;
}
return N_new.x * X + N_new.y * Ng;
R = R + Ng * (threshold - NgR); /* Lift the reflection above the threshold. */
return normalize(I * len(R) + R * len(I)); /* Find a bisector. */
}
CCL_NAMESPACE_END

View File

@ -84,67 +84,30 @@ closure color principled_hair(normal N,
closure color henyey_greenstein(float g) BUILTIN;
closure color absorption() BUILTIN;
normal ensure_valid_reflection(normal Ng, vector I, normal N)
normal ensure_valid_reflection(normal Ng, normal I, normal N)
{
/* The implementation here mirrors the one in kernel_montecarlo.h,
* check there for an explanation of the algorithm. */
vector R;
float NI = dot(N, I);
float NgR, threshold;
float sqr(float x)
{
return x * x;
}
vector R = 2 * dot(N, I) * N - I;
float threshold = min(0.9 * dot(Ng, I), 0.01);
if (dot(Ng, R) >= threshold) {
return N;
}
float NdotNg = dot(N, Ng);
vector X = normalize(N - NdotNg * Ng);
float Ix = dot(I, X), Iz = dot(I, Ng);
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
float a = Ix2 + Iz2;
float b = sqrt(Ix2 * (a - sqr(threshold)));
float c = Iz * threshold + a;
float fac = 0.5 / a;
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
float N_new_x, N_new_z;
if (valid1 && valid2) {
float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
valid1 = (R1 >= 1e-5);
valid2 = (R2 >= 1e-5);
if (valid1 && valid2) {
N_new_x = (R1 < R2) ? N1_x : N2_x;
N_new_z = (R1 < R2) ? N1_z : N2_z;
if (NI > 0) {
R = (2 * NI) * N - I;
NgR = dot(Ng, R);
threshold = min(0.9 * dot(Ng, I), 0.01);
if (NgR >= threshold) {
return N;
}
else {
N_new_x = (R1 > R2) ? N1_x : N2_x;
N_new_z = (R1 > R2) ? N1_z : N2_z;
}
}
else if (valid1 || valid2) {
float Nz2 = valid1 ? N1_z2 : N2_z2;
N_new_x = sqrt(1.0 - Nz2);
N_new_z = sqrt(Nz2);
}
else {
return Ng;
R = -I;
NgR = dot(Ng, R);
threshold = 0.01;
}
return N_new_x * X + N_new_z * Ng;
R = R + Ng * (threshold - NgR);
return normalize(I * length(R) + R * length(I));
}
#endif /* CCL_STDOSL_H */