Cycles: some tiny hair intersection optimizations that help maybe 2%.

This commit is contained in:
Brecht Van Lommel 2014-04-23 01:10:26 +02:00
parent 0f85174d50
commit 6b9ca06088
1 changed file with 41 additions and 32 deletions

View File

@ -152,48 +152,47 @@ ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta,
float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
float ta = -1.0f;
float tb = -1.0f;
*extremta = -1.0f;
*extremtb = -1.0f;
*upper = p0;
*lower = p0 + p1 + p2 + p3;
*lower = (p0 + p1) + (p2 + p3);
*extrema = *upper;
*extremb = *lower;
if(*lower >= *upper) {
*upper = *lower;
*lower = p0;
}
if(halfdiscroot >= 0) {
float inv3p3 = (1.0f/3.0f)/p3;
halfdiscroot = sqrt(halfdiscroot);
ta = (-p2 - halfdiscroot) / (3 * p3);
tb = (-p2 + halfdiscroot) / (3 * p3);
ta = (-p2 - halfdiscroot) * inv3p3;
tb = (-p2 + halfdiscroot) * inv3p3;
}
float t2;
float t3;
if(ta > 0.0f && ta < 1.0f) {
t2 = ta * ta;
t3 = t2 * ta;
*extremta = ta;
*extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
if(*extrema > *upper) {
*upper = *extrema;
}
if(*extrema < *lower) {
*lower = *extrema;
}
*upper = fmaxf(*extrema, *upper);
*lower = fminf(*extrema, *lower);
}
if(tb > 0.0f && tb < 1.0f) {
t2 = tb * tb;
t3 = t2 * tb;
*extremtb = tb;
*extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
if(*extremb >= *upper) {
*upper = *extremb;
}
if(*extremb <= *lower) {
*lower = *extremb;
}
*upper = fmaxf(*extremb, *upper);
*lower = fminf(*extremb, *lower);
}
}
@ -483,10 +482,11 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
if (d0 >= 0)
coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f;
coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
else // inside
coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f;
coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
}
if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
@ -496,12 +496,19 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
}
t = p_curr.z;
/* stochastic fade from minimum width */
if(difl != 0.0f && lcg_state) {
if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
return hit;
}
}
else {
float l = len(p_en - p_st);
/* minimum width extension */
float or1 = r1;
float or2 = r2;
if(difl != 0.0f) {
mw_extension = min(len(p_st - P) * difl, extmax);
or1 = r1 < mw_extension ? mw_extension : r1;
@ -509,12 +516,14 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
or2 = r2 < mw_extension ? mw_extension : r2;
}
/* --- */
float3 tg = (p_en - p_st) / l;
gd = (or2 - or1) / l;
float invl = 1.0f/l;
float3 tg = (p_en - p_st) * invl;
gd = (or2 - or1) * invl;
float difz = -dot(p_st,tg);
float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
float invcyla = 1.0f/cyla;
float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
float tcentre = -halfb/cyla;
float tcentre = -halfb*invcyla;
float zcentre = difz + (tg.z * tcentre);
float3 tdif = - p_st;
tdif.z += tcentre;
@ -529,7 +538,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
}
float rootd = sqrtf(td);
float correction = ((-tb - rootd)/(2*cyla));
float correction = (-tb - rootd) * 0.5f * invcyla;
t = tcentre + correction;
float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
@ -540,7 +549,7 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
dp_en *= -1;
if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
correction = ((-tb + rootd)/(2*cyla));
correction = (-tb + rootd) * 0.5f * invcyla;
t = tcentre + correction;
}
@ -550,23 +559,23 @@ ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
continue;
}
float w = (zcentre + (tg.z * correction))/l;
float w = (zcentre + (tg.z * correction)) * invl;
w = clamp((float)w, 0.0f, 1.0f);
/* compute u on the curve segment */
u = i_st * (1 - w) + i_en * w;
r_curr = r1 + (r2 - r1) * w;
r_ext = or1 + (or2 - or1) * w;
coverage = r_curr/r_ext;
/* stochastic fade from minimum width */
if(difl != 0.0f && lcg_state) {
r_curr = r1 + (r2 - r1) * w;
r_ext = or1 + (or2 - or1) * w;
coverage = r_curr/r_ext;
if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
return hit;
}
}
/* we found a new intersection */
/* stochastic fade from minimum width */
if(lcg_state && coverage != 1.0f) {
if(lcg_step_float(lcg_state) > coverage)
return hit;
}
#ifdef __VISIBILITY_FLAG__
/* visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags */