Fix T41066: An actual fix for curve intersection on FMA-enabled CPUs

This commit is contained in:
Sv. Lockal 2015-03-07 16:15:01 +00:00
parent b7a759502f
commit c8fb488b08
Notes: blender-bot 2023-06-21 19:23:24 +02:00
Referenced by issue #41066, Particle Carpet renders different in 2.71
2 changed files with 4 additions and 4 deletions

View File

@@ -709,7 +709,7 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isec
const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
const ssef dir = load4f(direction);
const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
-const ssef sphere_dif2 = nmsub(sphere_b_tmp, dir, sphere_dif1);
+const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
#endif
float mr = max(r1, r2);

View File

@@ -151,7 +151,7 @@ __forceinline ssef maxi(const ssef& a, const ssef& b) {
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
-#if defined(__KERNEL_AVX2__) && !defined(_MSC_VER) // see T41066
+#if defined(__KERNEL_AVX2__)
__forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmadd_ps(a,b,c); }
__forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmsub_ps(a,b,c); }
__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return _mm_fnmadd_ps(a,b,c); }
@@ -159,8 +159,8 @@ __forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { re
#else
__forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return a*b+c; }
__forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return a*b-c; }
-__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c;}
-__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return c-a*b; }
+__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return c-a*b;}
+__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c; }
#endif
////////////////////////////////////////////////////////////////////////////////