Cycles: remove extended precision hacks, no longer needed with SSE2 requirement.
Differential Revision: https://developer.blender.org/D2079
This commit is contained in:
parent
1ba90582f5
commit
39ae324918
Notes:
blender-bot
2023-02-14 10:21:15 +01:00
Referenced by issue #48790, Wrong render with HDRi when there is a Sun lamp with strength=0
Referenced by issue #48791, Crash extruding using Ctrl+click + snap to face
|
@ -26,16 +26,6 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
static const int BVH_SORT_THRESHOLD = 4096;
|
||||
|
||||
/* Silly workaround for float extended precision that happens when compiling
|
||||
* on x86, due to one float staying in 80 bit precision register and the other
|
||||
* not, which causes the strict weak ordering to break.
|
||||
*/
|
||||
#if !defined(__i386__)
|
||||
# define NO_EXTENDED_PRECISION
|
||||
#else
|
||||
# define NO_EXTENDED_PRECISION volatile
|
||||
#endif
|
||||
|
||||
struct BVHReferenceCompare {
|
||||
public:
|
||||
int dim;
|
||||
|
@ -52,8 +42,8 @@ public:
|
|||
__forceinline int compare(const BVHReference& ra,
|
||||
const BVHReference& rb) const
|
||||
{
|
||||
NO_EXTENDED_PRECISION float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
|
||||
NO_EXTENDED_PRECISION float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
|
||||
float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
|
||||
float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
|
||||
|
||||
if(ca < cb) return -1;
|
||||
else if(ca > cb) return 1;
|
||||
|
|
|
@ -27,15 +27,6 @@
|
|||
#define BVH_QNODE_LEAF_SIZE 1
|
||||
#define TRI_NODE_SIZE 3
|
||||
|
||||
/* silly workaround for float extended precision that happens when compiling
|
||||
* without sse support on x86, it results in different results for float ops
|
||||
* that you would otherwise expect to compare correctly */
|
||||
#if !defined(__i386__) || defined(__SSE__)
|
||||
# define NO_EXTENDED_PRECISION
|
||||
#else
|
||||
# define NO_EXTENDED_PRECISION volatile
|
||||
#endif
|
||||
|
||||
#include "geom_attribute.h"
|
||||
#include "geom_object.h"
|
||||
#include "geom_triangle.h"
|
||||
|
|
|
@ -108,23 +108,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
|
||||
|
||||
/* intersect ray against child nodes */
|
||||
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
float c0lox = (node0.x - P.x) * idir.x;
|
||||
float c0hix = (node0.z - P.x) * idir.x;
|
||||
float c0loy = (node1.x - P.y) * idir.y;
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
|
||||
NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
float c1loy = (node1.y - P.y) * idir.y;
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
|
||||
/* decide which nodes to traverse next */
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
|
|
|
@ -119,23 +119,23 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
|
||||
|
||||
/* intersect ray against child nodes */
|
||||
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
float c0lox = (node0.x - P.x) * idir.x;
|
||||
float c0hix = (node0.z - P.x) * idir.x;
|
||||
float c0loy = (node1.x - P.y) * idir.y;
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
|
||||
NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
float c1loy = (node1.y - P.y) * idir.y;
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
|
||||
/* decide which nodes to traverse next */
|
||||
traverseChild0 = (c0max >= c0min);
|
||||
|
|
|
@ -115,23 +115,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
|
||||
|
||||
/* intersect ray against child nodes */
|
||||
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
float c0lox = (node0.x - P.x) * idir.x;
|
||||
float c0hix = (node0.z - P.x) * idir.x;
|
||||
float c0loy = (node1.x - P.y) * idir.y;
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
|
||||
NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
float c1loy = (node1.y - P.y) * idir.y;
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
|
||||
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
|
||||
if(difl != 0.0f) {
|
||||
|
|
|
@ -105,23 +105,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
|
||||
|
||||
/* intersect ray against child nodes */
|
||||
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
float c0lox = (node0.x - P.x) * idir.x;
|
||||
float c0hix = (node0.z - P.x) * idir.x;
|
||||
float c0loy = (node1.x - P.y) * idir.y;
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
|
||||
NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
float c1loy = (node1.y - P.y) * idir.y;
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
|
||||
/* decide which nodes to traverse next */
|
||||
traverseChild0 = (c0max >= c0min);
|
||||
|
|
|
@ -109,23 +109,23 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
|||
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
|
||||
|
||||
/* intersect ray against child nodes */
|
||||
NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
float c0lox = (node0.x - P.x) * idir.x;
|
||||
float c0hix = (node0.z - P.x) * idir.x;
|
||||
float c0loy = (node1.x - P.y) * idir.y;
|
||||
float c0hiy = (node1.z - P.y) * idir.y;
|
||||
float c0loz = (node2.x - P.z) * idir.z;
|
||||
float c0hiz = (node2.z - P.z) * idir.z;
|
||||
float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
|
||||
float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
|
||||
|
||||
NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
|
||||
NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
|
||||
NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
|
||||
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
float c1lox = (node0.y - P.x) * idir.x;
|
||||
float c1hix = (node0.w - P.x) * idir.x;
|
||||
float c1loy = (node1.y - P.y) * idir.y;
|
||||
float c1hiy = (node1.w - P.y) * idir.y;
|
||||
float c1loz = (node2.y - P.z) * idir.z;
|
||||
float c1hiz = (node2.w - P.z) * idir.z;
|
||||
float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
|
||||
float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
|
||||
|
||||
/* decide which nodes to traverse next */
|
||||
traverseChild0 = (c0max >= c0min);
|
||||
|
|
Loading…
Reference in New Issue