Fix T75895: Unable to Compile Cycles on NAVI/Linux

This patch adds compiler hints that prevent unrolling of the
nested for loops in the Voronoi node.

Reviewed by: Brecht van Lommel

Differential Revision: https://developer.blender.org/D7574
This commit is contained in:
Jeroen Bakker 2020-04-30 14:15:10 +02:00
parent 36bf067ddc
commit 6121c28501
Notes: blender-bot 2023-02-14 03:00:45 +01:00
Referenced by issue #75895, LLVM ERROR: Error while trying to spill SGPR4_SGPR5 from class SReg_64: Cannot scavenge register without an emergency spill slot!
5 changed files with 18 additions and 7 deletions

View File

@ -71,6 +71,7 @@ __device__ half __float2half(const float f)
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
#define ccl_loop_no_unroll
/* TODO(sergey): In theory we might use references with CUDA, however
* performance impact yet to be investigated.
*/

View File

@ -43,6 +43,7 @@
#define ccl_local __local
#define ccl_local_param __local
#define ccl_private __private
#define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
#define ccl_restrict restrict
#define ccl_ref
#define ccl_align(n) __attribute__((aligned(n)))

View File

@ -70,6 +70,7 @@ __device__ half __float2half(const float f)
#define ccl_private
#define ccl_may_alias
#define ccl_addr_space
#define ccl_loop_no_unroll
#define ccl_restrict __restrict__
#define ccl_ref
#define ccl_align(n) __align__(n)

View File

@ -684,7 +684,8 @@ ccl_device void voronoi_f1_4d(float4 coord,
float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@ -722,7 +723,8 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -2; u <= 2; u++) {
for (int k = -2; k <= 2; k++) {
for (int j = -2; j <= 2; j++) {
ccl_loop_no_unroll for (int j = -2; j <= 2; j++)
{
for (int i = -2; i <= 2; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@ -765,7 +767,8 @@ ccl_device void voronoi_f2_4d(float4 coord,
float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@ -803,7 +806,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
float minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 vectorToPoint = cellOffset +
@ -822,7 +826,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 vectorToPoint = cellOffset +
@ -851,7 +856,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
float minDistance = 8.0f;
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
float4 cellOffset = make_float4(i, j, k, u);
float4 pointPosition = cellOffset +
@ -871,7 +877,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int u = -1; u <= 1; u++) {
for (int k = -1; k <= 1; k++) {
for (int j = -1; j <= 1; j++) {
ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
{
for (int i = -1; i <= 1; i++) {
if (i == 0 && j == 0 && k == 0 && u == 0) {
continue;

View File

@ -45,6 +45,7 @@
# define ccl_restrict __restrict
# define ccl_ref &
# define ccl_optional_struct_init
# define ccl_loop_no_unroll
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)