Fix T74423: Cycles rendering artifacts with CUDA 10.2

Work around what appears to be a compiler bug by restructuring the code slightly, without any functional changes.
Referenced by issue #74423: CUDA 10.2 and GTX 1080 render artifacts with path tracing
2020-04-22 16:27:25 +02:00 · 2020-04-22 16:27:25 +02:00 · cf5147f69f · 2023-02-14 04:10:15 +01:00
parent 138b0c970e
commit cf5147f69f
2 changed files with 23 additions and 20 deletions
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -199,32 +199,33 @@ ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int
 {
  /* Fallback to random */
  if (sample >= NUM_PMJ_SAMPLES) {
-    int p = rng_hash + dimension;
+    const int p = rng_hash + dimension;
    return cmj_randfloat(sample, p);
  }
-  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
-  int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-  return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
-         1.0f;
+  else {
+    const uint mask = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
+    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+    return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ mask) - 1.0f;
+  }
 }

-ccl_device void pmj_sample_2D(
-    KernelGlobals *kg, int sample, int rng_hash, int dimension, float *fx, float *fy)
+ccl_device float2 pmj_sample_2D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
 {
  if (sample >= NUM_PMJ_SAMPLES) {
-    int p = rng_hash + dimension;
-    *fx = cmj_randfloat(sample, p);
-    *fy = cmj_randfloat(sample, p + 1);
-    return;
+    const int p = rng_hash + dimension;
+    const float fx = cmj_randfloat(sample, p);
+    const float fy = cmj_randfloat(sample, p + 1);
+    return make_float2(fx, fy);
+  }
+  else {
+    const int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+    const uint maskx = cmj_hash_simple(dimension, rng_hash) & 0x007fffff;
+    const uint masky = cmj_hash_simple(dimension + 1, rng_hash) & 0x007fffff;
+    const float fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ maskx) - 1.0f;
+    const float fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ masky) -
+                     1.0f;
+    return make_float2(fx, fy);
  }
-  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
-  int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
-  *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
-        1.0f;
-  tmp_rng = cmj_hash_simple(dimension + 1, rng_hash);
-  *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^
-                        (tmp_rng & 0x007fffff)) -
-        1.0f;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -102,7 +102,9 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
  return;
 #endif
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
-    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+    const float2 f = pmj_sample_2D(kg, sample, rng_hash, dimension);
+    *fx = f.x;
+    *fy = f.y;
    return;
  }
 #ifdef __CMJ__