Cycles: add Sobol-Burley sampling pattern

Based on the paper "Practical Hash-based Owen Scrambling" by Brent Burley, 2020, Journal of Computer Graphics Techniques. It is distinct from the existing Sobol sampler in two important ways: * It is Owen scrambled, which gives it a much better convergence rate in many situations. * It uses padding for higher dimensions, rather than using higher Sobol dimensions directly. In practice this is advantageous because high-dimensional Sobol sequences have holes in their sampling patterns that don't resolve until an unreasonable number of samples are taken. (See Burley's paper for details.) The pattern reduces noise in some benchmark scenes; however, it is also slower, particularly on the CPU. So for now Progressive Multi-Jittered sampling remains the default. Differential Revision: https://developer.blender.org/D15679
Referenced by issue #91747, PMJ Sample generations using PMJ samples instead of PMJ02 samples
2022-08-18 20:45:09 +02:00 · 2022-08-18 20:45:09 +02:00 · a06c9b5ca8 · 2023-03-12 02:21:43 +01:00
parent 35a41a49a8
commit a06c9b5ca8
12 changed files with 393 additions and 88 deletions
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@ -83,6 +83,7 @@ enum_use_layer_samples = (
 enum_sampling_pattern = (
    ('SOBOL', "Sobol", "Use Sobol random sampling pattern", 0),
    ('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
+    ('SOBOL_BURLEY', "Sobol-Burley", "Use Sobol-Burley random sampling pattern", 2),
 )

 enum_volume_sampling = (
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@ -274,6 +274,8 @@ set(SRC_KERNEL_SAMPLE_HEADERS
  sample/mapping.h
  sample/mis.h
  sample/pattern.h
+  sample/sobol_burley.h
+  sample/util.h
 )

 set(SRC_KERNEL_UTIL_HEADERS
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@ -321,8 +321,10 @@ ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg,
  /* Use a hash instead of dimension, this is not great but avoids adding
   * more dimensions to each bounce which reduces quality of dimensions we
   * are already using. */
-  return path_rng_1D(
-      kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
+  return path_rng_1D(kg,
+                     hash_wang_seeded_uint(rng_state->rng_hash, hash),
+                     rng_state->sample,
+                     rng_state->rng_offset);
 }

 ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@ -229,7 +229,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
  const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));

  /* Modify state for RNGs, decorrelated from other paths. */
-  rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
+  rng_state.rng_hash = hash_cmj_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);

  /* Random walk until we hit the surface again. */
  bool hit = false;
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@ -1,20 +1,12 @@
 /* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

+#include "kernel/sample/util.h"
+#include "util/hash.h"
+
 #pragma once
 CCL_NAMESPACE_BEGIN

-ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed)
-{
-  x += seed;
-  x ^= (x * 0x6c50b47cu);
-  x ^= x * 0xb82f1e52u;
-  x ^= x * 0xc7afe638u;
-  x ^= x * 0x8d22f6e6u;
-
-  return x;
-}
-
 ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
 {
  x = reverse_integer_bits(x);
@ -24,46 +16,6 @@ ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
  return x;
 }

-ccl_device_inline uint cmj_hash(uint i, uint p)
-{
-  i ^= p;
-  i ^= i >> 17;
-  i ^= i >> 10;
-  i *= 0xb36534e5;
-  i ^= i >> 12;
-  i ^= i >> 21;
-  i *= 0x93fc4795;
-  i ^= 0xdf6e307f;
-  i ^= i >> 17;
-  i *= 1 | p >> 18;
-
-  return i;
-}
-
-ccl_device_inline uint cmj_hash_simple(uint i, uint p)
-{
-  i = (i ^ 61) ^ p;
-  i += i << 3;
-  i ^= i >> 4;
-  i *= 0x27d4eb2d;
-  return i;
-}
-
-ccl_device_inline float cmj_randfloat(uint i, uint p)
-{
-  return cmj_hash(i, p) * (1.0f / 4294967808.0f);
-}
-
-ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
-{
-  return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
-}
-
-ccl_device_inline float cmj_randfloat_simple_dist(uint i, uint p, float d)
-{
-  return cmj_hash_simple(i, p) * (d / (float)0xFFFFFFFF);
-}
-
 ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension)
 {
  uint hash = rng_hash;
@ -71,16 +23,12 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
    hash = kernel_data.integrator.seed;

-    jitter_x = cmj_randfloat_simple_dist(
-        dimension, rng_hash, kernel_data.integrator.scrambling_distance);
+    jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+               kernel_data.integrator.scrambling_distance;
  }

  /* Perform Owen shuffle of the sample number to reorder the samples. */
-#ifdef _SIMPLE_HASH_
-  const uint rv = cmj_hash_simple(dimension, hash);
-#else /* Use a _REGULAR_HASH_. */
-  const uint rv = cmj_hash(dimension, hash);
-#endif
+  const uint rv = hash_cmj_seeded_uint(dimension, hash);
 #ifdef _XOR_SHUFFLE_
 #  warning "Using XOR shuffle."
  const uint s = sample ^ rv;
@ -101,11 +49,7 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin

 #ifndef _NO_CRANLEY_PATTERSON_ROTATION_
  /* Use Cranley-Patterson rotation to displace the sample pattern. */
-#  ifdef _SIMPLE_HASH_
-  float dx = cmj_randfloat_simple(d, hash);
-#  else
-  float dx = cmj_randfloat(d, hash);
-#  endif
+  float dx = hash_cmj_seeded_float(d, hash);
  /* Jitter sample locations and map back into [0 1]. */
  fx = fx + dx + jitter_x;
  fx = fx - floorf(fx);
@ -129,18 +73,14 @@ ccl_device void pmj_sample_2D(KernelGlobals kg,
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
    hash = kernel_data.integrator.seed;

-    jitter_x = cmj_randfloat_simple_dist(
-        dimension, rng_hash, kernel_data.integrator.scrambling_distance);
-    jitter_y = cmj_randfloat_simple_dist(
-        dimension + 1, rng_hash, kernel_data.integrator.scrambling_distance);
+    jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+               kernel_data.integrator.scrambling_distance;
+    jitter_y = hash_wang_seeded_float(dimension + 1, rng_hash) *
+               kernel_data.integrator.scrambling_distance;
  }

  /* Perform a shuffle on the sample number to reorder the samples. */
-#ifdef _SIMPLE_HASH_
-  const uint rv = cmj_hash_simple(dimension, hash);
-#else /* Use a _REGULAR_HASH_. */
-  const uint rv = cmj_hash(dimension, hash);
-#endif
+  const uint rv = hash_cmj_seeded_uint(dimension, hash);
 #ifdef _XOR_SHUFFLE_
 #  warning "Using XOR shuffle."
  const uint s = sample ^ rv;
@ -159,13 +99,8 @@ ccl_device void pmj_sample_2D(KernelGlobals kg,

 #ifndef _NO_CRANLEY_PATTERSON_ROTATION_
  /* Use Cranley-Patterson rotation to displace the sample pattern. */
-#  ifdef _SIMPLE_HASH_
-  float dx = cmj_randfloat_simple(d, hash);
-  float dy = cmj_randfloat_simple(d + 1, hash);
-#  else
-  float dx = cmj_randfloat(d, hash);
-  float dy = cmj_randfloat(d + 1, hash);
-#  endif
+  float dx = hash_cmj_seeded_float(d, hash);
+  float dy = hash_cmj_seeded_float(d + 1, hash);
  /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */
  float sx = fx + dx + jitter_x;
  float sy = fy + dy + jitter_y;
--- a/intern/cycles/kernel/sample/pattern.h
+++ b/intern/cycles/kernel/sample/pattern.h
@ -4,6 +4,7 @@
 #pragma once

 #include "kernel/sample/jitter.h"
+#include "kernel/sample/sobol_burley.h"
 #include "util/hash.h"

 CCL_NAMESPACE_BEGIN
@ -48,6 +49,10 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
  return (float)drand48();
 #endif

+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
+    return sobol_burley_sample_1D(sample, dimension, rng_hash);
+  }
+
 #ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
 #endif
@ -66,7 +71,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
  /* Hash rng with dimension to solve correlation issues.
   * See T38710, T50116.
   */
-  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+  uint tmp_rng = hash_wang_seeded_uint(dimension, rng_hash);
  shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF);

  return r + shift - floorf(r + shift);
@ -86,6 +91,11 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals kg,
  return;
 #endif

+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
+    sobol_burley_sample_2D(sample, dimension, rng_hash, fx, fy);
+    return;
+  }
+
 #ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
 #endif
--- a/intern/cycles/kernel/sample/sobol_burley.h
+++ b/intern/cycles/kernel/sample/sobol_burley.h
@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/*
+ * A shuffled, Owen-scrambled Sobol sampler, implemented with the
+ * techniques from the paper "Practical Hash-based Owen Scrambling"
+ * by Brent Burley, 2020, Journal of Computer Graphics Techniques.
+ *
+ * Note that unlike a standard high-dimensional Sobol sequence, this
+ * Sobol sampler uses padding to achieve higher dimensions, as described
+ * in Burley's paper.
+ */
+
+#pragma once
+
+#include "kernel/sample/util.h"
+#include "util/hash.h"
+#include "util/math.h"
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/*
+ * Computes a single dimension of a sample from an Owen-scrambled
+ * Sobol sequence.  This is used in the main sampling functions,
+ * sobol_burley_sample_#D(), below.
+ *
+ * - rev_bit_index: the sample index, with reversed order bits.
+ * - dimension:     the sample dimension; it indexes sobol_burley_table, so it must be < 4.
+ * - scramble_seed: the Owen scrambling seed.
+ *
+ * Note that the seed must be well randomized before being
+ * passed to this function.
+ */
+ccl_device_forceinline float sobol_burley(uint rev_bit_index, uint dimension, uint scramble_seed)
+{
+  uint result = 0;
+
+  if (dimension == 0) {
+    // Fast-path for dimension 0, which is just the van der Corput sequence.
+    // This makes a notable difference in performance since we reuse
+    // dimensions for padding, and dimension 0 is reused the most.
+    result = reverse_integer_bits(rev_bit_index);
+  }
+  else {
+    uint i = 0;
+    while (rev_bit_index != 0) {
+      uint j = count_leading_zeros(rev_bit_index);
+      result ^= sobol_burley_table[dimension][i + j];
+      i += j + 1;
+
+      // We can't do "<<= j + 1" because that can overflow the shift
+      // operator, which doesn't do what we need on at least x86.
+      rev_bit_index <<= j;
+      rev_bit_index <<= 1;
+    }
+  }
+
+  // Apply Owen scrambling.
+  result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed));
+
+  return uint_to_float_excl(result);
+}
+
+/*
+ * Computes a 1D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device float sobol_burley_sample_1D(uint index, uint dimension, uint seed)
+{
+  // Include the dimension in the seed, so we get decorrelated
+  // sequences for different dimensions via shuffling.
+  seed ^= hash_hp_uint(dimension);
+
+  // Shuffle.
+  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe);
+
+  return sobol_burley(index, 0, seed ^ 0x635c77bd);
+}
+
+/*
+ * Computes a 2D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device void sobol_burley_sample_2D(
+    uint index, uint dimension_set, uint seed, ccl_private float *x, ccl_private float *y)
+{
+  // Include the dimension set in the seed, so we get decorrelated
+  // sequences for different dimension sets via shuffling.
+  seed ^= hash_hp_uint(dimension_set);
+
+  // Shuffle.
+  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a);
+
+  *x = sobol_burley(index, 0, seed ^ 0xe0aaaf76);
+  *y = sobol_burley(index, 1, seed ^ 0x94964d4e);
+}
+
+/*
+ * Computes a 3D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device void sobol_burley_sample_3D(uint index,
+                                       uint dimension_set,
+                                       uint seed,
+                                       ccl_private float *x,
+                                       ccl_private float *y,
+                                       ccl_private float *z)
+{
+  // Include the dimension set in the seed, so we get decorrelated
+  // sequences for different dimension sets via shuffling.
+  seed ^= hash_hp_uint(dimension_set);
+
+  // Shuffle.
+  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac);
+
+  *x = sobol_burley(index, 0, seed ^ 0x9e78e391);
+  *y = sobol_burley(index, 1, seed ^ 0x67c33241);
+  *z = sobol_burley(index, 2, seed ^ 0x78c395c5);
+}
+
+/*
+ * Computes a 4D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device void sobol_burley_sample_4D(uint index,
+                                       uint dimension_set,
+                                       uint seed,
+                                       ccl_private float *x,
+                                       ccl_private float *y,
+                                       ccl_private float *z,
+                                       ccl_private float *w)
+{
+  // Include the dimension set in the seed, so we get decorrelated
+  // sequences for different dimension sets via shuffling.
+  seed ^= hash_hp_uint(dimension_set);
+
+  // Shuffle.
+  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055);
+
+  *x = sobol_burley(index, 0, seed ^ 0x39468210);
+  *y = sobol_burley(index, 1, seed ^ 0xe9d8a845);
+  *z = sobol_burley(index, 2, seed ^ 0x5f32b482);
+  *w = sobol_burley(index, 3, seed ^ 0x1524cc56);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/sample/util.h
+++ b/intern/cycles/kernel/sample/util.h
@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/*
+ * Performs base-2 Owen scrambling on a reversed-bit integer.
+ *
+ * This is equivalent to the Laine-Karras permutation, but much higher
+ * quality.  See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
+ */
+ccl_device_inline uint reversed_bit_owen(uint n, uint seed)
+{
+  n ^= n * 0x3d20adea;
+  n += seed;
+  n *= (seed >> 16) | 1;
+  n ^= n * 0x05526c56;
+  n ^= n * 0x53a22864;
+
+  return n;
+}
+
+/*
+ * Performs base-2 Owen scrambling on a reversed-bit integer.
+ *
+ * This is here for backwards-compatibility, and can be replaced
+ * with reversed_bit_owen() above at some point.
+ * See https://developer.blender.org/D15679#426304
+ */
+ccl_device_inline uint laine_karras_permutation(uint x, uint seed)
+{
+  x += seed;
+  x ^= (x * 0x6c50b47cu);
+  x ^= x * 0xb82f1e52u;
+  x ^= x * 0xc7afe638u;
+  x ^= x * 0x8d22f6e6u;
+
+  return x;
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/tables.h
+++ b/intern/cycles/kernel/tables.h
@ -63,4 +63,57 @@ ccl_inline_constant float cie_colour_match[][3] = {
  {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}
 };

+/*
+ * The direction vectors for the first four dimensions of the Sobol
+ * sequence, stored with reversed-order bits.
+ *
+ * This is used in the Sobol-Burley sampler implementation.  We don't
+ * need more than four dimensions because we achieve higher dimensions
+ * with padding.  They're stored with reversed bits because we need
+ * them reversed for the fast hash-based Owen scrambling anyway, and
+ * this avoids doing that at run time.
+ */
+ccl_inline_constant unsigned int sobol_burley_table[4][32] = {
+  {
+    0x00000001, 0x00000002, 0x00000004, 0x00000008,
+    0x00000010, 0x00000020, 0x00000040, 0x00000080,
+    0x00000100, 0x00000200, 0x00000400, 0x00000800,
+    0x00001000, 0x00002000, 0x00004000, 0x00008000,
+    0x00010000, 0x00020000, 0x00040000, 0x00080000,
+    0x00100000, 0x00200000, 0x00400000, 0x00800000,
+    0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000, 0x40000000, 0x80000000,
+  },
+  {
+    0x00000001, 0x00000003, 0x00000005, 0x0000000f,
+    0x00000011, 0x00000033, 0x00000055, 0x000000ff,
+    0x00000101, 0x00000303, 0x00000505, 0x00000f0f,
+    0x00001111, 0x00003333, 0x00005555, 0x0000ffff,
+    0x00010001, 0x00030003, 0x00050005, 0x000f000f,
+    0x00110011, 0x00330033, 0x00550055, 0x00ff00ff,
+    0x01010101, 0x03030303, 0x05050505, 0x0f0f0f0f,
+    0x11111111, 0x33333333, 0x55555555, 0xffffffff,
+  },
+  {
+    0x00000001, 0x00000003, 0x00000006, 0x00000009,
+    0x00000017, 0x0000003a, 0x00000071, 0x000000a3,
+    0x00000116, 0x00000339, 0x00000677, 0x000009aa,
+    0x00001601, 0x00003903, 0x00007706, 0x0000aa09,
+    0x00010117, 0x0003033a, 0x00060671, 0x000909a3,
+    0x00171616, 0x003a3939, 0x00717777, 0x00a3aaaa,
+    0x01170001, 0x033a0003, 0x06710006, 0x09a30009,
+    0x16160017, 0x3939003a, 0x77770071, 0xaaaa00a3,
+  },
+  {
+    0x00000001, 0x00000003, 0x00000004, 0x0000000a,
+    0x0000001f, 0x0000002e, 0x00000045, 0x000000c9,
+    0x0000011b, 0x000002a4, 0x0000079a, 0x00000b67,
+    0x0000101e, 0x0000302d, 0x00004041, 0x0000a0c3,
+    0x0001f104, 0x0002e28a, 0x000457df, 0x000c9bae,
+    0x0011a105, 0x002a7289, 0x0079e7db, 0x00b6dba4,
+    0x0100011a, 0x030002a7, 0x0400079e, 0x0a000b6d,
+    0x1f001001, 0x2e003003, 0x45004004, 0xc900a00a,
+  },
+};
+
 /* clang-format on */
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@ -178,6 +178,7 @@ enum PathTraceDimension {
 enum SamplingPattern {
  SAMPLING_PATTERN_SOBOL = 0,
  SAMPLING_PATTERN_PMJ = 1,
+  SAMPLING_PATTERN_SOBOL_BURLEY = 2,

  SAMPLING_NUM_PATTERNS,
 };
--- a/intern/cycles/scene/integrator.cpp
+++ b/intern/cycles/scene/integrator.cpp
@ -89,6 +89,7 @@ NODE_DEFINE(Integrator)
  static NodeEnum sampling_pattern_enum;
  sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
  sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
+  sampling_pattern_enum.insert("sobol_burley", SAMPLING_PATTERN_SOBOL_BURLEY);
  SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
  SOCKET_FLOAT(scrambling_distance, "Scrambling Distance", 1.0f);

@ -260,7 +261,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene

      dscene->sample_pattern_lut.copy_to_device();
    }
-    else {
+    else if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ) {
      constexpr int sequence_size = NUM_PMJ_SAMPLES;
      constexpr int num_sequences = NUM_PMJ_PATTERNS;
      float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size *
--- a/intern/cycles/util/hash.h
+++ b/intern/cycles/util/hash.h
@ -8,6 +8,23 @@

 CCL_NAMESPACE_BEGIN

+/* [0, uint_max] -> [0.0, 1.0) */
+ccl_device_forceinline float uint_to_float_excl(uint n)
+{
+  // Note: we divide by 4294967808 instead of 2^32 because the latter
+  // leads to a [0.0, 1.0] mapping instead of [0.0, 1.0) due to floating
+  // point rounding error. 4294967808 unfortunately leaves (precisely)
+  // one unused ulp between the max number this outputs and 1.0, but
+  // that's the best you can do with this construction.
+  return (float)n * (1.0f / 4294967808.0f);
+}
+
+/* [0, uint_max] -> [0.0, 1.0] */
+ccl_device_forceinline float uint_to_float_incl(uint n)
+{
+  return (float)n * (1.0f / (float)0xFFFFFFFFu);
+}
+
 /* ***** Jenkins Lookup3 Hash Functions ***** */

 /* Source: http://burtleburtle.net/bob/c/lookup3.c */
@ -116,22 +133,22 @@ ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw)

 ccl_device_inline float hash_uint_to_float(uint kx)
 {
-  return (float)hash_uint(kx) / (float)0xFFFFFFFFu;
+  return uint_to_float_incl(hash_uint(kx));
 }

 ccl_device_inline float hash_uint2_to_float(uint kx, uint ky)
 {
-  return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu;
+  return uint_to_float_incl(hash_uint2(kx, ky));
 }

 ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz)
 {
-  return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu;
+  return uint_to_float_incl(hash_uint3(kx, ky, kz));
 }

 ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw)
 {
-  return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu;
+  return uint_to_float_incl(hash_uint4(kx, ky, kz, kw));
 }

 /* Hashing float or float[234] into a float in the range [0, 1]. */
@ -359,6 +376,101 @@ ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw)

 #endif

+/* ***** Hash Prospector Hash Functions *****
+ *
+ * These are based on the high-quality 32-bit hash/mixings functions from
+ * https://github.com/skeeto/hash-prospector
+ */
+
+ccl_device_inline uint hash_hp_uint(uint i)
+{
+  // The actual mixing function from Hash Prospector.
+  i ^= i >> 16;
+  i *= 0x21f0aaad;
+  i ^= i >> 15;
+  i *= 0xd35a2d97;
+  i ^= i >> 15;
+
+  // The xor is just to make input zero not map to output zero.
+  // The number is randomly selected and isn't special.
+  return i ^ 0xe6fe3beb;
+}
+
+/* Seedable version of hash_hp_uint() above. */
+ccl_device_inline uint hash_hp_seeded_uint(uint i, uint seed)
+{
+  // Manipulate the seed so it doesn't interact poorly with i when they
+  // are both e.g. incrementing.  This isn't fool-proof, but is good
+  // enough for practical use.
+  seed ^= seed << 19;
+
+  return hash_hp_uint(i ^ seed);
+}
+
+/* Outputs [0.0, 1.0]. */
+ccl_device_inline float hash_hp_seeded_float(uint i, uint seed)
+{
+  return uint_to_float_incl(hash_hp_seeded_uint(i, seed));
+}
+
+/* ***** CMJ Hash Functions *****
+ *
+ * These are based on one of the hash functions in the paper
+ * "Correlated Multi-Jittered Sampling" by Andrew Kensler, 2013.
+ *
+ * These are here for backwards-compatibility, and can be replaced
+ * by the Hash Prospector hashes above at some point.
+ * See https://developer.blender.org/D15679#426304
+ */
+
+ccl_device_inline uint hash_cmj_seeded_uint(uint i, uint seed)
+{
+  i ^= seed;
+  i ^= i >> 17;
+  i ^= i >> 10;
+  i *= 0xb36534e5;
+  i ^= i >> 12;
+  i ^= i >> 21;
+  i *= 0x93fc4795;
+  i ^= 0xdf6e307f;
+  i ^= i >> 17;
+  i *= 1 | seed >> 18;
+
+  return i;
+}
+
+/* Outputs [0.0, 1.0). */
+ccl_device_inline float hash_cmj_seeded_float(uint i, uint seed)
+{
+  return uint_to_float_excl(hash_cmj_seeded_uint(i, seed));
+}
+
+/* ***** Modified Wang Hash Functions *****
+ *
+ * These are based on a bespoke modified version of the Wang hash, and
+ * can serve as a faster hash when quality isn't critical.
+ *
+ * The original Wang hash is documented here:
+ * https://www.burtleburtle.net/bob/hash/integer.html
+ */
+
+ccl_device_inline uint hash_wang_seeded_uint(uint i, uint seed)
+{
+  i = (i ^ 61) ^ seed;
+  i += i << 3;
+  i ^= i >> 4;
+  i *= 0x27d4eb2d;
+  return i;
+}
+
+/* Outputs [0.0, 1.0]. */
+ccl_device_inline float hash_wang_seeded_float(uint i, uint seed)
+{
+  return uint_to_float_incl(hash_wang_seeded_uint(i, seed));
+}
+
+/* ********** */
+
 #ifndef __KERNEL_GPU__
 static inline uint hash_string(const char *str)
 {