Cycles: use more PMJ patterns and make their size adaptive.

This resolves some issues with correlation artifacts at higher sample counts. Fix T101356, correlation issues in new PMJ pattern. Differential Revision: https://developer.blender.org/D16561
Referenced by issue #101356, New PMJ does not converge to the same result as Sobol-Burley
2022-11-21 18:16:27 +01:00 · 2022-11-21 18:16:27 +01:00 · 03b5be4e3c · 2023-02-13 14:24:32 +01:00
parent 41a3de878f
commit 03b5be4e3c
4 changed files with 45 additions and 39 deletions
--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -183,6 +183,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
 KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
 /* Sampling pattern. */
 KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
+KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
 KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
 /* Volume render. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -205,6 +206,11 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
+
+/* Padding. */
+KERNEL_STRUCT_MEMBER(integrator, int, pad1)
+KERNEL_STRUCT_MEMBER(integrator, int, pad2)
+KERNEL_STRUCT_MEMBER(integrator, int, pad3)
 KERNEL_STRUCT_END(KernelIntegrator)

 /* SVM. For shader specialization. */
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@@ -7,6 +7,25 @@
 #pragma once
 CCL_NAMESPACE_BEGIN

+ccl_device uint pmj_shuffled_sample_index(KernelGlobals kg, uint sample, uint dimension, uint seed)
+{
+  const uint sample_count = kernel_data.integrator.pmj_sequence_size;
+
+  /* Compute the index of the table entry to use for `sample` in `dimension`.
+   * The result counts whole entries; the callers in this file multiply it by
+   * NUM_PMJ_DIMENSIONS before fetching from `sample_pattern_lut`.
+   *
+   * Shuffle the pattern order and sample index to better decorrelate
+   * dimensions and make the most of the finite patterns we have:
+   * - `pattern_i` selects the pattern for this dimension, derived from
+   *   `dimension` and `seed` via hash_shuffle_uint().
+   * - The sample index is scrambled, but only the low bits covered by
+   *   `sample_mask` are taken from the scrambled value; the bits above the
+   *   mask are kept as they were. This ensures that we only shuffle
+   *   *within* the current sample pattern, which is necessary to avoid
+   *   early repeat pattern use.
+   * - The returned index is `pattern_i * sample_count + sample`, wrapped
+   *   modulo `sample_count * NUM_PMJ_PATTERNS`. */
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  /* sample_count should always be a power of two, so `sample_count - 1` is a
+   * mask selecting the index bits that lie within one pattern. */
+  const uint sample_mask = sample_count - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
+  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
+  return ((pattern_i * sample_count) + sample) % (sample_count * NUM_PMJ_PATTERNS);
+}
+
 ccl_device float pmj_sample_1D(KernelGlobals kg,
                               uint sample,
                               const uint rng_hash,
@@ -20,22 +39,9 @@ ccl_device float pmj_sample_1D(KernelGlobals kg,
    seed = kernel_data.integrator.seed;
  }

-  /* Shuffle the pattern order and sample index to better decorrelate
-   * dimensions and make the most of the finite patterns we have.
-   * The funky sample mask stuff is to ensure that we only shuffle
-   * *within* the current sample pattern, which is necessary to avoid
-   * early repeat pattern use. */
-  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
-  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  const uint sample_shuffled = nested_uniform_scramble(sample,
-                                                       hash_wang_seeded_uint(dimension, seed));
-  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
-
  /* Fetch the sample. */
-  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
-                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
-  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
+  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
@@ -61,23 +67,10 @@ ccl_device float2 pmj_sample_2D(KernelGlobals kg,
    seed = kernel_data.integrator.seed;
  }

-  /* Shuffle the pattern order and sample index to better decorrelate
-   * dimensions and make the most of the finite patterns we have.
-   * The funky sample mask stuff is to ensure that we only shuffle
-   * *within* the current sample pattern, which is necessary to avoid
-   * early repeat pattern use. */
-  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
-  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  const uint sample_shuffled = nested_uniform_scramble(sample,
-                                                       hash_wang_seeded_uint(dimension, seed));
-  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
-
  /* Fetch the sample. */
-  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
-                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
-  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
-  float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);
+  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
+  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
+  float y = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS + 1);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1382,12 +1382,13 @@ static_assert_align(KernelShaderEvalInput, 16);

 /* Pre-computed sample table sizes for PMJ02 sampler.
 *
- * NOTE: divisions *must* be a power of two, and patterns
+ * NOTE: min and max samples *must* be a power of two, and patterns
 * ideally should be as well.
 */
-#define NUM_PMJ_DIVISIONS 32
-#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
-#define NUM_PMJ_PATTERNS 64
+#define MIN_PMJ_SAMPLES 256
+#define MAX_PMJ_SAMPLES 8192
+#define NUM_PMJ_DIMENSIONS 2
+#define NUM_PMJ_PATTERNS 256

 /* Device kernels.
 *
--- a/intern/cycles/scene/integrator.cpp
+++ b/intern/cycles/scene/integrator.cpp
@@ -257,12 +257,18 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
    kintegrator->light_inv_rr_threshold = 0.0f;
  }

+  constexpr int num_sequences = NUM_PMJ_PATTERNS;
+  int sequence_size = clamp(next_power_of_two(aa_samples - 1), MIN_PMJ_SAMPLES, MAX_PMJ_SAMPLES);
  if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ &&
-      dscene->sample_pattern_lut.size() == 0) {
-    constexpr int sequence_size = NUM_PMJ_SAMPLES;
-    constexpr int num_sequences = NUM_PMJ_PATTERNS;
+      dscene->sample_pattern_lut.size() !=
+          (sequence_size * NUM_PMJ_DIMENSIONS * NUM_PMJ_PATTERNS)) {
+    kintegrator->pmj_sequence_size = sequence_size;
+
+    if (dscene->sample_pattern_lut.size() != 0) {
+      dscene->sample_pattern_lut.free();
+    }
    float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
-                                                                    2);
+                                                                    NUM_PMJ_DIMENSIONS);
    TaskPool pool;
    for (int j = 0; j < num_sequences; ++j) {
      float2 *sequence = directions + j * sequence_size;