Cycles: limit the internal sample index of Sobol-Burley for performance

This is done based on the render sample count so that it doesn't impact
sampling quality. It's similar in spirit to the adaptive table size in D16561,
but in this case for performance rather than memory usage.

Differential Revision: https://developer.blender.org/D16726
This commit is contained in:
Nathan Vegdahl 2022-12-14 16:43:12 +01:00 committed by Brecht Van Lommel
parent b0cc8e8dde
commit d9192aaa6d
4 changed files with 88 additions and 15 deletions

View File

@ -180,6 +180,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
/* Sampling pattern. */
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
KERNEL_STRUCT_MEMBER(integrator, int, tabulated_sobol_sequence_size)
KERNEL_STRUCT_MEMBER(integrator, int, sobol_index_mask)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@ -204,7 +205,6 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
/* Padding. */
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
KERNEL_STRUCT_MEMBER(integrator, int, pad2)
KERNEL_STRUCT_END(KernelIntegrator)
/* SVM. For shader specialization. */

View File

@ -3,8 +3,8 @@
#pragma once
#include "kernel/sample/tabulated_sobol.h"
#include "kernel/sample/sobol_burley.h"
#include "kernel/sample/tabulated_sobol.h"
#include "util/hash.h"
CCL_NAMESPACE_BEGIN
@ -23,7 +23,8 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
return sobol_burley_sample_1D(sample, dimension, rng_hash);
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_1D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_1D(kg, sample, rng_hash, dimension);
@ -40,7 +41,8 @@ ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
return sobol_burley_sample_2D(sample, dimension, rng_hash);
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_2D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_2D(kg, sample, rng_hash, dimension);
@ -57,7 +59,8 @@ ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
return sobol_burley_sample_3D(sample, dimension, rng_hash);
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_3D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_3D(kg, sample, rng_hash, dimension);
@ -74,7 +77,8 @@ ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
return sobol_burley_sample_4D(sample, dimension, rng_hash);
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_4D(sample, dimension, rng_hash, index_mask);
}
else {
return tabulated_sobol_sample_4D(kg, sample, rng_hash, dimension);

View File

@ -65,31 +65,75 @@ ccl_device_forceinline float sobol_burley(uint rev_bit_index,
}
/*
* Computes a 1D Owen-scrambled and shuffled Sobol sample.
* NOTE: the functions below intentionally produce samples that are
* uncorrelated between functions. For example, a 1D sample and 2D
* sample produced with the same index, dimension, and seed are
* uncorrelated with each other. This allows more care-free usage
* of the functions together, without having to worry about
* e.g. 1D and 2D samples being accidentally correlated with each
* other.
*/
ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed)
/*
* Computes a 1D Owen-scrambled and shuffled Sobol sample.
*
* `index` is the index of the sample in the sequence.
*
* `dimension` is which dimensions of the sample you want to fetch. Note
* that different 1D dimensions are uncorrelated. For samples with > 1D
* stratification, use the multi-dimensional sampling methods below.
*
* `seed`: different seeds produce statistically independent,
* uncorrelated sequences.
*
* `shuffled_index_mask` limits the sample sequence length, improving
* performance. It must be a string of binary 1 bits followed by a
* string of binary 0 bits (e.g. 0xffff0000) for the sampler to operate
* correctly. In general, `reverse_integer_bits(shuffled_index_mask)`
* should be >= the maximum number of samples expected to be taken. A safe
* default (but least performant) is 0xffffffff, for maximum sequence
* length.
*/
ccl_device float sobol_burley_sample_1D(uint index,
uint const dimension,
uint seed,
uint shuffled_index_mask)
{
/* Include the dimension in the seed, so we get decorrelated
* sequences for different dimensions via shuffling. */
seed ^= hash_hp_uint(dimension);
/* Shuffle. */
/* Shuffle and mask. The masking is just for better
* performance at low sample counts. */
index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe);
index &= shuffled_index_mask;
return sobol_burley(index, 0, seed ^ 0x635c77bd);
}
/*
* Computes a 2D Owen-scrambled and shuffled Sobol sample.
*
* `dimension_set` is which two dimensions of the sample you want to
* fetch. For example, 0 is the first two, 1 is the second two, etc.
* The dimensions within a single set are stratified, but different sets
* are uncorrelated.
*
* See sobol_burley_sample_1D for further usage details.
*/
ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed)
ccl_device float2 sobol_burley_sample_2D(uint index,
const uint dimension_set,
uint seed,
uint shuffled_index_mask)
{
/* Include the dimension set in the seed, so we get decorrelated
* sequences for different dimension sets via shuffling. */
seed ^= hash_hp_uint(dimension_set);
/* Shuffle. */
/* Shuffle and mask. The masking is just for better
* performance at low sample counts. */
index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a);
index &= shuffled_index_mask;
return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76),
sobol_burley(index, 1, seed ^ 0x94964d4e));
@ -97,15 +141,27 @@ ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, u
/*
* Computes a 3D Owen-scrambled and shuffled Sobol sample.
*
* `dimension_set` is which three dimensions of the sample you want to
* fetch. For example, 0 is the first three, 1 is the second three, etc.
* The dimensions within a single set are stratified, but different sets
* are uncorrelated.
*
* See sobol_burley_sample_1D for further usage details.
*/
ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed)
ccl_device float3 sobol_burley_sample_3D(uint index,
const uint dimension_set,
uint seed,
uint shuffled_index_mask)
{
/* Include the dimension set in the seed, so we get decorrelated
* sequences for different dimension sets via shuffling. */
seed ^= hash_hp_uint(dimension_set);
/* Shuffle. */
/* Shuffle and mask. The masking is just for better
* performance at low sample counts. */
index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac);
index &= shuffled_index_mask;
return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391),
sobol_burley(index, 1, seed ^ 0x67c33241),
@ -114,15 +170,27 @@ ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, u
/*
* Computes a 4D Owen-scrambled and shuffled Sobol sample.
*
* `dimension_set` is which four dimensions of the sample you want to
* fetch. For example, 0 is the first four, 1 is the second four, etc.
* The dimensions within a single set are stratified, but different sets
* are uncorrelated.
*
* See sobol_burley_sample_1D for further usage details.
*/
ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed)
ccl_device float4 sobol_burley_sample_4D(uint index,
const uint dimension_set,
uint seed,
uint shuffled_index_mask)
{
/* Include the dimension set in the seed, so we get decorrelated
* sequences for different dimension sets via shuffling. */
seed ^= hash_hp_uint(dimension_set);
/* Shuffle. */
/* Shuffle and mask. The masking is just for better
* performance at low sample counts. */
index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055);
index &= shuffled_index_mask;
return make_float4(sobol_burley(index, 0, seed ^ 0x39468210),
sobol_burley(index, 1, seed ^ 0xe9d8a845),

View File

@ -253,6 +253,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->scrambling_distance = scrambling_distance;
kintegrator->sobol_index_mask = reverse_integer_bits(next_power_of_two(aa_samples - 1) - 1);
kintegrator->use_light_tree = scene->integrator->use_light_tree;
if (light_sampling_threshold > 0.0f) {