Distance Scrambling for Cycles X - Sobol version

Cycles:Distance Scrambling for Cycles Sobol Sampler

This option implements micro jittering and is based on the INRIA
research paper [[ https://hal.inria.fr/hal-01325702/document | on micro jittering ]]
and work by Lukas Stockner for implementing the scrambling distance.
It controls the correlation between pixels by limiting the maximum
deviation of the sample values between pixels, using either a
user-supplied value or an adaptive algorithm.

This is a follow-up to https://developer.blender.org/D12316

The PMJ version can be found here: https://developer.blender.org/D12511

Reviewed By: leesonw

Differential Revision: https://developer.blender.org/D12318
This commit is contained in:
William Leeson 2021-10-26 08:30:15 +02:00 committed by William Leeson
parent 4094868f73
commit 366262bef5
17 changed files with 128 additions and 29 deletions

View File

@ -342,6 +342,24 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PROGRESSIVE_MUTI_JITTER',
)
scrambling_distance: FloatProperty(
name="Scrambling Distance",
default=1.0,
min=0.0, max=1.0,
description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low",
)
preview_scrambling_distance: BoolProperty(
name="Scrambling Distance viewport",
default=False,
description="Uses the Scrambling Distance value for the viewport. Faster but may flicker",
)
adaptive_scrambling_distance: BoolProperty(
name="Adaptive Scrambling Distance",
default=False,
description="Uses a formula to adapt the scrambling distance strength based on the sample count",
)
use_layer_samples: EnumProperty(
name="Layer Samples",
description="How to use per view layer sample settings",

View File

@ -289,6 +289,13 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col = layout.column(align=True)
col.active = not(cscene.use_adaptive_sampling)
col.prop(cscene, "sampling_pattern", text="Pattern")
col = layout.column(align=True)
col.active = cscene.sampling_pattern == 'SOBOL' and not cscene.use_adaptive_sampling
col.prop(cscene, "scrambling_distance", text="Scrambling Distance Strength")
col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive Scrambling Distance")
col = layout.column(align=True)
col.active = ((cscene.scrambling_distance < 1.0) or cscene.adaptive_scrambling_distance) and cscene.sampling_pattern == 'SOBOL' and not cscene.use_adaptive_sampling
col.prop(cscene, "preview_scrambling_distance", text="Viewport Scrambling Distance")
layout.separator()

View File

@ -352,6 +352,21 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples"));
}
int samples = get_int(cscene, "samples");
float scrambling_distance = get_float(cscene, "scrambling_distance");
bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance");
if (adaptive_scrambling_distance) {
scrambling_distance *= 4.0f / sqrtf(samples);
}
/* only use scrambling distance in the viewport if user wants to and disable with AS */
bool preview_scrambling_distance = get_boolean(cscene, "preview_scrambling_distance");
if ((preview && !preview_scrambling_distance) || sampling_pattern != SAMPLING_PATTERN_SOBOL)
scrambling_distance = 1.0f;
VLOG(1) << "Used Scrambling Distance: " << scrambling_distance;
integrator->set_scrambling_distance(scrambling_distance);
if (get_boolean(cscene, "use_fast_gi")) {
if (preview) {
integrator->set_ao_bounces(get_int(cscene, "ao_bounces"));

View File

@ -258,7 +258,10 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
* schedules work in halves of available number of paths. */
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num);
work_tile_scheduler_.reset(effective_buffer_params_,
start_sample,
samples_num,
device_scene_->data.integrator.scrambling_distance);
enqueue_reset();

View File

@ -48,7 +48,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
TileSize tile_calculate_best_size(const int2 &image_size,
const int num_samples,
const int max_num_path_states)
const int max_num_path_states,
const float scrambling_distance)
{
if (max_num_path_states == 1) {
/* Simple case: avoid any calculation, which could cause rounding issues. */
@ -71,17 +72,54 @@ TileSize tile_calculate_best_size(const int2 &image_size,
* - Keep values a power of two, for more integer fit into the maximum number of paths. */
TileSize tile_size;
/* Calculate tile size as if it is the most possible one to fit an entire range of samples.
* The idea here is to keep tiles as small as possible, and keep device occupied by scheduling
* multiple tiles with the same coordinates rendering different samples. */
const int num_path_states_per_sample = max_num_path_states / num_samples;
if (num_path_states_per_sample != 0) {
tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample)));
tile_size.height = tile_size.width;
if (scrambling_distance < 0.9f) {
/* Prefer large tiles for scrambling distance. */
if (image_size.x * image_size.y <= num_path_states_per_sample) {
tile_size.width = image_size.x;
tile_size.height = image_size.y;
}
else {
/* Pick the option with the biggest tile size */
int heightOption = num_path_states_per_sample / image_size.x;
int widthOption = num_path_states_per_sample / image_size.y;
// Check if these options are possible
if ((heightOption > 0) || (widthOption > 0)) {
int area1 = image_size.x * heightOption;
int area2 = widthOption * image_size.y;
/* The option with the biggest pixel area */
if (area1 >= area2) {
tile_size.width = image_size.x;
tile_size.height = heightOption;
}
else {
tile_size.width = widthOption;
tile_size.height = image_size.y;
}
}
else { // Large tiles are not an option so use square tiles
if (num_path_states_per_sample != 0) {
tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample)));
tile_size.height = tile_size.width;
}
else {
tile_size.width = tile_size.height = 1;
}
}
}
}
else {
tile_size.width = tile_size.height = 1;
/* Calculate tile size as if it is the most possible one to fit an entire range of samples.
* The idea here is to keep tiles as small as possible, and keep device occupied by scheduling
* multiple tiles with the same coordinates rendering different samples. */
if (num_path_states_per_sample != 0) {
tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample)));
tile_size.height = tile_size.width;
}
else {
tile_size.width = tile_size.height = 1;
}
}
if (num_samples == 1) {
@ -93,7 +131,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
tile_size.num_samples = min(round_up_to_power_of_two(lround(sqrt(num_samples / 2))),
static_cast<uint>(num_samples));
const int tile_area = tile_size.width / tile_size.height;
const int tile_area = tile_size.width * tile_size.height;
tile_size.num_samples = min(tile_size.num_samples, max_num_path_states / tile_area);
}

View File

@ -51,6 +51,7 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
* possible, and have as many threads active for every tile as possible. */
TileSize tile_calculate_best_size(const int2 &image_size,
const int num_samples,
const int max_num_path_states);
const int max_num_path_states,
const float scrambling_distance);
CCL_NAMESPACE_END

View File

@ -33,13 +33,17 @@ void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
max_num_path_states_ = max_num_path_states;
}
void WorkTileScheduler::reset(const BufferParams &buffer_params, int sample_start, int samples_num)
void WorkTileScheduler::reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
float scrambling_distance)
{
/* Image buffer parameters. */
image_full_offset_px_.x = buffer_params.full_x;
image_full_offset_px_.y = buffer_params.full_y;
image_size_px_ = make_int2(buffer_params.width, buffer_params.height);
scrambling_distance_ = scrambling_distance;
offset_ = buffer_params.offset;
stride_ = buffer_params.stride;
@ -54,7 +58,8 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params, int sample_star
void WorkTileScheduler::reset_scheduler_state()
{
tile_size_ = tile_calculate_best_size(image_size_px_, samples_num_, max_num_path_states_);
tile_size_ = tile_calculate_best_size(
image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
VLOG(3) << "Will schedule tiles of size " << tile_size_;

View File

@ -38,7 +38,10 @@ class WorkTileScheduler {
void set_max_num_path_states(int max_num_path_states);
/* Scheduling will happen for pixels within a big tile denoted by its parameters. */
void reset(const BufferParams &buffer_params, int sample_start, int samples_num);
void reset(const BufferParams &buffer_params,
int sample_start,
int samples_num,
float scrambling_distance);
/* Get work for a device.
* Returns true if there is still work to be done and initialize the work tile to all
@ -68,6 +71,9 @@ class WorkTileScheduler {
* Will be passed over to the KernelWorkTile. */
int offset_, stride_;
/* Scrambling Distance requires adapted tile size */
float scrambling_distance_;
/* Start sample of index and number of samples which are to be rendered.
* The scheduler will cover samples range of [start, start + num] over the entire image
* (splitting into a smaller work tiles). */

View File

@ -79,7 +79,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
* See T38710, T50116.
*/
uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);
shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF);
return r + shift - floorf(r + shift);
#endif

View File

@ -1184,9 +1184,9 @@ typedef struct KernelIntegrator {
float volume_step_rate;
int has_shadow_catcher;
float scrambling_distance;
/* padding */
int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);

View File

@ -81,6 +81,7 @@ NODE_DEFINE(Integrator)
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
SOCKET_FLOAT(scrambling_distance, "Scrambling Distance", 1.0f);
static NodeEnum denoiser_type_enum;
denoiser_type_enum.insert("optix", DENOISER_OPTIX);
@ -192,6 +193,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
sample_clamp_indirect * 3.0f;
kintegrator->sampling_pattern = new_sampling_pattern;
kintegrator->scrambling_distance = scrambling_distance;
if (light_sampling_threshold > 0.0f) {
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;

View File

@ -76,6 +76,7 @@ class Integrator : public Node {
NODE_SOCKET_API(float, adaptive_threshold)
NODE_SOCKET_API(SamplingPattern, sampling_pattern)
NODE_SOCKET_API(float, scrambling_distance)
NODE_SOCKET_API(bool, use_denoise);
NODE_SOCKET_API(DenoiserType, denoiser_type);

View File

@ -24,23 +24,26 @@ CCL_NAMESPACE_BEGIN
TEST(tile_calculate_best_size, Basic)
{
/* Make sure CPU-like case is handled properly. */
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1), TileSize(1, 1, 1));
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1), TileSize(1, 1, 1));
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1, 1.0f), TileSize(1, 1, 1));
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1, 1.0f), TileSize(1, 1, 1));
/* Enough path states to fit an entire image with all samples. */
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080),
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 1, 1920 * 1080, 1.0f),
TileSize(1920, 1080, 1));
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100),
EXPECT_EQ(tile_calculate_best_size(make_int2(1920, 1080), 100, 1920 * 1080 * 100, 1.0f),
TileSize(1920, 1080, 100));
}
TEST(tile_calculate_best_size, Extreme)
{
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072), TileSize(1, 1, 512));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072), TileSize(1, 1, 1024));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072), TileSize(1, 1, 4096));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 262144, 131072, 1.0f),
TileSize(1, 1, 512));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 1048576, 131072, 1.0f),
TileSize(1, 1, 1024));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 10485760, 131072, 1.0f),
TileSize(1, 1, 4096));
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024),
EXPECT_EQ(tile_calculate_best_size(make_int2(32, 32), 8192 * 8192 * 2, 1024, 1.0f),
TileSize(1, 1, 1024));
}

@ -1 +1 @@
Subproject commit 80d9e7ee122c626cbbcd1da554683bce79f8d3df
Subproject commit 8ee2942570f08d10484bb2328d0d1b0aaaa0367c

@ -1 +1 @@
Subproject commit 27fe7f3a4f964b53af436c4da4ddea337eff0c7e
Subproject commit f2a08d80ccd3c13af304525778df3905f95bd44d

@ -1 +1 @@
Subproject commit 42da56aa73726710107031787af5eea186797984
Subproject commit 16467648282500cc229c271f62201ef897f2c2c3

@ -1 +1 @@
Subproject commit 7c5acb95df918503d11cfc43172ce13901019289
Subproject commit 2e8c879248822c8e500ed49d79acc605e5aa75b9