Page MenuHome
Paste P1251

(An Untitled Masterwork)
ActivePublic

Authored by Brecht Van Lommel (brecht) on Mon, Feb 10, 6:45 PM.
Tags
None
Subscribers
None
Tokens
"Cup of Joe" token, awarded by juang3d.
This diff is licensed under the GNU General Public License, Version 2.0, from e-cycles January 2019.
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 9cbb2e8..86736a5 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -189,6 +189,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=False,
)
+ use_auto_tiles: BoolProperty(
+ name="Auto Tiles",
+ description="Automatically manage tile size and draw order for optimal performance and memory usage",
+ default=True,
+ )
+
samples: IntProperty(
name="Samples",
description="Number of samples to render for each pixel",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 6675476..5787ac5 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -622,6 +622,7 @@ class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
sub.prop(rd, "tile_x", text="Tiles X")
sub.prop(rd, "tile_y", text="Y")
col.prop(cscene, "tile_order", text="Order")
+ col.prop(cscene, "use_auto_tiles", text="Auto tiles")
sub = col.column()
sub.active = not rd.use_save_buffers
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 1d13438..edd2f80 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -306,6 +306,25 @@ void BlenderSync::sync_integrator()
integrator->subsurface_samples = subsurface_samples;
integrator->volume_samples = volume_samples;
}
+ int samples = get_int(cscene, "samples"); /* The scrambling distance below is picked from this sample count. */
+ if(get_boolean(cscene, "use_square_samples")){
+ samples *= samples; /* With square samples enabled, the squared count is used. */
+ }
+ if (samples < 64 ){ /* Below 64 samples: distance of 1.0. */
+ integrator->scrambling_distance = 1.0f;
+ }
+ else if (samples < 256){ /* Linear ramp from 0.5 at 64 samples down to 0.2 at 256. NOTE(review): the value jumps from 1.0 to 0.5 at 64 samples - confirm this is intended. */
+ integrator->scrambling_distance = 0.5f - (samples-64)*0.3f/192;
+ }
+ else if (samples < 1024){ /* Linear ramp from 0.2 at 256 samples down to 0.1 at 1024. */
+ integrator->scrambling_distance = 0.2f - (samples-256)*0.1f/768;
+ }
+ else if (samples < 4096){ /* Linear ramp from 0.1 at 1024 samples down to 0.02 at 4096. */
+ integrator->scrambling_distance = 0.1f - (samples-1024)*0.08f/3072;
+ }
+ else{ /* 4096 samples and above: constant 0.02. */
+ integrator->scrambling_distance = 0.02f;
+ }
if(b_scene.render().use_simplify()) {
if(preview) {
@@ -743,6 +762,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
/* device type */
vector<DeviceInfo>& devices = Device::available_devices();
+ bool split = false;
/* device default CPU */
foreach(DeviceInfo& device, devices) {
@@ -795,6 +815,9 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
foreach(DeviceInfo& info, devices) {
if(info.id == id) {
used_devices.push_back(info);
+ if (info.use_split_kernel) {
+ split = true;
+ }
break;
}
}
@@ -818,6 +841,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
int aa_samples = get_int(cscene, "aa_samples");
int preview_samples = get_int(cscene, "preview_samples");
int preview_aa_samples = get_int(cscene, "preview_aa_samples");
+ bool bpt = (get_enum(cscene,"progressive") == 0);
if(get_boolean(cscene, "use_square_samples")) {
aa_samples = aa_samples * aa_samples;
@@ -862,15 +886,58 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
else {
int tile_x = b_engine.tile_x();
int tile_y = b_engine.tile_y();
-
+
+ /* auto tile size*/
+ if (get_boolean(cscene, "use_auto_tiles")){
+ /* Rays to shoot per tile = tile_x*tile_y*spp. How many rays are optimal to shoot at once depends on the GPU,
+ * so you can adapt this to your own tests. These numbers were found for the 1080Ti and seem to work well on the 980 and 2070.
+ */
+
+ if (samples > 999 && split == false) { // The split kernel needs a bigger tile size, due to its nature, to fully feed the GPU with work.
+ tile_x = 16;
+ tile_y = 16;
+ }
+ else if (samples > 256 && split == false){
+ tile_x = 32;
+ tile_y = 32;
+ }
+ else if ((samples > 64 && split == false) || (split == true && samples > 999)){
+ tile_x = 64;
+ tile_y = 64;
+ }
+ else if ((samples > 16 && split == false) || (split == true && samples > 249)){
+ tile_x = 128;
+ tile_y = 128;
+ }
+ else {
+ tile_x = 256;
+ tile_y = 256;
+ }
+ if (bpt && split == true) { // bpt is true when the "progressive" enum reads 0; on split-kernel devices the tile is then halved in both dimensions.
+ tile_x /= 2;
+ tile_y /= 2;
+ }
+ if (is_cpu) {
+ tile_x = tile_y = 16;
+ } // CPUs like 8x8 or 16x16 tiles, whatever sample count is used, so this overrides the sizes chosen above.
+ }
+
params.tile_size = make_int2(tile_x, tile_y);
}
- if((BlenderSession::headless == false) && background) {
+ if((BlenderSession::headless == false) && background && !(get_boolean(cscene, "use_auto_tiles"))) {
params.tile_order = (TileOrder)get_enum(cscene, "tile_order");
}
else {
- params.tile_order = TILE_BOTTOM_TO_TOP;
+ int width = b_engine.resolution_x();
+ int height = b_engine.resolution_y();
+
+ if (width>height) {
+ params.tile_order = TILE_LEFT_TO_RIGHT;
+ }
+ else {
+ params.tile_order = TILE_BOTTOM_TO_TOP;
+ }
}
/* other parameters */
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 25557a1..b2755b0 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1655,11 +1655,14 @@ public:
min_blocks *= 8;
}
- uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
/* Render all samples. */
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
+ uint step_samples = end_sample;
+ if (end_sample > 990){
+ step_samples = 640;
+ }
for(int sample = start_sample; sample < end_sample; sample += step_samples) {
/* Setup and copy work tile to device. */
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index b8a0648..4658b5b 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -200,8 +200,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* initial guess to start rolling average */
const int initial_num_samples = 1;
/* approx number of samples per second */
- int samples_per_second = (avg_time_per_sample > 0.0) ?
- int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
+ int samples_per_second = 1600;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index ae88e26..e0c9a3b 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -87,6 +87,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
+ shift *= kernel_data.integrator.scrambling_distance;
return r + shift - floorf(r + shift);
#endif
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 8e6e957..c26ea51 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1384,6 +1384,7 @@ typedef struct KernelIntegrator {
/* sampler */
int sampling_pattern;
int aa_samples;
+ float scrambling_distance;
/* volume render */
int use_volumes;
@@ -1392,10 +1393,9 @@ typedef struct KernelIntegrator {
int volume_samples;
int start_sample;
-
int max_closures;
- int pad1, pad2, pad3;
+ int pad1, pad2;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index ad4332a..5d907fa 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -77,6 +77,7 @@ NODE_DEFINE(Integrator)
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
+ SOCKET_FLOAT(scrambling_distance, "Scrambling Distance", 1.0f);
return type;
}
@@ -166,6 +167,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
}
kintegrator->sampling_pattern = sampling_pattern;
+ kintegrator->scrambling_distance = scrambling_distance;
kintegrator->aa_samples = aa_samples;
if(light_sampling_threshold > 0.0f) {
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index 6e81642..afd6369 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -79,6 +79,7 @@ public:
Method method;
SamplingPattern sampling_pattern;
+ float scrambling_distance;
bool need_update;