Merge branch 'blender-v3.3-release'

2022-08-18 20:31:34 +02:00 · 2022-08-18 20:31:34 +02:00 · 9961aae1e6
parent d2255aa4ed 6a4f4810f3
commit 9961aae1e6
2 changed files with 14 additions and 9 deletions
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -204,22 +204,26 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
    integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
        (int *)integrator_shader_sort_counter_.device_pointer;

-    if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+    integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
+    integrator_shader_sort_prefix_sum_.zero_to_device();
+  }
+
+  if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+    if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
      integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
      integrator_shader_raytrace_sort_counter_.zero_to_device();
      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
          (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
    }
+  }

-    if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+  if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+    if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
      integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
      integrator_shader_mnee_sort_counter_.zero_to_device();
      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
          (int *)integrator_shader_mnee_sort_counter_.device_pointer;
    }
-
-    integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
-    integrator_shader_sort_prefix_sum_.zero_to_device();
  }
 }

--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,11 +29,12 @@ using namespace metal::raytracing;

 /* Qualifiers */

-#if defined(__KERNEL_METAL_APPLE__)
+/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill
+ * reduction for integrator_shade_surface. However it comes at the cost of
+ * longer compile times (~4.5 minutes on M1 Max) and is disabled for that
+ * reason, until there is a user option to manually enable it. */

-/* Inline everything for Apple GPUs.
- * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
- * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+#if 0  // defined(__KERNEL_METAL_APPLE__)

 #  define ccl_device __attribute__((always_inline))
 #  define ccl_device_inline __attribute__((always_inline))