Merge branch 'blender-v3.3-release'

This commit is contained in:
Brecht Van Lommel 2022-08-18 20:31:34 +02:00
commit 9961aae1e6
2 changed files with 14 additions and 9 deletions

View File

@ -204,22 +204,26 @@ void PathTraceWorkGPU::alloc_integrator_sorting()
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
(int *)integrator_shader_sort_counter_.device_pointer;
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
integrator_shader_sort_prefix_sum_.zero_to_device();
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
}
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
integrator_shader_sort_prefix_sum_.zero_to_device();
}
}

View File

@ -29,11 +29,12 @@ using namespace metal::raytracing;
/* Qualifiers */
#if defined(__KERNEL_METAL_APPLE__)
/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill
* reduction for integrator_shade_surface. However it comes at the cost of
* longer compile times (~4.5 minutes on M1 Max) and is disabled for that
* reason, until there is a user option to manually enable it. */
/* Inline everything for Apple GPUs.
* This gives ~1.1x speedup and 10% spill reduction for integrator_shade_surface
* at the cost of longer compile times (~4.5 minutes on M1 Max). */
#if 0 // defined(__KERNEL_METAL_APPLE__)
# define ccl_device __attribute__((always_inline))
# define ccl_device_inline __attribute__((always_inline))