Cycles: Occupancy tuning for new higher end M2 machines

This patch adds occupancy tuning for the newly announced high-end M2 machines, giving 10-15% render speedup over a pre-tuned build.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D17037
This commit is contained in:
Michael Jones 2023-01-19 17:55:53 +00:00 committed by Michael Jones
parent e7af2503c5
commit 08b3426df9
4 changed files with 16 additions and 2 deletions

View File

@ -49,6 +49,18 @@ struct ShaderCache {
if (MetalInfo::get_device_vendor(mtlDevice) == METAL_GPU_APPLE) {
switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) {
default:
case APPLE_M2_BIG:
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {384, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {640, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST] = {1024, 64};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW] = {704, 704};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE] = {640, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY] = {896, 768};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND] = {512, 128};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW] = {32, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] = {768, 576};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY] = {896, 768};
break;
case APPLE_M2:
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES] = {32, 32};
occupancy_tuning[DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA] = {832, 32};

View File

@ -278,7 +278,8 @@ int MetalDeviceQueue::num_concurrent_states(const size_t state_size) const
if (metal_device_->device_vendor == METAL_GPU_APPLE) {
result *= 4;
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) == APPLE_M2) {
/* Increasing the state count doesn't notably benefit M1-family systems. */
if (MetalInfo::get_apple_gpu_architecture(metal_device_->mtlDevice) != APPLE_M1) {
size_t system_ram = system_physical_ram();
size_t allocated_so_far = [metal_device_->mtlDevice currentAllocatedSize];
size_t max_recommended_working_set = [metal_device_->mtlDevice recommendedMaxWorkingSetSize];

View File

@ -29,6 +29,7 @@ enum AppleGPUArchitecture {
APPLE_UNKNOWN,
APPLE_M1,
APPLE_M2,
APPLE_M2_BIG,
};
/* Contains static Metal helper functions. */

View File

@ -52,7 +52,7 @@ AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
return APPLE_M1;
}
else if (strstr(device_name, "M2")) {
return APPLE_M2;
return get_apple_gpu_core_count(device) <= 10 ? APPLE_M2 : APPLE_M2_BIG;
}
return APPLE_UNKNOWN;
}