Cycles: Allow Intel GPUs under Metal

Known Issues:
- Command buffer failures when using binary archives (binary archives are disabled for Intel GPUs as a workaround)
- Wrong texture sampler being applied (to be addressed in the future)

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D16253
This commit is contained in:
Morteza Mostajab 2022-10-19 17:09:23 +01:00 committed by Michael Jones
parent 053fc35b01
commit e6902d19a0
Notes: blender-bot 2023-02-14 05:28:01 +01:00
Referenced by issue #100749, Blender LTS: Maintenance Task 3.3
Referenced by issue #102667, Rendering an animation freezes at random frame
Referenced by issue #92212, Cycles Metal device
4 changed files with 52 additions and 1 deletions

View File

@ -339,6 +339,12 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
if (@available(macos 13.0, *)) {
if (device_vendor == METAL_GPU_INTEL) {
[options setOptimizationLevel:MTLLibraryOptimizationLevelSize];
}
}
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;

View File

@ -317,6 +317,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
}
}
/* Workaround: Intel GPUs have issues using binary archives, so never use them there. */
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
if (gpu_vendor == METAL_GPU_INTEL) {
return false;
}
if (pso_type == PSO_GENERIC) {
/* Archive the generic kernels. */
return true;

View File

@ -110,6 +110,10 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
usable |= (vendor == METAL_GPU_AMD);
}
if (@available(macos 13.0, *)) {
usable |= (vendor == METAL_GPU_INTEL);
}
if (usable) {
metal_printf("- %s\n", device_name.c_str());
[device retain];

View File

@ -34,21 +34,48 @@ class MetalKernelContext {
kernel_assert(0);
return 0;
}
#ifdef __KERNEL_METAL_INTEL__
// Sample `texture_array[tid].tex` at `coords` using a sampler that is
// constructed inline and chosen by `sid`, instead of being looked up in
// `metal_samplers`. Only compiled when __KERNEL_METAL_INTEL__ is defined.
//
// texture_array: indexable collection whose elements expose a `.tex` texture.
// tid:           index of the texture inside `texture_array`.
// sid:           sampler selector; see the switch below for the mapping.
// coords:        sample coordinates, passed straight through to `sample()`.
//
// Returns whatever `tex.sample()` returns for the given texture type.
//
// NOTE(review): presumably this exists because indexing `metal_samplers`
// misbehaves on Intel GPUs - confirm against the driver issue being tracked.
template<typename TextureType, typename CoordsType>
inline __attribute__((__always_inline__))
auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
const uint tid, const uint sid,
CoordsType coords) const
{
// sid 0-2 use nearest filtering, sid 3-5 use linear filtering; within each
// group the address mode is repeat, clamp_to_edge, clamp_to_zero in that order.
switch(sid) {
// Any sid outside 0-5 is handled like sid 0 (repeat + nearest).
default:
case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
}
}
#endif
// texture2d
template<>
inline __attribute__((__always_inline__))
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const
{
  // The handle converted to uint selects the texture; the handle shifted
  // right by 32 selects the sampler.
  const uint texture_index = uint(tex);
  const uint sampler_index = uint(tex >> 32);
  const float2 uv = float2(x, y);
#ifdef __KERNEL_METAL_INTEL__
  // Intel builds pick the sampler through the switch-based helper.
  return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, texture_index, sampler_index, uv);
#else
  // All other builds index the sampler table directly.
  return metal_ancillaries->textures_2d[texture_index].tex.sample(metal_samplers[sampler_index], uv);
#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const
{
  // The handle converted to uint selects the texture; the handle shifted
  // right by 32 selects the sampler.
  const uint texture_index = uint(tex);
  const uint sampler_index = uint(tex >> 32);
  const float2 uv = float2(x, y);
  // Single-channel read: only the x component of the sample is returned.
#ifdef __KERNEL_METAL_INTEL__
  // Intel builds pick the sampler through the switch-based helper.
  return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, texture_index, sampler_index, uv).x;
#else
  // All other builds index the sampler table directly.
  return metal_ancillaries->textures_2d[texture_index].tex.sample(metal_samplers[sampler_index], uv).x;
#endif
}
// texture3d
@ -57,14 +84,22 @@ class MetalKernelContext {
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
  // The handle converted to uint selects the texture; the handle shifted
  // right by 32 selects the sampler.
  const uint texture_index = uint(tex);
  const uint sampler_index = uint(tex >> 32);
  const float3 uvw = float3(x, y, z);
#ifdef __KERNEL_METAL_INTEL__
  // Intel builds pick the sampler through the switch-based helper.
  return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, texture_index, sampler_index, uvw);
#else
  // All other builds index the sampler table directly.
  return metal_ancillaries->textures_3d[texture_index].tex.sample(metal_samplers[sampler_index], uvw);
#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const
{
  // The handle converted to uint selects the texture; the handle shifted
  // right by 32 selects the sampler.
  const uint texture_index = uint(tex);
  const uint sampler_index = uint(tex >> 32);
  const float3 uvw = float3(x, y, z);
  // Single-channel read: only the x component of the sample is returned.
#ifdef __KERNEL_METAL_INTEL__
  // Intel builds pick the sampler through the switch-based helper.
  return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, texture_index, sampler_index, uvw).x;
#else
  // All other builds index the sampler table directly.
  return metal_ancillaries->textures_3d[texture_index].tex.sample(metal_samplers[sampler_index], uvw).x;
#endif
}
# include "kernel/device/gpu/image.h"