Cycles: Allow Intel GPUs under Metal
Known Issues: - Command buffer failures when using binary archives (binary archives are disabled for Intel GPUs as a workaround). - Wrong texture sampler being applied (to be addressed in the future). Ref T92212. Differential Revision: https://developer.blender.org/D16253
This commit is contained in:
parent
3e247f0f76
commit
f9f834068e
Notes:
blender-bot
2023-02-14 10:29:30 +01:00
Referenced by issue #100749, Blender LTS: Maintenance Task 3.3 Referenced by issue #92212, Cycles Metal device
|
@ -338,6 +338,12 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
|
|||
|
||||
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
|
||||
|
||||
if (@available(macos 13.0, *)) {
|
||||
if (device_vendor == METAL_GPU_INTEL) {
|
||||
[options setOptimizationLevel:MTLLibraryOptimizationLevelSize];
|
||||
}
|
||||
}
|
||||
|
||||
options.fastMathEnabled = YES;
|
||||
if (@available(macOS 12.0, *)) {
|
||||
options.languageVersion = MTLLanguageVersion2_4;
|
||||
|
|
|
@ -325,6 +325,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
|
|||
}
|
||||
}
|
||||
|
||||
/* Workaround for Intel GPU having issue using Binary Archives */
|
||||
MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
|
||||
if (gpu_vendor == METAL_GPU_INTEL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pso_type == PSO_GENERIC) {
|
||||
/* Archive the generic kernels. */
|
||||
return true;
|
||||
|
|
|
@ -110,6 +110,10 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
|
|||
usable |= (vendor == METAL_GPU_AMD);
|
||||
}
|
||||
|
||||
if (@available(macos 13.0, *)) {
|
||||
usable |= (vendor == METAL_GPU_INTEL);
|
||||
}
|
||||
|
||||
if (usable) {
|
||||
metal_printf("- %s\n", device_name.c_str());
|
||||
[device retain];
|
||||
|
|
|
@ -34,21 +34,48 @@ class MetalKernelContext {
|
|||
kernel_assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Intel-only sampling helper; it is compiled only when __KERNEL_METAL_INTEL__ is defined.
// Samples `texture_array[tid].tex` at `coords`, choosing the sampler with a switch on
// `sid` and constructing that sampler inline in each case:
//   sid 0..2 -> filter::nearest with address::repeat / clamp_to_edge / clamp_to_zero
//   sid 3..5 -> filter::linear  with address::repeat / clamp_to_edge / clamp_to_zero
// The return type is deduced (`auto`) from the `sample()` call.
// NOTE(review): presumably the case order mirrors the entries of `metal_samplers`, which
// the non-Intel code paths index with the same `sid` -- confirm against its definition.
#ifdef __KERNEL_METAL_INTEL__
|
||||
template<typename TextureType, typename CoordsType>
|
||||
inline __attribute__((__always_inline__))
|
||||
auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
|
||||
const uint tid, const uint sid,
|
||||
CoordsType coords) const
|
||||
{
|
||||
switch(sid) {
|
||||
// Any sid outside 0..5 falls through to case 0 (repeat + nearest).
default:
|
||||
case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
|
||||
case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
|
||||
case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
|
||||
case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
|
||||
case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
|
||||
case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// texture2d
|
||||
// float4 specialization: samples a 2D texture at (x, y) and returns the sample as a float4.
// The handle `tex` carries two indices: converting it to uint yields the texture slot
// `tid`, and `tex >> 32` yields the sampler id `sid`.
// NOTE(review): this implies a 64-bit handle with the texture index in the low half --
// confirm against the ccl_gpu_tex_object_2D typedef.
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
// Non-Intel builds look the sampler up in `metal_samplers` by id.
#ifndef __KERNEL_METAL_INTEL__
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
|
||||
// Intel builds route through the helper that switches on `sid` instead.
#else
|
||||
return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y));
|
||||
#endif
|
||||
}
|
||||
// float specialization: samples a 2D texture at (x, y) and returns only the `.x`
// component of the sample.
// The handle `tex` carries two indices: converting it to uint yields the texture slot
// `tid`, and `tex >> 32` yields the sampler id `sid`.
// NOTE(review): this implies a 64-bit handle with the texture index in the low half --
// confirm against the ccl_gpu_tex_object_2D typedef.
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
// Non-Intel builds look the sampler up in `metal_samplers` by id.
#ifndef __KERNEL_METAL_INTEL__
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
|
||||
// Intel builds route through the helper that switches on `sid` instead.
#else
|
||||
return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x;
|
||||
#endif
|
||||
}
|
||||
|
||||
// texture3d
|
||||
|
@ -57,14 +84,22 @@ class MetalKernelContext {
|
|||
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
|
||||
// Texture slot: the handle converted to uint.
// NOTE(review): presumably the low 32 bits of a 64-bit handle -- confirm against the typedef.
const uint tid(tex);
|
||||
// Sampler id: the handle shifted right by 32 bits.
const uint sid(tex >> 32);
|
||||
// Non-Intel builds look the sampler up in `metal_samplers` by id.
#ifndef __KERNEL_METAL_INTEL__
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
|
||||
// Intel builds route through the helper that switches on `sid` instead.
#else
|
||||
return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z));
|
||||
#endif
|
||||
}
|
||||
// float specialization: samples a 3D texture at (x, y, z) and returns only the `.x`
// component of the sample.
// The handle `tex` carries two indices: converting it to uint yields the texture slot
// `tid`, and `tex >> 32` yields the sampler id `sid`.
// NOTE(review): this implies a 64-bit handle with the texture index in the low half --
// confirm against the ccl_gpu_tex_object_3D typedef.
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
// Non-Intel builds look the sampler up in `metal_samplers` by id.
#ifndef __KERNEL_METAL_INTEL__
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
|
||||
// Intel builds route through the helper that switches on `sid` instead.
#else
|
||||
return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x;
|
||||
#endif
|
||||
}
|
||||
# include "kernel/device/gpu/image.h"
|
||||
|
||||
|
|
Loading…
Reference in New Issue