Cleanup: make vector types make/print functions consistent between CPU and GPU

Now all the same ones are available on CPU and GPU, which was previously not
possible due to lack of operator overloading in OpenCL. Print functions are
no-ops on some GPUs.

Ref D15535
This commit is contained in:
Brecht Van Lommel 2022-07-29 14:40:50 +02:00
parent 9990792e87
commit 1988665c3c
12 changed files with 186 additions and 163 deletions

View File

@ -189,35 +189,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
#ifdef make_float2
# undef make_float2
#endif
#ifdef make_float3
# undef make_float3
#endif
#ifdef make_float4
# undef make_float4
#endif
#ifdef make_int2
# undef make_int2
#endif
#ifdef make_int3
# undef make_int3
#endif
#ifdef make_int4
# undef make_int4
#endif
#ifdef make_uchar4
# undef make_uchar4
#endif
/* Build a float2 from two scalar components using the constructor-style initializer. */
ccl_device_forceinline float2 make_float2(const float x, const float y)
{
  const float2 result = float2(x, y);
  return result;
}
#define make_float2(x, y) float2(x, y)
#define make_float3(x, y, z) float3(x, y, z)
#define make_float4(x, y, z, w) float4(x, y, z, w)
#define make_int2(x, y) int2(x, y)
#define make_int3(x, y, z) int3(x, y, z)
#define make_int4(x, y, z, w) int4(x, y, z, w)
#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
/* Build a float3 from three scalar components using the constructor-style initializer. */
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
{
  const float3 result = float3(x, y, z);
  return result;
}
/* Build a float4 from four scalar components using the constructor-style initializer. */
ccl_device_forceinline float4 make_float4(const float x,
                                          const float y,
                                          const float z,
                                          const float w)
{
  const float4 result = float4(x, y, z, w);
  return result;
}
/* Build an int2 from two scalar components using the constructor-style initializer. */
ccl_device_forceinline int2 make_int2(const int x, const int y)
{
  const int2 result = int2(x, y);
  return result;
}
/* Build an int3 from three scalar components using the constructor-style initializer. */
ccl_device_forceinline int3 make_int3(const int x, const int y, const int z)
{
  const int3 result = int3(x, y, z);
  return result;
}
/* Build an int4 from four scalar components using the constructor-style initializer. */
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
{
  const int4 result = int4(x, y, z, w);
  return result;
}
/* Build a uchar4 from four scalar components using the constructor-style initializer. */
ccl_device_forceinline uchar4 make_uchar4(const uchar x,
                                          const uchar y,
                                          const uchar z,
                                          const uchar w)
{
  const uchar4 result = uchar4(x, y, z, w);
  return result;
}
/* Math functions */

View File

@ -71,6 +71,18 @@ ccl_device_inline bool is_power_of_two(size_t x)
CCL_NAMESPACE_END
/* Device side printf only tested on CUDA, may work on more GPU devices. */
#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__)
# define __KERNEL_PRINTF__
#endif
/* Debug helper: print `label` followed by the value with eight decimals.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_float(ccl_private const char *label, const float a)
{
#ifdef __KERNEL_PRINTF__
  /* Widen explicitly so the argument matches the %f conversion. */
  const double value = (double)a;
  printf("%s: %.8f\n", label, value);
#endif
}
/* Most GPU APIs have matching native vector types, so we only need to implement them for
 * CPU and oneAPI. */
#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)

View File

@ -20,7 +20,8 @@ struct float2 {
};
ccl_device_inline float2 make_float2(float x, float y);
ccl_device_inline void print_float2(const char *label, const float2 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
ccl_device_inline void print_float2(ccl_private const char *label, const float2 a);
CCL_NAMESPACE_END

View File

@ -31,11 +31,13 @@ ccl_device_inline float2 make_float2(float x, float y)
float2 a = {x, y};
return a;
}
/* Debug helper: print `label` followed by both components with eight decimals. */
ccl_device_inline void print_float2(const char *label, const float2 &a)
{
  const double x = (double)a.x;
  const double y = (double)a.y;
  printf("%s: %.8f %.8f\n", label, x, y);
}
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
/* Debug helper: print `label` followed by both components with eight decimals.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_float2(ccl_private const char *label, const float2 a)
{
#ifdef __KERNEL_PRINTF__
  const double x = (double)a.x;
  const double y = (double)a.y;
  printf("%s: %.8f %.8f\n", label, x, y);
#endif
}
CCL_NAMESPACE_END

View File

@ -47,11 +47,12 @@ struct ccl_try_align(16) float3
# endif
};
ccl_device_inline float3 make_float3(float f);
ccl_device_inline float3 make_float3(float x, float y, float z);
ccl_device_inline void print_float3(const char *label, const float3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
ccl_device_inline float3 make_float3(float f);
ccl_device_inline void print_float3(ccl_private const char *label, const float3 a);
/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
* CPU SIMD instructions can be used. */
#if defined(__KERNEL_METAL__)

View File

@ -56,38 +56,35 @@ __forceinline float &float3::operator[](int i)
}
# endif
/* Build a float3 with every component set to `f`.
 * GPU: three-element initializer. CPU with SSE: broadcast via _mm_set1_ps.
 * Other CPU builds: four-element initializer (the fourth slot also receives `f`). */
ccl_device_inline float3 make_float3(float f)
{
# if defined(__KERNEL_GPU__)
  return {f, f, f};
# elif defined(__KERNEL_SSE__)
  return float3(_mm_set1_ps(f));
# else
  return {f, f, f, f};
# endif
}
/* Build a float3 from three components. The visible lines show a GPU branch using a
 * three-element brace initializer, an SSE branch using _mm_set_ps(0.0f, z, y, x), and a
 * fallback four-element brace initializer whose last element is 0.0f.
 * NOTE(review): two variants of the body (a local `a` followed by `return a`, and direct
 * `return` statements) appear interleaved here and the preprocessor conditionals do not
 * balance as shown -- confirm which lines are current before editing. */
ccl_device_inline float3 make_float3(float x, float y, float z)
{
# ifdef __KERNEL_GPU__
float3 a = {x, y, z};
# if defined(__KERNEL_GPU__)
return {x, y, z};
# elif defined(__KERNEL_SSE__)
return float3(_mm_set_ps(0.0f, z, y, x));
# else
# ifdef __KERNEL_SSE__
float3 a(_mm_set_ps(0.0f, z, y, x));
# else
float3 a = {x, y, z, 0.0f};
# endif
return {x, y, z, 0.0f};
# endif
return a;
}
/* Debug helper: print `label` followed by the x, y and z components with eight decimals. */
ccl_device_inline void print_float3(const char *label, const float3 &a)
{
  const double x = (double)a.x;
  const double y = (double)a.y;
  const double z = (double)a.z;
  printf("%s: %.8f %.8f %.8f\n", label, x, y, z);
}
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
/* Build a float3 with every component set to `f`.
 * GPU: forwards to the three-argument make_float3. CPU with SSE: broadcast via
 * _mm_set1_ps. Other CPU builds: four-element initializer (fourth slot also gets `f`). */
ccl_device_inline float3 make_float3(float f)
{
#if defined(__KERNEL_GPU__)
  float3 splat = make_float3(f, f, f);
#elif defined(__KERNEL_SSE__)
  float3 splat(_mm_set1_ps(f));
#else
  float3 splat = {f, f, f, f};
#endif
  return splat;
}
/* Debug helper: print `label` followed by the x, y and z components with eight decimals.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_float3(ccl_private const char *label, const float3 a)
{
#ifdef __KERNEL_PRINTF__
  const double x = (double)a.x;
  const double y = (double)a.y;
  const double z = (double)a.z;
  printf("%s: %.8f %.8f %.8f\n", label, x, y, z);
#endif
}
CCL_NAMESPACE_END

View File

@ -40,10 +40,11 @@ struct ccl_try_align(16) float4
# endif
};
ccl_device_inline float4 make_float4(float f);
ccl_device_inline float4 make_float4(float x, float y, float z, float w);
ccl_device_inline float4 make_float4(const int4 &i);
ccl_device_inline void print_float4(const char *label, const float4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
ccl_device_inline float4 make_float4(float f);
ccl_device_inline float4 make_float4(const int4 i);
ccl_device_inline void print_float4(ccl_private const char *label, const float4 a);
CCL_NAMESPACE_END

View File

@ -52,40 +52,40 @@ __forceinline float &float4::operator[](int i)
}
# endif
/* Build a float4 with every component set to `f`.
 * With SSE the value is broadcast via _mm_set1_ps; otherwise a brace initializer is used. */
ccl_device_inline float4 make_float4(float f)
{
# ifdef __KERNEL_SSE__
  return float4(_mm_set1_ps(f));
# else
  return {f, f, f, f};
# endif
}
/* Build a float4 from four components. The SSE branch uses _mm_set_ps(w, z, y, x)
 * (arguments listed highest lane first); the fallback uses a brace initializer.
 * NOTE(review): two variants of the body (a local `a` followed by `return a`, and direct
 * `return` statements) appear interleaved here -- confirm which lines are current before
 * editing. */
ccl_device_inline float4 make_float4(float x, float y, float z, float w)
{
# ifdef __KERNEL_SSE__
float4 a(_mm_set_ps(w, z, y, x));
return float4(_mm_set_ps(w, z, y, x));
# else
float4 a = {x, y, z, w};
return {x, y, z, w};
# endif
return a;
}
/* Convert each integer component of `i` to float.
 * With SSE this is a single _mm_cvtepi32_ps on the packed register; otherwise each
 * component is cast individually. */
ccl_device_inline float4 make_float4(const int4 &i)
{
# ifdef __KERNEL_SSE__
  return float4(_mm_cvtepi32_ps(i.m128));
# else
  return {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
# endif
}
/* Debug helper: print `label` followed by all four components with eight decimals. */
ccl_device_inline void print_float4(const char *label, const float4 &a)
{
  const double x = (double)a.x;
  const double y = (double)a.y;
  const double z = (double)a.z;
  const double w = (double)a.w;
  printf("%s: %.8f %.8f %.8f %.8f\n", label, x, y, z, w);
}
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
/* Build a float4 with every component set to `f`.
 * With SSE the value is broadcast via _mm_set1_ps; otherwise this forwards to the
 * four-argument make_float4. */
ccl_device_inline float4 make_float4(float f)
{
#ifdef __KERNEL_SSE__
  float4 splat(_mm_set1_ps(f));
#else
  float4 splat = make_float4(f, f, f, f);
#endif
  return splat;
}
/* Convert each integer component of `i` to float.
 * With SSE this is a single _mm_cvtepi32_ps on the packed register; otherwise each
 * component is cast and passed to the four-argument make_float4. */
ccl_device_inline float4 make_float4(const int4 i)
{
#ifdef __KERNEL_SSE__
  float4 converted(_mm_cvtepi32_ps(i.m128));
#else
  float4 converted = make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w);
#endif
  return converted;
}
/* Debug helper: print `label` followed by all four components with eight decimals.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_float4(ccl_private const char *label, const float4 a)
{
#ifdef __KERNEL_PRINTF__
  const double x = (double)a.x;
  const double y = (double)a.y;
  const double z = (double)a.z;
  const double w = (double)a.w;
  printf("%s: %.8f %.8f %.8f %.8f\n", label, x, y, z, w);
#endif
}
CCL_NAMESPACE_END

View File

@ -44,9 +44,10 @@ struct ccl_try_align(16) int3
# endif
};
ccl_device_inline int3 make_int3(int i);
ccl_device_inline int3 make_int3(int x, int y, int z);
ccl_device_inline void print_int3(const char *label, const int3 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
ccl_device_inline int3 make_int3(int i);
ccl_device_inline void print_int3(ccl_private const char *label, const int3 a);
CCL_NAMESPACE_END

View File

@ -56,38 +56,35 @@ __forceinline int &int3::operator[](int i)
}
# endif
/* Build an int3 with every component set to `i`.
 * GPU: three-element initializer. CPU with SSE: broadcast via _mm_set1_epi32.
 * Other CPU builds: four-element initializer (the fourth slot also receives `i`). */
ccl_device_inline int3 make_int3(int i)
{
# if defined(__KERNEL_GPU__)
  return {i, i, i};
# elif defined(__KERNEL_SSE__)
  return int3(_mm_set1_epi32(i));
# else
  return {i, i, i, i};
# endif
}
/* Build an int3 from three components. The visible lines show a GPU branch using a
 * three-element brace initializer, an SSE branch using _mm_set_epi32(0, z, y, x), and a
 * fallback four-element brace initializer whose last element is 0.
 * NOTE(review): two variants of the body (a local `a` followed by `return a`, and direct
 * `return` statements) appear interleaved here and the preprocessor conditionals do not
 * balance as shown -- confirm which lines are current before editing. */
ccl_device_inline int3 make_int3(int x, int y, int z)
{
# ifdef __KERNEL_GPU__
int3 a = {x, y, z};
# if defined(__KERNEL_GPU__)
return {x, y, z};
# elif defined(__KERNEL_SSE__)
return int3(_mm_set_epi32(0, z, y, x));
# else
# ifdef __KERNEL_SSE__
int3 a(_mm_set_epi32(0, z, y, x));
# else
int3 a = {x, y, z, 0};
# endif
return {x, y, z, 0};
# endif
return a;
}
/* Debug helper: print `label` followed by the x, y and z components as decimal integers. */
ccl_device_inline void print_int3(const char *label, const int3 &a)
{
  const int x = a.x;
  const int y = a.y;
  const int z = a.z;
  printf("%s: %d %d %d\n", label, x, y, z);
}
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
/* Build an int3 with every component set to `i`.
 * GPU: forwards to the three-argument make_int3. CPU with SSE: broadcast via
 * _mm_set1_epi32. Other CPU builds: four-element initializer (fourth slot also gets `i`). */
ccl_device_inline int3 make_int3(int i)
{
#if defined(__KERNEL_GPU__)
  int3 splat = make_int3(i, i, i);
#elif defined(__KERNEL_SSE__)
  int3 splat(_mm_set1_epi32(i));
#else
  int3 splat = {i, i, i, i};
#endif
  return splat;
}
/* Debug helper: print `label` followed by the x, y and z components as decimal integers.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_int3(ccl_private const char *label, const int3 a)
{
#ifdef __KERNEL_PRINTF__
  const int x = a.x;
  const int y = a.y;
  const int z = a.z;
  printf("%s: %d %d %d\n", label, x, y, z);
#endif
}
CCL_NAMESPACE_END

View File

@ -42,11 +42,12 @@ struct ccl_try_align(16) int4
# endif
};
ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(int x, int y, int z, int w);
ccl_device_inline int4 make_int4(const float3 &f);
ccl_device_inline int4 make_int4(const float4 &f);
ccl_device_inline void print_int4(const char *label, const int4 &a);
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(const float3 f);
ccl_device_inline int4 make_int4(const float4 f);
ccl_device_inline void print_int4(ccl_private const char *label, const int4 a);
CCL_NAMESPACE_END

View File

@ -56,52 +56,51 @@ __forceinline int &int4::operator[](int i)
}
# endif
/* Build an int4 with every component set to `i`.
 * With SSE the value is broadcast via _mm_set1_epi32; otherwise a brace initializer is
 * used. */
ccl_device_inline int4 make_int4(int i)
{
# ifdef __KERNEL_SSE__
  return int4(_mm_set1_epi32(i));
# else
  return {i, i, i, i};
# endif
}
/* Build an int4 from four components. The SSE branch uses _mm_set_epi32(w, z, y, x)
 * (arguments listed highest lane first); the fallback uses a brace initializer.
 * NOTE(review): two variants of the body (a local `a` followed by `return a`, and direct
 * `return` statements) appear interleaved here -- confirm which lines are current before
 * editing. */
ccl_device_inline int4 make_int4(int x, int y, int z, int w)
{
# ifdef __KERNEL_SSE__
int4 a(_mm_set_epi32(w, z, y, x));
return int4(_mm_set_epi32(w, z, y, x));
# else
int4 a = {x, y, z, w};
return {x, y, z, w};
# endif
return a;
}
/* Convert a float3 to an int4.
 * SSE: converts the whole packed register with _mm_cvtps_epi32.
 * oneAPI: casts x, y, z and sets w to 0.
 * Otherwise: casts x, y, z and f.w.
 * NOTE(review): _mm_cvtps_epi32 rounds according to the current SSE rounding mode,
 * whereas the C casts in the other branches truncate toward zero -- confirm whether
 * this difference is intended. */
ccl_device_inline int4 make_int4(const float3 &f)
{
# if defined(__KERNEL_SSE__)
  return int4(_mm_cvtps_epi32(f.m128));
# elif defined(__KERNEL_ONEAPI__)
  return {(int)f.x, (int)f.y, (int)f.z, 0};
# else
  return {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
# endif
}
/* Convert each component of a float4 to int.
 * SSE: converts the whole packed register with _mm_cvtps_epi32.
 * Otherwise: casts each component individually.
 * NOTE(review): _mm_cvtps_epi32 rounds according to the current SSE rounding mode,
 * whereas the C casts truncate toward zero -- confirm whether this difference is
 * intended. */
ccl_device_inline int4 make_int4(const float4 &f)
{
# ifdef __KERNEL_SSE__
  return int4(_mm_cvtps_epi32(f.m128));
# else
  return {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
# endif
}
/* Debug helper: print `label` followed by all four components as decimal integers. */
ccl_device_inline void print_int4(const char *label, const int4 &a)
{
  const int x = a.x;
  const int y = a.y;
  const int z = a.z;
  const int w = a.w;
  printf("%s: %d %d %d %d\n", label, x, y, z, w);
}
#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
/* Build an int4 with every component set to `i`.
 * With SSE the value is broadcast via _mm_set1_epi32; otherwise this forwards to the
 * four-argument make_int4. */
ccl_device_inline int4 make_int4(int i)
{
#ifdef __KERNEL_SSE__
  int4 splat(_mm_set1_epi32(i));
#else
  int4 splat = make_int4(i, i, i, i);
#endif
  return splat;
}
/* Convert a float3 to an int4.
 * GPU: casts x, y, z and sets w to 0.
 * CPU with SSE: converts the whole packed register with _mm_cvtps_epi32.
 * Other CPU builds: casts x, y, z and f.w.
 * NOTE(review): _mm_cvtps_epi32 rounds according to the current SSE rounding mode,
 * whereas the C casts in the other branches truncate toward zero -- confirm whether
 * this difference is intended. */
ccl_device_inline int4 make_int4(const float3 f)
{
#if defined(__KERNEL_GPU__)
  int4 converted = make_int4((int)f.x, (int)f.y, (int)f.z, 0);
#elif defined(__KERNEL_SSE__)
  int4 converted(_mm_cvtps_epi32(f.m128));
#else
  int4 converted = make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
#endif
  return converted;
}
/* Convert each component of a float4 to int.
 * SSE: converts the whole packed register with _mm_cvtps_epi32.
 * Otherwise: casts each component and forwards to the four-argument make_int4.
 * NOTE(review): _mm_cvtps_epi32 rounds according to the current SSE rounding mode,
 * whereas the C casts truncate toward zero -- confirm whether this difference is
 * intended. */
ccl_device_inline int4 make_int4(const float4 f)
{
#ifdef __KERNEL_SSE__
  int4 converted(_mm_cvtps_epi32(f.m128));
#else
  int4 converted = make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w);
#endif
  return converted;
}
/* Debug helper: print `label` followed by all four components as decimal integers.
 * The body is empty when __KERNEL_PRINTF__ is not defined. */
ccl_device_inline void print_int4(ccl_private const char *label, const int4 a)
{
#ifdef __KERNEL_PRINTF__
  const int x = a.x;
  const int y = a.y;
  const int z = a.z;
  const int w = a.w;
  printf("%s: %d %d %d %d\n", label, x, y, z, w);
#endif
}
CCL_NAMESPACE_END