Code refactor: split defines into separate header, changes to SSE type headers.

I need to use some macros defined in util_simd.h for float3/float4, to emulate
SSE4 instructions on SSE2. Due to issues with the order of header includes this
was not possible, so this commit does some refactoring to make it work.

Differential Revision: https://developer.blender.org/D2764
This commit is contained in:
Brecht Van Lommel 2017-08-02 02:09:08 +02:00
parent 5e4bad2c00
commit a8cc0d707e
10 changed files with 201 additions and 184 deletions

View File

@ -48,6 +48,7 @@
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
#include "util/util_thread.h"

View File

@ -233,6 +233,7 @@ set(SRC_FILTER_HEADERS
set(SRC_UTIL_HEADERS
../util/util_atomic.h
../util/util_color.h
../util/util_defines.h
../util/util_half.h
../util/util_hash.h
../util/util_math.h

View File

@ -38,6 +38,7 @@ set(SRC_HEADERS
util_atomic.h
util_boundbox.h
util_debug.h
util_defines.h
util_guarded_allocator.cpp
util_foreach.h
util_function.h

View File

@ -0,0 +1,134 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __UTIL_DEFINES_H__
#define __UTIL_DEFINES_H__
/* Bitness
 *
 * Defines __KERNEL_64_BIT__ when one of the compiler-provided 64-bit
 * architecture macros (PowerPC64, x86-64, IA-64, or MSVC x64) is present.
 * It is left undefined otherwise. */
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
# define __KERNEL_64_BIT__
#endif
/* Qualifiers for kernel code shared by CPU and GPU
 *
 * Everything in this section is only defined when __KERNEL_GPU__ is NOT
 * defined, i.e. it is the CPU-side definition of the ccl_* qualifiers. */
#ifndef __KERNEL_GPU__
/* Function qualifiers. */
# define ccl_device static inline
# define ccl_device_noinline static
/* Address-space qualifiers: all expand to nothing in this (non-GPU) branch. */
# define ccl_global
# define ccl_constant
# define ccl_local
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
/* Defined by default; undefined again below for 32-bit Windows builds that
 * take the __declspec branch. */
# define __KERNEL_WITH_SSE_ALIGN__
/* Windows branch using __forceinline / __declspec (skipped when FREE_WINDOWS
 * is defined). */
# if defined(_WIN32) && !defined(FREE_WINDOWS)
# define ccl_device_inline static __forceinline
# define ccl_device_forceinline static __forceinline
# define ccl_align(...) __declspec(align(__VA_ARGS__))
/* Relies on __KERNEL_64_BIT__ having been set earlier in this header. */
# ifdef __KERNEL_64_BIT__
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
# else /* __KERNEL_64_BIT__ */
# undef __KERNEL_WITH_SSE_ALIGN__
/* No support for function arguments (error C2719). */
# define ccl_try_align(...)
# endif /* __KERNEL_64_BIT__ */
/* These two expand to nothing in this branch. */
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
/* Branch using GCC-style __attribute__ syntax. */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
/* Provide an MSVC-style __forceinline spelling for code that uses it directly;
 * not done when FREE_WINDOWS64 is defined. */
# ifndef FREE_WINDOWS64
# define __forceinline inline __attribute__((always_inline))
# endif
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
/* NOTE(review): this maps to the `used` attribute, not `unused`, even though
 * the macro is named "maybe unused" - confirm this is intentional. */
# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break').
 * Expands to a no-op expression on compilers other than GCC 7 or newer. */
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
# define ATTR_FALLTHROUGH ((void)0)
# endif
#endif /* __KERNEL_GPU__ */
/* macros */
/* Branch prediction hints; only worth using in code that runs a _lot_.
 *
 * Without both __GNUC__ and __KERNEL_CPU__ the macros are plain pass-throughs
 * of their argument; otherwise they wrap __builtin_expect(). */
#if !(defined(__GNUC__) && defined(__KERNEL_CPU__))
#  define LIKELY(x) (x)
#  define UNLIKELY(x) (x)
#else
#  define LIKELY(x) __builtin_expect(!!(x), 1)
#  define UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif
/* HAS_CPP11_FEATURES is defined for C++ translation units that either report
 * __cplusplus >= 201103L or are built with _MSC_VER >= 1800. */
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
# define HAS_CPP11_FEATURES
#endif
/* TYPEOF(x): names the type of expression `x`.
 *
 * Only provided for GCC and Clang. With C++11 available it is built on
 * decltype(); otherwise it falls back to the GNU typeof extension. */
#if defined(__GNUC__) || defined(__clang__)
#  if defined(HAS_CPP11_FEATURES)
/* Some magic to be sure we don't have reference in the type: routing the
 * expression through a by-value helper makes decltype() see T, not T&. */
template<typename T> static inline T decltype_helper(T x) { return x; }
#    define TYPEOF(x) decltype(decltype_helper(x))
#  else
/* Use the __typeof__ spelling: plain `typeof` is not a keyword when the
 * compiler runs in a strict ISO mode (-std=c++98, -std=c11, ...). */
#    define TYPEOF(x) __typeof__(x)
#  endif
#endif
/* Compile-time type checks.
 *
 * CHECK_TYPE(var, type) verifies that `var` has type `type`;
 * CHECK_TYPE_PAIR(var_a, var_b) verifies that both variables share a type.
 *
 * Causes warning:
 * incompatible types when assigning to type 'Foo' from type 'Bar'
 * ... the compiler optimizes away the temp var.
 *
 * Both macros go through TYPEOF() so they follow whichever type-query
 * mechanism TYPEOF selects. On non-GNU compilers they expand to nothing. */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) { \
	TYPEOF(var) *__tmp; \
	__tmp = (type *)NULL; \
	(void)__tmp; \
} (void)0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
	TYPEOF(var_a) *__tmp; \
	__tmp = (TYPEOF(var_b) *)NULL; \
	(void)__tmp; \
} (void)0
#else
#  define CHECK_TYPE(var, type)
#  define CHECK_TYPE_PAIR(var_a, var_b)
#endif
/* Expression form of the type check, so it can be used in simple macros:
 * compares a zero cast to `type` against `val` and discards the result. */
#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val)))
/* util_assert(statement): forwards to the standard assert() when
 * __KERNEL_GPU__ is not defined, and expands to nothing when it is. */
#ifndef __KERNEL_GPU__
# include <cassert>
# define util_assert(statement) assert(statement)
#else
# define util_assert(statement)
#endif
#endif /* __UTIL_DEFINES_H__ */

View File

@ -19,16 +19,6 @@
#ifndef __KERNEL_GPU__
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || \
defined(__KERNEL_SSE3__) || \
defined(__KERNEL_SSSE3__) || \
defined(__KERNEL_SSE41__) || \
defined(__KERNEL_AVX__) || \
defined(__KERNEL_AVX2__)
/* do nothing */
#endif
/* x86
*
* Compile a regular, SSE2 and SSE3 kernel. */
@ -73,48 +63,6 @@
#endif /* defined(__x86_64__) || defined(_M_X64) */
/* SSE Experiment
*
* This is disabled code for an experiment to use SSE types globally for types
* such as float3 and float4. Currently this gives an overall slowdown. */
#if 0
# define __KERNEL_SSE__
# ifndef __KERNEL_SSE2__
# define __KERNEL_SSE2__
# endif
# ifndef __KERNEL_SSE3__
# define __KERNEL_SSE3__
# endif
# ifndef __KERNEL_SSSE3__
# define __KERNEL_SSSE3__
# endif
# ifndef __KERNEL_SSE4__
# define __KERNEL_SSE4__
# endif
#endif
/* SSE Intrinsics includes
*
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
/* SSE intrinsics headers */
#ifndef FREE_WINDOWS64
#ifdef _MSC_VER
# include <intrin.h>
#elif (defined(__x86_64__) || defined(__i386__))
# include <x86intrin.h>
#endif
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
* Since we can't avoid including <windows.h>, better only include that */
#include "util/util_windows.h"
#endif
#endif
#endif /* __UTIL_OPTIMIZATION_H__ */

View File

@ -18,19 +18,38 @@
#ifndef __UTIL_SIMD_TYPES_H__
#define __UTIL_SIMD_TYPES_H__
#ifndef __KERNEL_GPU__
#include <limits>
#include "util/util_debug.h"
#include "util/util_types.h"
#include "util/util_defines.h"
/* SSE Intrinsics includes
*
* We assume __KERNEL_SSEX__ flags to have been defined at this point */
/* SSE intrinsics headers */
#ifndef FREE_WINDOWS64
#ifdef _MSC_VER
# include <intrin.h>
#elif (defined(__x86_64__) || defined(__i386__))
# include <x86intrin.h>
#endif
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
* Since we can't avoid including <windows.h>, better only include that */
#include "util/util_windows.h"
#endif
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssei;
struct ssef;
extern const __m128 _mm_lookupmask_ps[16];
/* Special Types */
@ -496,13 +515,19 @@ ccl_device_inline int bitscan(int value)
#endif /* __KERNEL_SSE2__ */
/* quiet unused define warnings
 *
 * The conditional below has an intentionally empty body: it only mentions the
 * __KERNEL_SSE*__ / __KERNEL_AVX*__ macros so that each of them is referenced
 * at least once. */
#if defined(__KERNEL_SSE2__) || \
defined(__KERNEL_SSE3__) || \
defined(__KERNEL_SSSE3__) || \
defined(__KERNEL_SSE41__) || \
defined(__KERNEL_AVX__) || \
defined(__KERNEL_AVX2__)
/* do nothing */
#endif
CCL_NAMESPACE_END
#include "util/util_math.h"
#include "util/util_sseb.h"
#include "util/util_ssei.h"
#include "util/util_ssef.h"
#include "util/util_avxf.h"
#endif /* __KERNEL_GPU__ */
#endif /* __UTIL_SIMD_TYPES_H__ */

View File

@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct ssei;
struct ssef;
/*! 4-wide SSE bool type. */
struct sseb
{

View File

@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssef;
/*! 4-wide SSE float type. */
struct ssef
{

View File

@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_SSE2__
struct sseb;
struct ssef;
/*! 4-wide SSE integer type. */
struct ssei
{
@ -234,8 +237,10 @@ __forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a
#else
__forceinline int reduce_min(const ssei& v) { return min(min(v[0],v[1]),min(v[2],v[3])); }
__forceinline int reduce_max(const ssei& v) { return max(max(v[0],v[1]),max(v[2],v[3])); }
/* Scalar helper: returns the smaller of two ints (`b` when they are equal). */
__forceinline int ssei_min(int a, int b)
{
	if(a < b) {
		return a;
	}
	return b;
}

/* Scalar helper: returns the larger of two ints (`b` when they are equal). */
__forceinline int ssei_max(int a, int b)
{
	if(a > b) {
		return a;
	}
	return b;
}
/* Horizontal minimum over the four elements of `v`, computed pairwise with
 * the scalar ssei_min() helper. */
__forceinline int reduce_min(const ssei& v)
{
	return ssei_min(ssei_min(v[0], v[1]),
	                ssei_min(v[2], v[3]));
}

/* Horizontal maximum over the four elements of `v`, computed pairwise with
 * the scalar ssei_max() helper. */
__forceinline int reduce_max(const ssei& v)
{
	return ssei_max(ssei_max(v[0], v[1]),
	                ssei_max(v[2], v[3]));
}

/* Horizontal sum of the four elements of `v`, added in index order. */
__forceinline int reduce_add(const ssei& v)
{
	return v[0] + v[1] + v[2] + v[3];
}
#endif

View File

@ -21,72 +21,17 @@
# include <stdlib.h>
#endif
/* Bitness */
#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
# define __KERNEL_64_BIT__
#endif
/* Qualifiers for kernel code shared by CPU and GPU */
#ifndef __KERNEL_GPU__
# define ccl_device static inline
# define ccl_device_noinline static
# define ccl_global
# define ccl_constant
# define ccl_local
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
# define ccl_device_inline static __forceinline
# define ccl_device_forceinline static __forceinline
# define ccl_align(...) __declspec(align(__VA_ARGS__))
# ifdef __KERNEL_64_BIT__
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
# else /* __KERNEL_64_BIT__ */
# undef __KERNEL_WITH_SSE_ALIGN__
/* No support for function arguments (error C2719). */
# define ccl_try_align(...)
# endif /* __KERNEL_64_BIT__ */
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
# ifndef FREE_WINDOWS64
# define __forceinline inline __attribute__((always_inline))
# endif
# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
# define ATTR_FALLTHROUGH ((void)0)
# endif
#endif /* __KERNEL_GPU__ */
/* Standard Integer Types */
#if !defined(__KERNEL_GPU__) && !defined(_WIN32)
# include <stdint.h>
#endif
#include "util/util_defines.h"
#ifndef __KERNEL_GPU__
/* int8_t, uint16_t, and friends */
# ifndef _WIN32
# include <stdint.h>
# endif
/* SIMD Types */
# include "util/util_optimization.h"
#endif /* __KERNEL_GPU__ */
# include "util/util_simd.h"
#endif
CCL_NAMESPACE_BEGIN
@ -201,65 +146,8 @@ enum ExtensionType {
EXTENSION_NUM_TYPES,
};
/* macros */
/* hints for branch prediction, only use in code that runs a _lot_ */
#if defined(__GNUC__) && defined(__KERNEL_CPU__)
# define LIKELY(x) __builtin_expect(!!(x), 1)
# define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define LIKELY(x) (x)
# define UNLIKELY(x) (x)
#endif
#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
# define HAS_CPP11_FEATURES
#endif
#if defined(__GNUC__) || defined(__clang__)
# if defined(HAS_CPP11_FEATURES)
/* Some magic to be sure we don't have reference in the type. */
template<typename T> static inline T decltype_helper(T x) { return x; }
# define TYPEOF(x) decltype(decltype_helper(x))
# else
# define TYPEOF(x) typeof(x)
# endif
#endif
/* Causes warning:
* incompatible types when assigning to type 'Foo' from type 'Bar'
* ... the compiler optimizes away the temp var */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) { \
TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
(void)__tmp; \
} (void)0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
(void)__tmp; \
} (void)0
#else
# define CHECK_TYPE(var, type)
# define CHECK_TYPE_PAIR(var_a, var_b)
#endif
/* can be used in simple macros */
#define CHECK_TYPE_INLINE(val, type) \
((void)(((type)0) != (val)))
CCL_NAMESPACE_END
#ifndef __KERNEL_GPU__
# include <cassert>
# define util_assert(statement) assert(statement)
#else
# define util_assert(statement)
#endif
/* Vectorized types declaration. */
#include "util/util_types_uchar2.h"
#include "util/util_types_uchar3.h"
@ -298,5 +186,13 @@ CCL_NAMESPACE_END
#include "util/util_types_vector3_impl.h"
/* SSE types. */
#ifndef __KERNEL_GPU__
# include "util/util_sseb.h"
# include "util/util_ssei.h"
# include "util/util_ssef.h"
# include "util/util_avxf.h"
#endif
#endif /* __UTIL_TYPES_H__ */