Intern: Adding atomic_load/store support for different types.

Mostly using built-in `__atomic` functions, with a special code path
using `MemoryBarrier()` on Windows.

Authored By: Sergey Sharybin (sergey)

Reviewed By: Sergey Sharybin (sergey), Ray molenkamp (LazyDodo)

Ref D15020
YimingWu 2022-05-26 23:06:36 +08:00
parent d6badf6fde
commit fdc2b7bfa4
5 changed files with 382 additions and 0 deletions
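
For reference, a minimal caller-side sketch of how the new load/store functions are used (a hypothetical example, not taken from this commit; the shared counter, the helper names, and the include of the public atomic_ops.h header are illustrative assumptions):

  #include <stdint.h>
  #include "atomic_ops.h" /* Public header declaring the atomic_load/store_*() functions. */

  static uint64_t shared_counter = 0; /* Hypothetical state shared between threads. */

  static void publish_value(void)
  {
    /* Atomic 64-bit store: other threads never observe a torn write. */
    atomic_store_uint64(&shared_counter, 42);
  }

  static uint64_t read_value(void)
  {
    /* Atomic 64-bit load with full-barrier (roughly sequentially consistent) semantics. */
    return atomic_load_uint64(&shared_counter);
  }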

View File

@ -64,16 +64,22 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v);
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v);
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new);
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v);
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v);
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v);
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v);
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x);
@ -82,6 +88,8 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new);
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v);
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v);
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x);
@ -104,6 +112,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_load_z(const size_t *v);
ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v);
/* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x);

View File

@ -102,6 +102,24 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
#endif
}
ATOMIC_INLINE size_t atomic_load_z(const size_t *v)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_load_uint64((const uint64_t *)v);
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_load_uint32((const uint32_t *)v);
#endif
}
ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v)
{
#if (LG_SIZEOF_PTR == 8)
atomic_store_uint64((uint64_t *)p, v);
#elif (LG_SIZEOF_PTR == 4)
atomic_store_uint32((uint32_t *)p, v);
#endif
}
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{
size_t prev_value;

View File

@ -49,6 +49,16 @@
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif
/* TODO(sergey): On the x64 platform both reads and writes of a variable aligned to its type size
 * are atomic, so in theory it is possible to avoid the memory barrier and gain some performance.
 * The downside is that this would impose an alignment requirement on the value being operated on. */
#define __atomic_impl_load_generic(v) (MemoryBarrier(), *(v))
#define __atomic_impl_store_generic(p, v) \
do { \
*(p) = (v); \
MemoryBarrier(); \
} while (0)
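/* Illustration only (not part of this change): with the helpers above, the 64-bit load/store
 * wrappers defined below behave roughly like a hypothetical pair of functions that put a full
 * fence next to a plain, naturally aligned access:
 *
 *   uint64_t load64(const uint64_t *v) { MemoryBarrier(); return *v; }
 *   void store64(uint64_t *p, uint64_t v) { *p = v; MemoryBarrier(); }
 */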
/* 64-bit operations. */
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
@ -66,6 +76,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
@ -92,6 +112,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return InterlockedCompareExchange64(v, _new, old);
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x);
@ -120,6 +150,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x);
@ -151,6 +191,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, x);
@ -225,6 +275,9 @@ ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
#endif
}
#undef __atomic_impl_load_generic
#undef __atomic_impl_store_generic
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@ -98,6 +98,22 @@ ATOMIC_INLINE void atomic_spin_unlock(volatile AtomicSpinLock *lock)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Common part of x64 implementation
* \{ */
/* TODO(sergey): On the x64 platform both reads and writes of a variable aligned to its type size
 * are atomic, so in theory it is possible to avoid the memory barrier and gain some performance.
 * The downside is that this would impose an alignment requirement on the value being operated on. */
#define __atomic_impl_load_generic(v) (__sync_synchronize(), *(v))
#define __atomic_impl_store_generic(p, v) \
do { \
*(p) = (v); \
__sync_synchronize(); \
} while (0)
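/* Illustration only (not part of this change): where the __atomic builtins are available the same
 * effect is expressed directly, which is what the non-fallback paths below do; a hypothetical
 * generic pair would be:
 *
 *   uint64_t load64(const uint64_t *v) { return __atomic_load_n(v, __ATOMIC_SEQ_CST); }
 *   void store64(uint64_t *p, uint64_t v) { __atomic_store_n(p, v, __ATOMIC_SEQ_CST); }
 */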
/** \} */
/* -------------------------------------------------------------------- */
/** \name Common part of locking fallback implementation
* \{ */
@ -158,6 +174,23 @@ static _ATOMIC_MAYBE_UNUSED AtomicSpinLock _atomic_global_lock = {0};
return original_value; \
}
#define ATOMIC_LOCKING_LOAD_DEFINE(_type) \
ATOMIC_INLINE _type##_t atomic_load_##_type(const _type##_t *v) \
{ \
atomic_spin_lock(&_atomic_global_lock); \
const _type##_t value = *v; \
atomic_spin_unlock(&_atomic_global_lock); \
return value; \
}
#define ATOMIC_LOCKING_STORE_DEFINE(_type) \
ATOMIC_INLINE void atomic_store_##_type(_type##_t *p, const _type##_t v) \
{ \
atomic_spin_lock(&_atomic_global_lock); \
*p = v; \
atomic_spin_unlock(&_atomic_global_lock); \
}
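/* Illustration only (not part of this change): for example, ATOMIC_LOCKING_LOAD_DEFINE(uint64),
 * used by the fallback path further down, expands to:
 *
 *   ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
 *   {
 *     atomic_spin_lock(&_atomic_global_lock);
 *     const uint64_t value = *v;
 *     atomic_spin_unlock(&_atomic_global_lock);
 *     return value;
 *   }
 */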
/** \} */
/* -------------------------------------------------------------------- */
@ -192,6 +225,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
@ -218,6 +261,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
@ -256,6 +309,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return ret;
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_impl_store_generic(p, v);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
@ -292,6 +355,17 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_impl_store_generic(p, v);
}
#else
/* Unsigned */
@ -304,6 +378,9 @@ ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(uint64)
ATOMIC_LOCKING_CAS_DEFINE(uint64)
ATOMIC_LOCKING_LOAD_DEFINE(uint64)
ATOMIC_LOCKING_STORE_DEFINE(uint64)
/* Signed */
ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int64)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int64)
@ -313,6 +390,9 @@ ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(int64)
ATOMIC_LOCKING_CAS_DEFINE(int64)
ATOMIC_LOCKING_LOAD_DEFINE(int64)
ATOMIC_LOCKING_STORE_DEFINE(int64)
#endif
/** \} */
@ -339,6 +419,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
@ -355,6 +445,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && \
(defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
@ -385,6 +485,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return ret;
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
@ -413,6 +523,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return ret;
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#else
/* Unsigned */
@ -422,6 +542,9 @@ ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(uint32)
ATOMIC_LOCKING_CAS_DEFINE(uint32)
ATOMIC_LOCKING_LOAD_DEFINE(uint32)
ATOMIC_LOCKING_STORE_DEFINE(uint32)
/* Signed */
ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int32)
@ -429,6 +552,9 @@ ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int32)
ATOMIC_LOCKING_CAS_DEFINE(int32)
ATOMIC_LOCKING_LOAD_DEFINE(int32)
ATOMIC_LOCKING_STORE_DEFINE(int32)
#endif
#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
@ -548,6 +674,9 @@ ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int8)
/** \} */
#undef __atomic_impl_load_generic
#undef __atomic_impl_store_generic
#undef ATOMIC_LOCKING_OP_AND_FETCH_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_OP_DEFINE
#undef ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE
@ -557,5 +686,7 @@ ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int8)
#undef ATOMIC_LOCKING_FETCH_AND_OR_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_AND_DEFINE
#undef ATOMIC_LOCKING_CAS_DEFINE
#undef ATOMIC_LOCKING_LOAD_DEFINE
#undef ATOMIC_LOCKING_STORE_DEFINE
#endif /* __ATOMIC_OPS_UNIX_H__ */

View File

@ -143,6 +143,40 @@ TEST(atomic, atomic_cas_uint64)
}
}
TEST(atomic, atomic_load_uint64)
{
/* Make sure alias is implemented. */
{
uint64_t value = 2;
EXPECT_EQ(atomic_load_uint64(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const uint64_t uint64_t_max = std::numeric_limits<uint64_t>::max();
uint64_t value = uint64_t_max;
EXPECT_EQ(atomic_load_uint64(&value), uint64_t_max);
}
}
TEST(atomic, atomic_store_uint64)
{
/* Make sure alias is implemented. */
{
uint64_t value = 0;
atomic_store_uint64(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const uint64_t uint64_t_max = std::numeric_limits<uint64_t>::max();
uint64_t value = 0;
atomic_store_uint64(&value, uint64_t_max);
EXPECT_EQ(value, uint64_t_max);
}
}
/** \} */
/* -------------------------------------------------------------------- */
@ -277,6 +311,40 @@ TEST(atomic, atomic_cas_int64)
}
}
TEST(atomic, atomic_load_int64)
{
/* Make sure alias is implemented. */
{
int64_t value = 2;
EXPECT_EQ(atomic_load_int64(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const int64_t int64_t_max = std::numeric_limits<int64_t>::max();
int64_t value = int64_t_max;
EXPECT_EQ(atomic_load_int64(&value), int64_t_max);
}
}
TEST(atomic, atomic_store_int64)
{
/* Make sure alias is implemented. */
{
int64_t value = 0;
atomic_store_int64(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const int64_t int64_t_max = std::numeric_limits<int64_t>::max();
int64_t value = 0;
atomic_store_int64(&value, int64_t_max);
EXPECT_EQ(value, int64_t_max);
}
}
/** \} */
/* -------------------------------------------------------------------- */
@ -358,6 +426,40 @@ TEST(atomic, atomic_cas_uint32)
}
}
TEST(atomic, atomic_load_uint32)
{
/* Make sure alias is implemented. */
{
uint32_t value = 2;
EXPECT_EQ(atomic_load_uint32(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const uint32_t uint32_t_max = std::numeric_limits<uint32_t>::max();
uint32_t value = uint32_t_max;
EXPECT_EQ(atomic_load_uint32(&value), uint32_t_max);
}
}
TEST(atomic, atomic_store_uint32)
{
/* Make sure alias is implemented. */
{
uint32_t value = 0;
atomic_store_uint32(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const uint32_t uint32_t_max = std::numeric_limits<uint32_t>::max();
uint32_t value = 0;
atomic_store_uint32(&value, uint32_t_max);
EXPECT_EQ(value, uint32_t_max);
}
}
TEST(atomic, atomic_fetch_and_add_uint32)
{
{
@ -505,6 +607,40 @@ TEST(atomic, atomic_cas_int32)
}
}
TEST(atomic, atomic_load_int32)
{
/* Make sure alias is implemented. */
{
int32_t value = 2;
EXPECT_EQ(atomic_load_int32(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const int32_t int32_t_max = std::numeric_limits<int32_t>::max();
int32_t value = int32_t_max;
EXPECT_EQ(atomic_load_int32(&value), int32_t_max);
}
}
TEST(atomic, atomic_store_int32)
{
/* Make sure alias is implemented. */
{
int32_t value = 0;
atomic_store_int32(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const int32_t int32_t_max = std::numeric_limits<int32_t>::max();
int32_t value = 0;
atomic_store_int32(&value, int32_t_max);
EXPECT_EQ(value, int32_t_max);
}
}
TEST(atomic, atomic_fetch_and_add_int32)
{
{
@ -761,6 +897,40 @@ TEST(atomic, atomic_cas_z)
}
}
TEST(atomic, atomic_load_z)
{
/* Make sure alias is implemented. */
{
size_t value = 2;
EXPECT_EQ(atomic_load_z(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const size_t size_t_max = std::numeric_limits<size_t>::max();
size_t value = size_t_max;
EXPECT_EQ(atomic_load_z(&value), size_t_max);
}
}
TEST(atomic, atomic_store_z)
{
/* Make sure alias is implemented. */
{
size_t value = 0;
atomic_store_z(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const size_t size_t_max = std::numeric_limits<size_t>::max();
size_t value = 0;
atomic_store_z(&value, size_t_max);
EXPECT_EQ(value, size_t_max);
}
}
TEST(atomic, atomic_fetch_and_update_max_z)
{
const size_t size_t_max = std::numeric_limits<size_t>::max();