Intern: Adding atomic_load/store support for different types.

Mostly using built-in `__atomic` functions, with a special code path
using `MemoryBarrier()` on Windows.

Authored By: Sergey Sharybin (sergey)

Reviewed By: Sergey Sharybin (sergey), Ray molenkamp (LazyDodo)

Ref D15020
YimingWu 2022-05-26 23:06:36 +08:00
parent d6badf6fde
commit fdc2b7bfa4
5 changed files with 382 additions and 0 deletions
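
For reference, a minimal caller-side sketch of how the new load/store functions are used (a hypothetical example, not taken from this commit; the shared counter, the helper names, and the include of the public atomic_ops.h header are illustrative assumptions):

  #include <stdint.h>
  #include "atomic_ops.h" /* Public header declaring the atomic_load/store_*() functions. */

  static uint64_t shared_counter = 0; /* Hypothetical state shared between threads. */

  static void publish_value(void)
  {
    /* Atomic 64-bit store: other threads never observe a torn write. */
    atomic_store_uint64(&shared_counter, 42);
  }

  static uint64_t read_value(void)
  {
    /* Atomic 64-bit load with full-barrier (roughly sequentially consistent) semantics. */
    return atomic_load_uint64(&shared_counter);
  }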

View File

@ -64,16 +64,22 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x);
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v);
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v);
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x);
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new);
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v);
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v);
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new);
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v);
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v);
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x);
@ -82,6 +88,8 @@ ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x);
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new);
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v);
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v);
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x);
ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x);
@ -104,6 +112,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_load_z(const size_t *v);
ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v);
/* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x);

View File

@ -102,6 +102,24 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
#endif
}
ATOMIC_INLINE size_t atomic_load_z(const size_t *v)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_load_uint64((const uint64_t *)v);
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_load_uint32((const uint32_t *)v);
#endif
}
ATOMIC_INLINE void atomic_store_z(size_t *p, size_t v)
{
#if (LG_SIZEOF_PTR == 8)
atomic_store_uint64((uint64_t *)p, v);
#elif (LG_SIZEOF_PTR == 4)
atomic_store_uint32((uint32_t *)p, v);
#endif
}
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{
size_t prev_value;

View File

@ -49,6 +49,16 @@
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif
/* TODO(sergey): On the x64 platform both reads and writes of a variable aligned to its type size
 * are atomic, so in theory it is possible to avoid the memory barrier and gain some performance.
 * The downside is that this would impose an alignment requirement on the value being operated on. */
#define __atomic_impl_load_generic(v) (MemoryBarrier(), *(v))
#define __atomic_impl_store_generic(p, v) \
do { \
*(p) = (v); \
MemoryBarrier(); \
} while (0)
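/* Illustration only (not part of this change): with the helpers above, the 64-bit load/store
 * wrappers defined below behave roughly like a hypothetical pair of functions that put a full
 * fence next to a plain, naturally aligned access:
 *
 *   uint64_t load64(const uint64_t *v) { MemoryBarrier(); return *v; }
 *   void store64(uint64_t *p, uint64_t v) { *p = v; MemoryBarrier(); }
 */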
/* 64-bit operations. */
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
@ -66,6 +76,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return InterlockedCompareExchange64((int64_t *)v, _new, old);
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
@ -92,6 +112,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return InterlockedCompareExchange64(v, _new, old);
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
return InterlockedExchangeAdd64(p, x);
@ -120,6 +150,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
return InterlockedExchangeAdd(p, x);
@ -151,6 +191,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return InterlockedCompareExchange((long *)v, _new, old);
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_impl_store_generic(p, v);
}
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
return InterlockedExchangeAdd((long *)p, x);
@ -225,6 +275,9 @@ ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
#endif
}
#undef __atomic_impl_load_generic
#undef __atomic_impl_store_generic
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@ -98,6 +98,22 @@ ATOMIC_INLINE void atomic_spin_unlock(volatile AtomicSpinLock *lock)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Common part of x64 implementation
* \{ */
/* TODO(sergey): On the x64 platform both reads and writes of a variable aligned to its type size
 * are atomic, so in theory it is possible to avoid the memory barrier and gain some performance.
 * The downside is that this would impose an alignment requirement on the value being operated on. */
#define __atomic_impl_load_generic(v) (__sync_synchronize(), *(v))
#define __atomic_impl_store_generic(p, v) \
do { \
*(p) = (v); \
__sync_synchronize(); \
} while (0)
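/* Illustration only (not part of this change): where the __atomic builtins are available the same
 * effect is expressed directly, which is what the non-fallback paths below do; a hypothetical
 * generic pair would be:
 *
 *   uint64_t load64(const uint64_t *v) { return __atomic_load_n(v, __ATOMIC_SEQ_CST); }
 *   void store64(uint64_t *p, uint64_t v) { __atomic_store_n(p, v, __ATOMIC_SEQ_CST); }
 */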
/** \} */
/* -------------------------------------------------------------------- */
/** \name Common part of locking fallback implementation
* \{ */
@ -158,6 +174,23 @@ static _ATOMIC_MAYBE_UNUSED AtomicSpinLock _atomic_global_lock = {0};
return original_value; \
}
#define ATOMIC_LOCKING_LOAD_DEFINE(_type) \
ATOMIC_INLINE _type##_t atomic_load_##_type(const _type##_t *v) \
{ \
atomic_spin_lock(&_atomic_global_lock); \
const _type##_t value = *v; \
atomic_spin_unlock(&_atomic_global_lock); \
return value; \
}
#define ATOMIC_LOCKING_STORE_DEFINE(_type) \
ATOMIC_INLINE void atomic_store_##_type(_type##_t *p, const _type##_t v) \
{ \
atomic_spin_lock(&_atomic_global_lock); \
*p = v; \
atomic_spin_unlock(&_atomic_global_lock); \
}
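/* Illustration only (not part of this change): for example, ATOMIC_LOCKING_LOAD_DEFINE(uint64),
 * used by the fallback path further down, expands to:
 *
 *   ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
 *   {
 *     atomic_spin_lock(&_atomic_global_lock);
 *     const uint64_t value = *v;
 *     atomic_spin_unlock(&_atomic_global_lock);
 *     return value;
 *   }
 */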
/** \} */
/* -------------------------------------------------------------------- */
@ -192,6 +225,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
@ -218,6 +261,16 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
@ -256,6 +309,16 @@ ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _ne
return ret;
}
ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
__atomic_impl_store_generic(p, v);
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
@ -292,6 +355,17 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
return __atomic_impl_load_generic(v);
}
ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
__atomic_impl_store_generic(p, v);
}
#else
/* Unsigned */
@ -304,6 +378,9 @@ ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(uint64)
ATOMIC_LOCKING_CAS_DEFINE(uint64)
ATOMIC_LOCKING_LOAD_DEFINE(uint64)
ATOMIC_LOCKING_STORE_DEFINE(uint64)
/* Signed */
ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int64)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int64)
@ -313,6 +390,9 @@ ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(int64)
ATOMIC_LOCKING_CAS_DEFINE(int64)
ATOMIC_LOCKING_LOAD_DEFINE(int64)
ATOMIC_LOCKING_STORE_DEFINE(int64)
#endif
/** \} */
@ -339,6 +419,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
@ -355,6 +445,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return __sync_val_compare_and_swap(v, old, _new);
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && \
(defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
@ -385,6 +485,16 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
return ret;
}
ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
@ -413,6 +523,16 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
return ret;
}
ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}
ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
__atomic_store(p, &v, __ATOMIC_SEQ_CST);
}
#else
/* Unsigned */
@ -422,6 +542,9 @@ ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(uint32)
ATOMIC_LOCKING_CAS_DEFINE(uint32)
ATOMIC_LOCKING_LOAD_DEFINE(uint32)
ATOMIC_LOCKING_STORE_DEFINE(uint32)
/* Signed */
ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int32)
@ -429,6 +552,9 @@ ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int32)
ATOMIC_LOCKING_CAS_DEFINE(int32)
ATOMIC_LOCKING_LOAD_DEFINE(int32)
ATOMIC_LOCKING_STORE_DEFINE(int32)
#endif
#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
@ -548,6 +674,9 @@ ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int8)
/** \} */
#undef __atomic_impl_load_generic
#undef __atomic_impl_store_generic
#undef ATOMIC_LOCKING_OP_AND_FETCH_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_OP_DEFINE
#undef ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE
@ -557,5 +686,7 @@ ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int8)
#undef ATOMIC_LOCKING_FETCH_AND_OR_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_AND_DEFINE
#undef ATOMIC_LOCKING_CAS_DEFINE
#undef ATOMIC_LOCKING_LOAD_DEFINE
#undef ATOMIC_LOCKING_STORE_DEFINE
#endif /* __ATOMIC_OPS_UNIX_H__ */

View File

@ -143,6 +143,40 @@ TEST(atomic, atomic_cas_uint64)
}
}
TEST(atomic, atomic_load_uint64)
{
/* Make sure alias is implemented. */
{
uint64_t value = 2;
EXPECT_EQ(atomic_load_uint64(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const uint64_t uint64_t_max = std::numeric_limits<uint64_t>::max();
uint64_t value = uint64_t_max;
EXPECT_EQ(atomic_load_uint64(&value), uint64_t_max);
}
}
TEST(atomic, atomic_store_uint64)
{
/* Make sure alias is implemented. */
{
uint64_t value = 0;
atomic_store_uint64(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const uint64_t uint64_t_max = std::numeric_limits<uint64_t>::max();
uint64_t value = 0;
atomic_store_uint64(&value, uint64_t_max);
EXPECT_EQ(value, uint64_t_max);
}
}
/** \} */
/* -------------------------------------------------------------------- */
@ -277,6 +311,40 @@ TEST(atomic, atomic_cas_int64)
}
}
TEST(atomic, atomic_load_int64)
{
/* Make sure alias is implemented. */
{
int64_t value = 2;
EXPECT_EQ(atomic_load_int64(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const int64_t int64_t_max = std::numeric_limits<int64_t>::max();
int64_t value = int64_t_max;
EXPECT_EQ(atomic_load_int64(&value), int64_t_max);
}
}
TEST(atomic, atomic_store_int64)
{
/* Make sure alias is implemented. */
{
int64_t value = 0;
atomic_store_int64(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const int64_t int64_t_max = std::numeric_limits<int64_t>::max();
int64_t value = 0;
atomic_store_int64(&value, int64_t_max);
EXPECT_EQ(value, int64_t_max);
}
}
/** \} */
/* -------------------------------------------------------------------- */
@ -358,6 +426,40 @@ TEST(atomic, atomic_cas_uint32)
}
}
TEST(atomic, atomic_load_uint32)
{
/* Make sure alias is implemented. */
{
uint32_t value = 2;
EXPECT_EQ(atomic_load_uint32(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const uint32_t uint32_t_max = std::numeric_limits<uint32_t>::max();
uint32_t value = uint32_t_max;
EXPECT_EQ(atomic_load_uint32(&value), uint32_t_max);
}
}
TEST(atomic, atomic_store_uint32)
{
/* Make sure alias is implemented. */
{
uint32_t value = 0;
atomic_store_uint32(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const uint32_t uint32_t_max = std::numeric_limits<uint32_t>::max();
uint32_t value = 0;
atomic_store_uint32(&value, uint32_t_max);
EXPECT_EQ(value, uint32_t_max);
}
}
TEST(atomic, atomic_fetch_and_add_uint32)
{
{
@ -505,6 +607,40 @@ TEST(atomic, atomic_cas_int32)
}
}
TEST(atomic, atomic_load_int32)
{
/* Make sure alias is implemented. */
{
int32_t value = 2;
EXPECT_EQ(atomic_load_int32(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const int32_t int32_t_max = std::numeric_limits<int32_t>::max();
int32_t value = int32_t_max;
EXPECT_EQ(atomic_load_int32(&value), int32_t_max);
}
}
TEST(atomic, atomic_store_int32)
{
/* Make sure alias is implemented. */
{
int32_t value = 0;
atomic_store_int32(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const int32_t int32_t_max = std::numeric_limits<int32_t>::max();
int32_t value = 0;
atomic_store_int32(&value, int32_t_max);
EXPECT_EQ(value, int32_t_max);
}
}
TEST(atomic, atomic_fetch_and_add_int32)
{
{
@ -761,6 +897,40 @@ TEST(atomic, atomic_cas_z)
}
}
TEST(atomic, atomic_load_z)
{
/* Make sure alias is implemented. */
{
size_t value = 2;
EXPECT_EQ(atomic_load_z(&value), 2);
}
/* Make sure alias is using proper bitness. */
{
const size_t size_t_max = std::numeric_limits<size_t>::max();
size_t value = size_t_max;
EXPECT_EQ(atomic_load_z(&value), size_t_max);
}
}
TEST(atomic, atomic_store_z)
{
/* Make sure alias is implemented. */
{
size_t value = 0;
atomic_store_z(&value, 2);
EXPECT_EQ(value, 2);
}
/* Make sure alias is using proper bitness. */
{
const size_t size_t_max = std::numeric_limits<size_t>::max();
size_t value = 0;
atomic_store_z(&value, size_t_max);
EXPECT_EQ(value, size_t_max);
}
}
TEST(atomic, atomic_fetch_and_update_max_z)
{
const size_t size_t_max = std::numeric_limits<size_t>::max();