#ifndef AMREX_GPU_ATOMIC_H_
#define AMREX_GPU_ATOMIC_H_
#include <AMReX_Config.H>

#include <AMReX_Extension.H>
#include <AMReX_Functional.H>
#include <AMReX_GpuQualifiers.H>

#include <type_traits>
#include <utility>

namespace amrex {

namespace Gpu::Atomic {

namespace detail {

#ifdef AMREX_USE_SYCL
    template <typename R, typename I, typename F>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    R atomic_op (R* const address, R const val, F const f) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I");
        I* const add_as_I = reinterpret_cast<I*>(address);
        sycl::atomic_ref<I,mo,ms,as> a{*add_as_I};
        I old_I = a.load(), new_I;
        do {
            R const new_R = f(*(reinterpret_cast<R const*>(&old_I)), val);
            new_I = *(reinterpret_cast<I const*>(&new_R));
        } while (! a.compare_exchange_strong(old_I, new_I));
        return *(reinterpret_cast<R const*>(&old_I));
#else
        R const old = *address;
        *address = f(*address, val);
        return old;
#endif
    }
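    // Illustrative sketch (not part of the original header): atomic_op turns any
    // binary functor into an atomic read-modify-write via the compare-exchange
    // loop above. For example, an atomic multiply on a device-accessible float
    // pointer p (an assumed name) could be written as
    //
    //   float old = detail::atomic_op<float, unsigned int>(
    //       p, 2.0f, [] (float a, float b) { return a * b; });
    //
    // using unsigned int as the same-sized integer type for the bitwise CAS.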
    template <typename R, typename I, typename Op, typename Cond>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    bool atomic_op_if (R* const address, R const val, Op&& op, Cond&& cond) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I");
        I* const add_as_I = reinterpret_cast<I*>(address);
        sycl::atomic_ref<I,mo,ms,as> a{*add_as_I};
        I old_I = a.load(), new_I;
        bool test_success;
        do {
            R const tmp = op(*(reinterpret_cast<R const*>(&old_I)), val);
            new_I = *(reinterpret_cast<I const*>(&tmp));
            test_success = cond(tmp);
        } while (test_success && ! a.compare_exchange_strong(old_I, new_I));
        return test_success;
#else
        R const old = *address;
        R const tmp = op(*(reinterpret_cast<R const*>(&old)), val);
        bool const test_success = cond(tmp);
        if (test_success) { *address = tmp; }
        return test_success;
#endif
    }
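    // Added commentary: atomic_op_if only commits op's result when cond(result)
    // holds; if cond fails, the loop exits without writing and the function
    // returns false. This is the building block for the public Gpu::Atomic::If
    // further below.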
#elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
    template <typename R, typename I, typename F>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    R atomic_op (R* const address, R const val, F const f) noexcept
    {
        static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I");
        I* const add_as_I = reinterpret_cast<I*>(address);
        I old_I = *add_as_I, assumed_I;
        do {
            assumed_I = old_I;
            R const new_R = f(*(reinterpret_cast<R const*>(&assumed_I)), val);
            old_I = atomicCAS(add_as_I, assumed_I, *(reinterpret_cast<I const*>(&new_R)));
        } while (assumed_I != old_I);
        return *(reinterpret_cast<R const*>(&old_I));
    }
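    // Added commentary: this is the standard atomicCAS retry idiom. The swap
    // succeeds only if *add_as_I still equals assumed_I; otherwise old_I picks
    // up the value another thread wrote and the loop retries with it.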
    template <typename R, typename I, typename Op, typename Cond>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    bool atomic_op_if (R* const address, R const val, Op&& op, Cond&& cond) noexcept
    {
        static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I");
        I* const add_as_I = reinterpret_cast<I*>(address);
        I old_I = *add_as_I, assumed_I;
        bool test_success;
        do {
            assumed_I = old_I;
            R const new_R = op(*(reinterpret_cast<R const*>(&assumed_I)), val);
            test_success = cond(new_R);
            if (test_success) {
                old_I = atomicCAS(add_as_I, assumed_I, *(reinterpret_cast<I const*>(&new_R)));
            }
        } while (test_success && assumed_I != old_I);
        return test_success;
    }

#endif

} // namespace detail
#ifdef AMREX_USE_SYCL
    template<class T, sycl::access::address_space AS = sycl::access::address_space::global_space>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Add_device (T* const sum, T const value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        sycl::atomic_ref<T,mo,ms,AS> a{*sum};
        return a.fetch_add(value);
#else
        amrex::ignore_unused(sum, value);
        return T(); // device-only; unreachable in host compilation
#endif
    }
#endif
#if defined(AMREX_USE_HIP) && defined(__gfx90a__)
    // gfx90a (MI200) has hardware floating-point atomics; unsafeAtomicAdd uses them.
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    float Add_device (float* const sum, float const value) noexcept
    {
        return unsafeAtomicAdd(sum, value);
    }

    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    double Add_device (double* const sum, double const value) noexcept
    {
        return unsafeAtomicAdd(sum, value);
    }
#endif
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)

    template<class T>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Add_device (T* const sum, T const value) noexcept
    {
        return atomicAdd(sum, value);
    }

#if defined(AMREX_USE_CUDA) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)
    // Double-precision atomicAdd is not available in hardware before sm_60;
    // emulate it with atomicCAS on the 64-bit integer representation.
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    double Add_device (double* const sum, double const value) noexcept
    {
        unsigned long long* const sum_as_ull = reinterpret_cast<unsigned long long*>(sum);
        unsigned long long old = *sum_as_ull, assumed;
        do {
            assumed = old;
            old = atomicCAS(sum_as_ull, assumed,
                            __double_as_longlong(__longlong_as_double(assumed) + value));
        } while (assumed != old);
        return __longlong_as_double(old);
    }
#endif
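    // Added commentary: the emulation above is the canonical pattern from the
    // CUDA C++ Programming Guide for double atomicAdd on pre-Pascal (< sm_60)
    // hardware: reinterpret the double as a 64-bit integer and retry with
    // atomicCAS until no other thread intervenes.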
#endif

#ifdef AMREX_USE_SYCL
    template<class T, sycl::access::address_space AS = sycl::access::address_space::global_space>
#else
    template<class T>
#endif
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Add (T* sum, T value) noexcept
    {
#ifdef AMREX_USE_SYCL
        AMREX_IF_ON_DEVICE((return Add_device<T,AS>(sum, value);))
#else
        AMREX_IF_ON_DEVICE((return Add_device(sum, value);))
#endif
        AMREX_IF_ON_HOST((
            auto const old = *sum;
            *sum += value;
            return old;
        ))
    }
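    // Illustrative usage (not part of the original header): accumulate into a
    // single device value from a kernel; d_sum and a are assumed device
    // pointers:
    //
    //   amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) noexcept {
    //       amrex::Gpu::Atomic::Add(d_sum, a[i]);
    //   });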
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) || defined(AMREX_USE_SYCL)
    template <typename T, typename Op, typename Cond,
              std::enable_if_t<sizeof(T) == sizeof(unsigned int), int> foo = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    bool If_device (T* const sum, T const value, Op&& op, Cond&& cond) noexcept
    {
        return detail::atomic_op_if<T, unsigned int>(sum, value,
            std::forward<Op>(op), std::forward<Cond>(cond));
    }
    template <typename T, typename Op, typename Cond,
              std::enable_if_t<sizeof(T) == sizeof(unsigned long long), int> foo = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    bool If_device (T* const sum, T const value, Op&& op, Cond&& cond) noexcept
    {
        return detail::atomic_op_if<T, unsigned long long>(sum, value,
            std::forward<Op>(op), std::forward<Cond>(cond));
    }
#endif
    /**
     * \brief Conditionally perform an atomic operation.
     *
     * Applies op to the current value and value; the result is stored only if
     * cond(result) is true. Returns whether the update was committed.
     */
    template<class T, class Op, class Cond>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    bool If (T* const add, T const value, Op&& op, Cond&& cond) noexcept
    {
        AMREX_IF_ON_DEVICE((
            return If_device(add, value, std::forward<Op>(op), std::forward<Cond>(cond));
        ))
        AMREX_IF_ON_HOST((
            T const old = *add;
            T const tmp = std::forward<Op>(op)(old, value);
            if (std::forward<Cond>(cond)(tmp)) {
                *add = tmp;
                return true;
            }
            return false;
        ))
    }
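    // Illustrative usage (not part of the original header): increment a
    // counter only while it stays at or below a cap; d_cnt is an assumed
    // device pointer. The return value says whether the increment happened:
    //
    //   bool ok = amrex::Gpu::Atomic::If(d_cnt, 1,
    //       [] AMREX_GPU_DEVICE (int a, int b) noexcept { return a + b; },
    //       [] AMREX_GPU_DEVICE (int r) noexcept { return r <= 100; });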
#ifdef AMREX_USE_SYCL
    template<class T, sycl::access::address_space AS = sycl::access::address_space::global_space>
#else
    template<class T>
#endif
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    void AddNoRet (T* sum, T value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        Add_device<T,AS>(sum, value);
#else
        AMREX_IF_ON_DEVICE((Add_device(sum, value);))
        AMREX_IF_ON_HOST((*sum += value;))
#endif
    }
#if defined(AMREX_USE_HIP) && defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 5)
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    void AddNoRet (float* const sum, float const value) noexcept
    {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
        atomicAddNoRet(sum, value);
#pragma clang diagnostic pop
    }
#endif
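    // Added commentary: AddNoRet is the choice when the previous value is not
    // needed; on some targets (e.g. the deprecated HIP atomicAddNoRet above) a
    // no-return atomic maps to a cheaper instruction than a fetch-add.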
#ifdef AMREX_USE_SYCL
    template<class T>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Min_device (T* const m, T const value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<T,mo,ms,as> a{*m};
        return a.fetch_min(value);
#else
        amrex::ignore_unused(m, value);
        return T(); // device-only; unreachable in host compilation
#endif
    }
#else
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    float Min_device (float* const m, float const value) noexcept
    {
        return detail::atomic_op<float,int>(m, value, amrex::Minimum<float>());
    }

    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    double Min_device (double* const m, double const value) noexcept
    {
        return detail::atomic_op<double,unsigned long long>(m, value, amrex::Minimum<double>());
    }
#endif
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    int Min_device (int* const m, int const value) noexcept
    {
        return atomicMin(m, value);
    }
    // (analogous integer overloads via atomicMin elided in this excerpt)
#endif

    template<class T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Min (T* const m, T const value) noexcept
    {
        AMREX_IF_ON_DEVICE((return Min_device(m, value);))
        AMREX_IF_ON_HOST((
            auto const old = *m;
            *m = (*m) < value ? (*m) : value;
            return old;
        ))
    }
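    // Illustrative usage (not part of the original header): track a global
    // minimum, e.g. the smallest local time step; d_dt is an assumed device
    // pointer initialized to a large value:
    //
    //   amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) noexcept {
    //       amrex::Gpu::Atomic::Min(d_dt, dt_local[i]);
    //   });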
#ifdef AMREX_USE_SYCL
    template<class T>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Max_device (T* const m, T const value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<T,mo,ms,as> a{*m};
        return a.fetch_max(value);
#else
        amrex::ignore_unused(m, value);
        return T(); // device-only; unreachable in host compilation
#endif
    }
#else
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    float Max_device (float* const m, float const value) noexcept
    {
        return detail::atomic_op<float,int>(m, value, amrex::Maximum<float>());
    }

    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    double Max_device (double* const m, double const value) noexcept
    {
        return detail::atomic_op<double,unsigned long long>(m, value, amrex::Maximum<double>());
    }
#endif
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    int Max_device (int* const m, int const value) noexcept
    {
        return atomicMax(m, value);
    }
    // (analogous integer overloads via atomicMax elided in this excerpt)
#endif

    template<class T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Max (T* const m, T const value) noexcept
    {
        AMREX_IF_ON_DEVICE((return Max_device(m, value);))
        AMREX_IF_ON_HOST((
            auto const old = *m;
            *m = (*m) > value ? (*m) : value;
            return old;
        ))
    }
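    // Illustrative usage (not part of the original header), mirroring Min:
    //
    //   amrex::Gpu::Atomic::Max(d_umax, std::abs(u[i]));
    //
    // with d_umax an assumed device pointer initialized to zero.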
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    int LogicalOr (int* const m, int const value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<int,mo,ms,as> a{*m};
        return a.fetch_or(value);
#else
        AMREX_IF_ON_DEVICE((
            return atomicOr(m, value);
        ))
        AMREX_IF_ON_HOST((
            int const old = *m;
            *m = (*m) || value;
            return old;
        ))
#endif
    }
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    int LogicalAnd (int* const m, int const value) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<int,mo,ms,as> a{*m};
        return a.fetch_and(value ? ~0x0 : 0);
#else
        AMREX_IF_ON_DEVICE((
            return atomicAnd(m, value ? ~0x0 : 0);
        ))
        AMREX_IF_ON_HOST((
            int const old = *m;
            *m = (*m) && value;
            return old;
        ))
#endif
    }
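    // Illustrative usage (not part of the original header): reduce per-thread
    // predicates into single int flags, with *d_any initialized to 0 and
    // *d_all to 1 (assumed device pointers):
    //
    //   amrex::Gpu::Atomic::LogicalOr (d_any, u[i] > cutoff);
    //   amrex::Gpu::Atomic::LogicalAnd(d_all, u[i] > 0.0);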
    template <typename T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Exch (T* address, T val) noexcept
    {
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<T,mo,ms,as> a{*address};
        return a.exchange(val);
#else
        AMREX_IF_ON_DEVICE((
            return atomicExch(address, val);
        ))
        AMREX_IF_ON_HOST((
            auto const old = *address;
            *address = val;
            return old;
        ))
#endif
    }
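    // Illustrative usage (not part of the original header): claim a slot by
    // swapping in an owner id; only the first thread observes the initial
    // value. d_owner is an assumed device pointer initialized to -1:
    //
    //   int prev = amrex::Gpu::Atomic::Exch(d_owner, my_id);
    //   if (prev == -1) { /* this thread won the slot */ }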
    template <typename T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T CAS (T* const address, T compare, T const val) noexcept
    {   // compare cannot be `T const` because compare_exchange_strong updates it
#if defined(__SYCL_DEVICE_ONLY__)
        constexpr auto mo = sycl::memory_order::relaxed;
        constexpr auto ms = sycl::memory_scope::device;
        constexpr auto as = sycl::access::address_space::global_space;
        sycl::atomic_ref<T,mo,ms,as> a{*address};
        a.compare_exchange_strong(compare, val);
        return compare;
#else
        AMREX_IF_ON_DEVICE((
            return atomicCAS(address, compare, val);
        ))
        AMREX_IF_ON_HOST((
            auto const old = *address;
            *address = (old == compare ? val : old);
            return old;
        ))
#endif
    }
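    // Illustrative usage (not part of the original header): lock-free
    // "set if still unset". CAS returns the previously observed value, so the
    // transition succeeded iff the return equals the expected value:
    //
    //   if (amrex::Gpu::Atomic::CAS(d_flag, 0u, 1u) == 0u) {
    //       /* this thread flipped *d_flag from 0 to 1 */
    //   }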
    template <typename T, std::enable_if_t<sizeof(T) == sizeof(int), int> = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Multiply_device (T* const prod, T const value) noexcept
    {
        return detail::atomic_op<T,int>(prod, value, amrex::Multiplies<T>());
    }

    template <typename T, std::enable_if_t<sizeof(T) == sizeof(unsigned long long), int> = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Multiply_device (T* const prod, T const value) noexcept
    {
        return detail::atomic_op<T,unsigned long long>(prod, value, amrex::Multiplies<T>());
    }

    template<class T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Multiply (T* const prod, T const value) noexcept
    {
        AMREX_IF_ON_DEVICE((return Multiply_device(prod, value);))
        AMREX_IF_ON_HOST((
            auto const old = *prod;
            *prod *= value;
            return old;
        ))
    }
    template <typename T, std::enable_if_t<sizeof(T) == sizeof(int), int> = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Divide_device (T* const quot, T const value) noexcept
    {
        return detail::atomic_op<T,int>(quot, value, amrex::Divides<T>());
    }

    template <typename T, std::enable_if_t<sizeof(T) == sizeof(unsigned long long), int> = 0>
    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
    T Divide_device (T* const quot, T const value) noexcept
    {
        return detail::atomic_op<T,unsigned long long>(quot, value, amrex::Divides<T>());
    }

    template<class T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    T Divide (T* const quot, T const value) noexcept
    {
        AMREX_IF_ON_DEVICE((return Divide_device(quot, value);))
        AMREX_IF_ON_HOST((
            auto const old = *quot;
            *quot /= value;
            return old;
        ))
    }

} // namespace Gpu::Atomic
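// Added commentary: Multiply and Divide are emulated with the compare-exchange
// helper detail::atomic_op, so they work for any type whose size matches int
// or unsigned long long and which provides *, / and ==. For example:
//
//   amrex::Gpu::Atomic::Multiply(d_scale, 0.5); // atomically *d_scale *= 0.5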
namespace HostDevice::Atomic {

    template <class T>
    AMREX_FORCE_INLINE
    void Add_Host (T* const sum, T const value) noexcept
    {
#ifdef AMREX_USE_OMP
#pragma omp atomic update
#endif
        *sum += value;
    }

    template <class T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    void Add (T* const sum, T const value) noexcept
    {
        AMREX_IF_ON_DEVICE((Gpu::Atomic::AddNoRet(sum, value);))
        AMREX_IF_ON_HOST((Add_Host(sum, value);))
    }

} // namespace HostDevice::Atomic
namespace Gpu {
    // Functor wrappers around the atomic operations above, usable as combiners
    // in generic algorithms.
    struct AtomicAdd {
        template <typename T>
        AMREX_GPU_DEVICE void operator() (T* const dest, T const source) noexcept {
            Gpu::Atomic::AddNoRet(dest, source);
        }
    };

    struct AtomicMin {
        template <typename T>
        AMREX_GPU_DEVICE void operator() (T* const dest, T const source) noexcept {
            Gpu::Atomic::Min(dest, source);
        }
    };

    struct AtomicMax {
        template <typename T>
        AMREX_GPU_DEVICE void operator() (T* const dest, T const source) noexcept {
            Gpu::Atomic::Max(dest, source);
        }
    };

    struct AtomicLogicalAnd {
        template <typename T>
        AMREX_GPU_DEVICE void operator() (T* const dest, T const source) noexcept {
            Gpu::Atomic::LogicalAnd(dest, source);
        }
    };

    struct AtomicLogicalOr {
        template <typename T>
        AMREX_GPU_DEVICE void operator() (T* const dest, T const source) noexcept {
            Gpu::Atomic::LogicalOr(dest, source);
        }
    };
} // namespace Gpu

} // namespace amrex

#endif