Block-Structured AMR Software Framework
AMReX_AlgVecUtil.H
#ifndef AMREX_ALG_VEC_UTIL_H_
#define AMREX_ALG_VEC_UTIL_H_
#include <AMReX_Config.H>

#include <AMReX_AlgVector.H>

namespace amrex {

template <class V, class Enable = void> struct IsAlgVector : std::false_type {};

//! Specialization: V qualifies if it is exactly AlgVector<V::value_type, V::allocator_type>.
template <class V>
struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::value_type,
                                                                typename V::allocator_type>,
                                                      V> > >
    : std::true_type {};

template <typename V1, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
ForEach (V1 & x, F const& f)
{
    Long n = x.numLocalRows();
    auto* px = x.data();
    ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i]);
    });
}
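
// Usage sketch (illustrative, not part of the original header): the
// single-vector ForEach applies f to every local element. The vector
// construction below is an assumption; see AMReX_AlgVector.H for the
// actual constructors.
//
//     AlgVector<Real> x(n_global);   // assumed constructor
//     ForEach(x, [=] AMREX_GPU_DEVICE (Real& xi) { xi = 0.0; });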

template <typename V1, typename V2, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value>
ForEach (V1 & x, V2 & y, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i]);
    });
}
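
// Usage sketch (illustrative): a saxpy-style update with the two-vector
// overload. x is modified in place while y is only read, which the lambda
// expresses through its parameter types.
//
//     ForEach(x, y, [=] AMREX_GPU_DEVICE (Real& xi, Real const& yi) {
//         xi += 2.0*yi;
//     });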

template <typename V1, typename V2, typename V3, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value>
ForEach (V1 & x, V2 & y, V3 & z, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value &&
                 IsAlgVector<std::decay_t<V5> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    auto* AMREX_RESTRICT pb = b.data();
    ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i], pb[i]);
    });
}

//! Dot product of x and y. If local is false (the default), the result is
//! summed over the MPI processes of the current ParallelContext frame.
template <typename T, typename Allocator>
T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto const* px = x.data();
    auto const* py = y.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return px[i] * py[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return r;
}
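
// Usage sketch (illustrative): each rank computes a partial sum over its
// local rows; with local = false the partials are combined across ranks.
//
//     Real r  = Dot(x, y);          // globally reduced dot product
//     Real rl = Dot(x, y, true);    // this rank's local contribution only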

//! y += a*x
template <typename T, typename Allocator>
void Axpy (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
    ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
}

//! y = a*y + x
template <typename T, typename Allocator>
void Xpay (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
    ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi = a*yi + xi; });
}

//! y = a*xa + b*xb
template <typename T, typename Allocator>
void LinComb (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& xa,
              T b, AlgVector<T,Allocator> const& xb)
{
    ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
        yi = a*xai + b*xbi;
    });
}
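
// Usage sketch (illustrative): composing the BLAS-1 style helpers above,
// with x, xa, xb, y as AlgVectors and a, b as scalars of type T.
//
//     Axpy(y, a, x);              // y += a*x
//     Xpay(y, a, x);              // y = a*y + x
//     LinComb(y, a, xa, b, xb);   // y = a*xa + b*xb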

}

#endif