Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_AlgVecUtil.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VEC_UTIL_H_
2#define AMREX_ALG_VEC_UTIL_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_AlgVector.H>
6
7namespace amrex {
8
16template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
17//
18template <class V>
19struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::value_type,
20 typename V::allocator_type>,
21 V> > >
22 : std::true_type {};
23
31template <typename V1, typename F>
32std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
33ForEach (V1 & x, F const& f)
34{
35 Long n = x.numLocalRows();
36 auto* px = x.data();
37 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
38 {
39 f(px[i]);
40 });
41}
42
50template <typename V1, typename V2, typename F>
51std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
52 IsAlgVector<std::decay_t<V2> >::value>
53ForEach (V1 & x, V2 & y, F const& f)
54{
55 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
56 Long n = x.numLocalRows();
57 auto* AMREX_RESTRICT px = x.data();
58 auto* AMREX_RESTRICT py = y.data();
59 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
60 {
61 f(px[i], py[i]);
62 });
63}
64
73template <typename V1, typename V2, typename V3, typename F>
74std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
75 IsAlgVector<std::decay_t<V2> >::value &&
76 IsAlgVector<std::decay_t<V3> >::value>
77ForEach (V1 & x, V2 & y, V3 & z, F const& f)
78{
79 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
80 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
81 Long n = x.numLocalRows();
82 auto* AMREX_RESTRICT px = x.data();
83 auto* AMREX_RESTRICT py = y.data();
84 auto* AMREX_RESTRICT pz = z.data();
85 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
86 {
87 f(px[i], py[i], pz[i]);
88 });
89}
90
100template <typename V1, typename V2, typename V3, typename V4, typename F>
101std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
102 IsAlgVector<std::decay_t<V2> >::value &&
103 IsAlgVector<std::decay_t<V3> >::value &&
104 IsAlgVector<std::decay_t<V4> >::value>
105ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
106{
107 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
108 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
109 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
110 Long n = x.numLocalRows();
111 auto* AMREX_RESTRICT px = x.data();
112 auto* AMREX_RESTRICT py = y.data();
113 auto* AMREX_RESTRICT pz = z.data();
114 auto* AMREX_RESTRICT pa = a.data();
115 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
116 {
117 f(px[i], py[i], pz[i], pa[i]);
118 });
119}
120
131template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
132std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
133 IsAlgVector<std::decay_t<V2> >::value &&
134 IsAlgVector<std::decay_t<V3> >::value &&
135 IsAlgVector<std::decay_t<V4> >::value &&
136 IsAlgVector<std::decay_t<V5> >::value>
137ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
138{
139 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
140 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
141 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
142 AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
143 Long n = x.numLocalRows();
144 auto* AMREX_RESTRICT px = x.data();
145 auto* AMREX_RESTRICT py = y.data();
146 auto* AMREX_RESTRICT pz = z.data();
147 auto* AMREX_RESTRICT pa = a.data();
148 auto* AMREX_RESTRICT pb = b.data();
149 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
150 {
151 f(px[i], py[i], pz[i], pa[i], pb[i]);
152 });
153}
154
164template <typename T, typename Allocator>
165T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
166{
167 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
168 Long n = x.numLocalRows();
169 auto const* px = x.data();
170 auto const* py = y.data();
171 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
172 {
173 return px[i] * py[i];
174 });
175 if (!local) {
177 }
178 return r;
179}
180
182template <typename T, typename Allocator>
184{
185 ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
186}
187
189template <typename T, typename Allocator>
191{
192 ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi = a*yi + xi; });
193}
194
196template <typename T, typename Allocator>
198 T b, AlgVector<T,Allocator> const& xb)
199{
200 ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
201 yi = a*xai + b*xbi;
202 });
203}
204
205}
206
207#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_RESTRICT
Definition AMReX_Extension.H:32
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Distributed dense vector that mirrors the layout of an AlgPartition.
Definition AMReX_AlgVector.H:29
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:319
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
Definition AMReX_Amr.cpp:49
void LinComb(MF &dst, typename MF::value_type a, MF const &src_a, int acomp, typename MF::value_type b, MF const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition AMReX_FabArrayUtility.H:2009
void Xpay(MF &dst, typename MF::value_type a, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src + a * dst
Definition AMReX_FabArrayUtility.H:1974
constexpr void ForEach(TypeList< Ts... >, F &&f)
For each type t in TypeList, call f(t)
Definition AMReX_TypeList.H:83
FAB::value_type Dot(FabArray< FAB > const &x, int xcomp, FabArray< FAB > const &y, int ycomp, int ncomp, IntVect const &nghost, bool local=false)
Compute dot products of two FabArrays.
Definition AMReX_FabArrayUtility.H:1673
void Axpy(AlgVector< T, Allocator > &y, T a, AlgVector< T, Allocator > const &x)
y = ax + y. For GPU builds this is asynchronous with respect to the host.
Definition AMReX_AlgVecUtil.H:183
Definition AMReX_AlgVecUtil.H:16