Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_AlgVecUtil.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VEC_UTIL_H_
2#define AMREX_ALG_VEC_UTIL_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_AlgVector.H>
6
7namespace amrex {
8
16template <class V> struct IsAlgVector : std::false_type {};
17//
18template <class V>
19requires (std::same_as<AlgVector<typename V::value_type,
20 typename V::allocator_type>,
21 V>)
22struct IsAlgVector<V> : std::true_type {};
23
31template <typename V1, typename F>
32requires (IsAlgVector<std::decay_t<V1> >::value)
33void
34ForEach (V1 & x, F const& f)
35{
36 Long n = x.numLocalRows();
37 auto* px = x.data();
38 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
39 {
40 f(px[i]);
41 });
42}
43
51template <typename V1, typename V2, typename F>
52requires (IsAlgVector<std::decay_t<V1> >::value &&
53 IsAlgVector<std::decay_t<V2> >::value)
54void
55ForEach (V1 & x, V2 & y, F const& f)
56{
57 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
58 Long n = x.numLocalRows();
59 auto* AMREX_RESTRICT px = x.data();
60 auto* AMREX_RESTRICT py = y.data();
61 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
62 {
63 f(px[i], py[i]);
64 });
65}
66
75template <typename V1, typename V2, typename V3, typename F>
76requires (IsAlgVector<std::decay_t<V1> >::value &&
77 IsAlgVector<std::decay_t<V2> >::value &&
78 IsAlgVector<std::decay_t<V3> >::value)
79void
80ForEach (V1 & x, V2 & y, V3 & z, F const& f)
81{
82 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
83 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
84 Long n = x.numLocalRows();
85 auto* AMREX_RESTRICT px = x.data();
86 auto* AMREX_RESTRICT py = y.data();
87 auto* AMREX_RESTRICT pz = z.data();
88 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
89 {
90 f(px[i], py[i], pz[i]);
91 });
92}
93
103template <typename V1, typename V2, typename V3, typename V4, typename F>
104requires (IsAlgVector<std::decay_t<V1> >::value &&
105 IsAlgVector<std::decay_t<V2> >::value &&
106 IsAlgVector<std::decay_t<V3> >::value &&
107 IsAlgVector<std::decay_t<V4> >::value)
108void
109ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
110{
111 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
112 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
113 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
114 Long n = x.numLocalRows();
115 auto* AMREX_RESTRICT px = x.data();
116 auto* AMREX_RESTRICT py = y.data();
117 auto* AMREX_RESTRICT pz = z.data();
118 auto* AMREX_RESTRICT pa = a.data();
119 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
120 {
121 f(px[i], py[i], pz[i], pa[i]);
122 });
123}
124
135template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
136requires (IsAlgVector<std::decay_t<V1> >::value &&
137 IsAlgVector<std::decay_t<V2> >::value &&
138 IsAlgVector<std::decay_t<V3> >::value &&
139 IsAlgVector<std::decay_t<V4> >::value &&
140 IsAlgVector<std::decay_t<V5> >::value)
141void
142ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
143{
144 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
145 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
146 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
147 AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
148 Long n = x.numLocalRows();
149 auto* AMREX_RESTRICT px = x.data();
150 auto* AMREX_RESTRICT py = y.data();
151 auto* AMREX_RESTRICT pz = z.data();
152 auto* AMREX_RESTRICT pa = a.data();
153 auto* AMREX_RESTRICT pb = b.data();
154 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
155 {
156 f(px[i], py[i], pz[i], pa[i], pb[i]);
157 });
158}
159
169template <typename T, typename Allocator>
170T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
171{
172 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
173 Long n = x.numLocalRows();
174 auto const* px = x.data();
175 auto const* py = y.data();
176 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
177 {
178 return px[i] * py[i];
179 });
180 if (!local) {
182 }
183 return r;
184}
185
187template <typename T, typename Allocator>
189{
190 ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
191}
192
194template <typename T, typename Allocator>
196{
197 ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi = a*yi + xi; });
198}
199
201template <typename T, typename Allocator>
203 T b, AlgVector<T,Allocator> const& xb)
204{
205 ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
206 yi = a*xai + b*xbi;
207 });
208}
209
210}
211
212#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_RESTRICT
Definition AMReX_Extension.H:32
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Distributed dense vector that mirrors the layout of an AlgPartition.
Definition AMReX_AlgVector.H:29
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:327
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
Definition AMReX_Amr.cpp:50
FAB::value_type Dot(FabArray< FAB > const &x, int xcomp, FabArray< FAB > const &y, int ycomp, int ncomp, IntVect const &nghost, bool local=false)
Compute dot products of two FabArrays.
Definition AMReX_FabArrayUtility.H:1609
void LinComb(MF &dst, typename MF::value_type a, MF const &src_a, int acomp, typename MF::value_type b, MF const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition AMReX_FabArrayUtility.H:1941
constexpr void ForEach(TypeList< Ts... >, F &&f)
For each type t in TypeList, call f(t)
Definition AMReX_TypeList.H:83
void Xpay(MF &dst, typename MF::value_type a, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src + a * dst
Definition AMReX_FabArrayUtility.H:1906
void Axpy(AlgVector< T, Allocator > &y, T a, AlgVector< T, Allocator > const &x)
y = ax + y. For GPU builds this is asynchronous with respect to the host.
Definition AMReX_AlgVecUtil.H:188
Definition AMReX_AlgVecUtil.H:16