Block-Structured AMR Software Framework
 
AMReX_AlgVector.H
#ifndef AMREX_ALG_VECTOR_H_
#define AMREX_ALG_VECTOR_H_
#include <AMReX_Config.H>

#include <AMReX_AlgPartition.H>
#include <AMReX_FabArray.H>
#include <AMReX_INT.H>
#include <AMReX_LayoutData.H>
#include <AMReX_TableData.H>

#include <cstring>
#include <fstream>
#include <string>
#include <type_traits>

namespace amrex {
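// AlgVector<T> is a distributed vector of floating-point values for linear
// algebra: the global rows are split across MPI ranks by an AlgPartition, and
// each rank owns the contiguous block of rows [globalBegin(), globalEnd()).
// A minimal usage sketch (assuming AMReX has been initialized and T = Real):
//
//     AlgVector<Real> x(1000);   // 1000 global rows, partitioned over the ranks
//     x.setVal(1.0);             // fill the locally owned rows
//     Real n = x.norm2();        // 2-norm, reduced over all ranks
//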
template <typename T, typename Allocator = DefaultAllocator<T> >
class AlgVector
{
public:
    using value_type = T;
    using allocator_type = Allocator;

    using Vec = PODVector<T,Allocator>;

    AlgVector () = default;
    explicit AlgVector (Long global_size);
    explicit AlgVector (AlgPartition partition);

    AlgVector (AlgVector<T, Allocator> const&) = delete;
    AlgVector& operator= (AlgVector<T, Allocator> const&) = delete;

    AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
    AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;

    ~AlgVector () = default;

    void define (Long global_size);
    void define (AlgPartition partition);

    [[nodiscard]] bool empty () const { return m_partition.empty(); }

    [[nodiscard]] AlgPartition const& partition () const { return m_partition; }

    [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
    [[nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }

    //! Inclusive global index begin.
    [[nodiscard]] Long globalBegin () const { return m_begin; }
    //! Exclusive global index end.
    [[nodiscard]] Long globalEnd () const { return m_end; }

    [[nodiscard]] T const* data () const { return m_data.data(); }
    [[nodiscard]] T * data () { return m_data.data(); }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T const, Long> view () const {
        return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T const, Long> const_view () const {
        return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T, Long> view () {
        return Table1D<T, Long>{m_data.data(), m_begin, m_end};
    }

    void setVal (T val);
    void setValAsync (T val);

    void copy (AlgVector<T> const& rhs);
    void copyAsync (AlgVector<T> const& rhs);

    void plus (AlgVector<T> const& rhs);
    void plusAsync (AlgVector<T> const& rhs);

    void scale (T scale_factor);
    void scaleAsync (T scale_factor);

    [[nodiscard]] T sum (bool local = false) const;

    [[nodiscard]] T norminf (bool local = false) const;
    [[nodiscard]] T norm2 (bool local = false) const;

    template <typename FAB,
              std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                               std::is_same_v<T,typename FAB::value_type>, int> = 0>
    void copyFrom (FabArray<FAB> const& fa);

    template <typename FAB,
              std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                               std::is_same_v<T,typename FAB::value_type>, int> = 0>
    void copyTo (FabArray<FAB> & fa) const;

    void printToFile (std::string const& file) const;

private:
    AlgPartition m_partition;
    Long m_begin = 0;
    Long m_end = 0;
    Vec m_data;
};
template <typename T, typename Allocator>
AlgVector<T,Allocator>::AlgVector (Long global_size)
    : m_partition(global_size),
      m_begin(m_partition[ParallelDescriptor::MyProc()]),
      m_end(m_partition[ParallelDescriptor::MyProc()+1]),
      m_data(m_end-m_begin)
{
    static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
}

template <typename T, typename Allocator>
AlgVector<T,Allocator>::AlgVector (AlgPartition partition)
    : m_partition(std::move(partition)),
      m_begin(m_partition[ParallelDescriptor::MyProc()]),
      m_end(m_partition[ParallelDescriptor::MyProc()+1]),
      m_data(m_end-m_begin)
{
    static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::define (Long global_size)
{
    m_partition.define(global_size);
    m_begin = m_partition[ParallelDescriptor::MyProc()];
    m_end = m_partition[ParallelDescriptor::MyProc()+1];
    m_data.resize(m_end-m_begin);
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::define (AlgPartition partition)
{
    m_partition = std::move(partition);
    m_begin = m_partition[ParallelDescriptor::MyProc()];
    m_end = m_partition[ParallelDescriptor::MyProc()+1];
    m_data.resize(m_end-m_begin);
}
template <typename T, typename Allocator>
void AlgVector<T,Allocator>::setVal (T val)
{
    setValAsync(val);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::setValAsync (T val)
{
    Long n = m_data.size();
    T* p = m_data.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::copy (AlgVector<T> const& rhs)
{
    copyAsync(rhs);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::copyAsync (AlgVector<T> const& rhs)
{
    Long n = m_data.size();
    AMREX_ASSERT(m_data.size() == rhs.m_data.size());
    T* dst = m_data.data();
    T const* src = rhs.data();
#ifdef AMREX_USE_GPU
    Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
#else
    std::memcpy(dst, src, n*sizeof(T));
#endif
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::plus (AlgVector<T> const& rhs)
{
    plusAsync(rhs);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::plusAsync (AlgVector<T> const& rhs)
{
    Long n = m_data.size();
    AMREX_ASSERT(m_data.size() == rhs.m_data.size());
    T* dst = m_data.data();
    T const* src = rhs.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
}
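// Note: the *Async member functions only queue the work (via ParallelFor or a
// stream-ordered memcpy when AMREX_USE_GPU is defined); the non-Async versions
// call the Async variant and then Gpu::streamSynchronize().  A sketch of
// chaining several updates with a single synchronization, for hypothetical
// vectors x and y of the same size:
//
//     x.setValAsync(1.0);
//     y.copyAsync(x);
//     y.plusAsync(x);
//     Gpu::streamSynchronize();   // wait once for all queued operations
//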
template <typename T, typename Allocator>
void AlgVector<T,Allocator>::scale (T scale_factor)
{
    scaleAsync(scale_factor);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::scaleAsync (T scale_factor)
{
    Long n = m_data.size();
    T* p = m_data.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
}
template <typename T, typename Allocator>
T AlgVector<T,Allocator>::sum (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return p[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return r;
}

template <typename T, typename Allocator>
T AlgVector<T,Allocator>::norminf (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return amrex::Math::abs(p[i]);
    });
    if (!local) {
        ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
    }
    return r;
}

template <typename T, typename Allocator>
T AlgVector<T,Allocator>::norm2 (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return p[i]*p[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return std::sqrt(r);
}
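// sum(), norminf() and norm2() first reduce over the locally owned rows; unless
// local == true, the per-rank results are then combined with an MPI all-reduce
// over ParallelContext::CommunicatorSub(), so every rank returns the global value.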
template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                          std::is_same_v<T,typename FAB::value_type>, int> >
void AlgVector<T,Allocator>::copyFrom (FabArray<FAB> const& fa)
{
    AMREX_ASSERT(fa.is_cell_centered());

    LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
    T* p = m_data.data();
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        dptrs[mfi] = p;
        p += mfi.validbox().numPts();
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
#pragma omp parallel
#endif
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
    }
}

template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                          std::is_same_v<T,typename FAB::value_type>, int> >
void AlgVector<T,Allocator>::copyTo (FabArray<FAB> & fa) const
{
    AMREX_ASSERT(fa.is_cell_centered());

    LayoutData<T const*> dptrs(fa.boxArray(), fa.DistributionMap());
    T const* p = m_data.data();
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        dptrs[mfi] = p;
        p += mfi.validbox().numPts();
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
#pragma omp parallel
#endif
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
    }
}
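// copyFrom()/copyTo() move component 0 between this vector and a cell-centered
// FabArray: the rows are laid out box by box in MFIter order, with the valid
// cells of each box stored contiguously, so the caller is responsible for
// building the AlgPartition consistently with the FabArray's layout.  A sketch,
// assuming a MultiFab mf whose locally owned valid cells match numLocalRows():
//
//     x.copyFrom(mf);   // flatten component 0 of mf into the vector
//     x.scale(2.0);
//     x.copyTo(mf);     // write the scaled values back into mf
//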
template <typename T, typename Allocator>
void AlgVector<T,Allocator>::printToFile (std::string const& file) const
{
    std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
    ofs << m_begin << " " << m_end << "\n";
#ifdef AMREX_USE_GPU
    Gpu::PinnedVector<T> hv(m_data.size());
    Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
    Gpu::streamSynchronize();
    T const* p = hv.data();
#else
    T const* p = m_data.data();
#endif
    for (Long i = 0, N = m_data.size(); i < N; ++i) {
        ofs << i+m_begin << " " << p[i] << "\n";
    }
}
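// printToFile() writes one file per rank, named <file>.<rank>: the first line is
// "<globalBegin> <globalEnd>", followed by one "<global row> <value>" line per
// locally owned row.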
template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
//
template <class V>
struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::value_type,
                                                                typename V::allocator_type>,
                                                      V> > >
    : std::true_type {};
template <typename V1, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
ForEach (V1 & x, F const& f)
{
    Long n = x.numLocalRows();
    auto* px = x.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i]);
    });
}

template <typename V1, typename V2, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value>
ForEach (V1 & x, V2 & y, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i]);
    });
}

template <typename V1, typename V2, typename V3, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value>
ForEach (V1 & x, V2 & y, V3 & z, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value &&
                 IsAlgVector<std::decay_t<V5> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    auto* AMREX_RESTRICT pb = b.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i], pb[i]);
    });
}
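// The ForEach overloads apply a user functor element-wise to one to five
// AlgVectors of equal local size, using ParallelFor on the device; they do not
// synchronize the GPU stream.  A sketch of y[i] = a*x[i]*x[i] for hypothetical
// vectors x and y and a scalar a:
//
//     ForEach(y, x, [=] AMREX_GPU_DEVICE (Real& yi, Real const& xi) {
//         yi = a*xi*xi;
//     });
//     Gpu::streamSynchronize();   // wait for the queued kernel if needed
//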
template <typename T>
T Dot (AlgVector<T> const& x, AlgVector<T> const& y, bool local = false)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto const* px = x.data();
    auto const* py = y.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return px[i] * py[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return r;
}
template <typename T>
void Axpy (AlgVector<T>& y, T a, AlgVector<T> const& x, bool async = false)
{
    ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
    if (!async) { Gpu::streamSynchronize(); }
}

template <typename T>
void LinComb (AlgVector<T>& y, T a, AlgVector<T> const& xa, T b, AlgVector<T> const& xb, bool async = false)
{
    ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
        yi = a*xai + b*xbi;
    });
    if (!async) { Gpu::streamSynchronize(); }
}
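// Dot, Axpy and LinComb are free-function helpers on AlgVector: Dot returns the
// inner product (globally reduced unless local == true), Axpy computes y += a*x,
// and LinComb computes y = a*xa + b*xb (both synchronize unless async == true).
// A sketch using hypothetical vectors x, p, r, b, q and a scalar omega:
//
//     Real rho = Dot(r, r);                        // globally reduced <r, r>
//     Axpy(x, omega, p);                           // x += omega * p
//     LinComb(r, Real(1.0), b, Real(-1.0), q);     // r = b - q
//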
}

#endif