Block-Structured AMR Software Framework
 
Loading...
Searching...
No Matches
AMReX_AlgVector.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VECTOR_H_
2#define AMREX_ALG_VECTOR_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_FabArray.H>
7#include <AMReX_INT.H>
8#include <AMReX_LayoutData.H>
9#include <AMReX_TableData.H>
10
11#include <fstream>
12#include <string>
13#include <type_traits>
14
15namespace amrex {
16
// Distributed algebraic vector: each MPI rank owns the contiguous slice of
// global rows [m_begin, m_end) given by an AlgPartition; local values live in
// m_data.  (NOTE(review): this Doxygen dump elided the `class AlgVector` head
// line and several member lines -- verify against the real header.)
17template <typename T, typename Allocator = DefaultAllocator<T> >
19{
20public:
21 using value_type = T;
22 using allocator_type = Allocator;
23
25
 // Default-constructed vector is empty until define() is called.
26 AlgVector () = default;
 // Build a vector with global_size rows, partitioned across ranks.
27 explicit AlgVector (Long global_size);
29
32
 // Movable; copy operations are deleted per the member index (elided here).
33 AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
34 AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;
35
36 ~AlgVector () = default;
37
 // (Re)initialize with global_size rows; see also the AlgPartition overload.
38 void define (Long global_size);
40
41 [[nodiscard]] bool empty () const { return m_partition.empty(); }
42
43 [[nodiscard]] AlgPartition const& partition () const { return m_partition; }
44
 // Number of rows owned by this rank (m_end - m_begin).
45 [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
46 [[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }
47
 // Inclusive global begin / exclusive global end of this rank's rows.
49 [[nodiscard]] Long globalBegin () const { return m_begin; }
51 [[nodiscard]] Long globalEnd () const { return m_end; }
52
 // Raw pointers to the local data (device memory in GPU builds).
53 [[nodiscard]] T const* data () const { return m_data.data(); }
54 [[nodiscard]] T * data () { return m_data.data(); }
55
 // NOTE(review): the view()/const_view() accessor bodies (Table1D views per
 // the member index, lines 57-70) were elided by the dump -- confirm.
56 [[nodiscard]] AMREX_FORCE_INLINE
60
61 [[nodiscard]] AMREX_FORCE_INLINE
65
66 [[nodiscard]] AMREX_FORCE_INLINE
70
 // The *Async variants do not synchronize the GPU stream; the plain
 // variants call the Async form and then (per the elided lines) sync.
71 void setVal (T val);
72 void setValAsync (T val);
73
74 void copy (AlgVector<T> const& rhs);
75 void copyAsync (AlgVector<T> const& rhs);
76
77 void plus (AlgVector<T> const& rhs);
78 void plusAsync (AlgVector<T> const& rhs);
79
80 void scale (T scale_factor);
81 void scaleAsync (T scale_factor);
82
 // Reductions over local rows; when local == false the result is reduced
 // across ranks (reduction call elided in this dump).
83 [[nodiscard]] T sum (bool local = false) const;
84
85 [[nodiscard]] T norminf (bool local = false) const;
86 [[nodiscard]] T norm2 (bool local = false) const;
87
 // Gather valid-box FAB data into this vector, one value per cell, in
 // MFIter order.  Enabled only for BaseFab-like FABs with matching T.
88 template <typename FAB,
89 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
90 std::is_same_v<T,typename FAB::value_type>, int> = 0>
91 void copyFrom (FabArray<FAB> const& fa);
92
 // Scatter this vector's values back into the FabArray's valid boxes.
93 template <typename FAB,
94 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
95 std::is_same_v<T,typename FAB::value_type>,int> = 0>
96 void copyTo (FabArray<FAB> & fa) const;
97
 // Debug output: one file per rank, "index value" per line.
98 void printToFile (std::string const& file) const;
99
100private:
 // (m_partition member declaration at line 101 and m_data (Vec) at line 104
 // were elided by the dump.)
102 Long m_begin = 0;
103 Long m_end = 0;
105};
106
107template <typename T, typename Allocator>
// Construct from a global row count: the partition maps each rank to its
// [begin, end) slice and the local storage is sized accordingly.
// NOTE(review): the dump elided the signature line (108) -- per the class
// declaration it is `AlgVector<T,Allocator>::AlgVector (Long global_size)`.
109 : m_partition(global_size),
110 m_begin(m_partition[ParallelDescriptor::MyProc()]),
111 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
112 m_data(m_end-m_begin)
113{
 // Integer element types are rejected at compile time.
114 static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
115}
116
117template <typename T, typename Allocator>
// Construct from an existing AlgPartition (taken by value and moved in).
// NOTE(review): signature line 118 was elided by the dump; the initializer
// list implies `AlgVector<T,Allocator>::AlgVector (AlgPartition partition)`
// -- confirm against the real header.
119 : m_partition(std::move(partition)),
120 m_begin(m_partition[ParallelDescriptor::MyProc()]),
121 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
122 m_data(m_end-m_begin)
123{
124 static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
125}
126
127template <typename T, typename Allocator>
128void AlgVector<T,Allocator>::define (Long global_size)
129{
130 m_partition.define(global_size);
131 m_begin = m_partition[ParallelDescriptor::MyProc()];
132 m_end = m_partition[ParallelDescriptor::MyProc()+1];
133 m_data.resize(m_end-m_begin);
134}
135
136template <typename T, typename Allocator>
// Reinitialize from an existing partition (moved in) and resize local storage.
// NOTE(review): signature line 137 was elided; the body implies
// `void AlgVector<T,Allocator>::define (AlgPartition partition)` -- confirm.
138{
139 m_partition = std::move(partition);
140 m_begin = m_partition[ParallelDescriptor::MyProc()];
141 m_end = m_partition[ParallelDescriptor::MyProc()+1];
 // resize keeps the vector usable after repeated define() calls.
142 m_data.resize(m_end-m_begin);
143}
144
145template <typename T, typename Allocator>
// Blocking form: set every local element to val.
// NOTE(review): the dump elided line 146 (the `setVal (T val)` signature per
// the member index) and line 149 -- the latter is presumably
// `Gpu::streamSynchronize();`, matching the Async/blocking pairing used by
// Axpy/LinComb below.  Verify against the real header.
147{
148 setValAsync(val);
150}
151
152template <typename T, typename Allocator>
// Fill all local elements with val via ParallelFor; does not synchronize the
// GPU stream (per the Async naming -- caller synchronizes if needed).
// NOTE(review): signature line 153 (`setValAsync (T val)`) elided by the dump.
154{
155 Long n = m_data.size();
156 T* p = m_data.data();
157 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
158}
159
160template <typename T, typename Allocator>
// Blocking copy of rhs's local data into this vector.
// NOTE(review): lines 161 (signature `copy (AlgVector<T> const& rhs)`) and
// 164 (presumably `Gpu::streamSynchronize();`) were elided by the dump.
162{
163 copyAsync(rhs);
165}
166
167template <typename T, typename Allocator>
// Asynchronously copy rhs's local data into this vector; sizes must match.
// NOTE(review): signature line 168 (`copyAsync (AlgVector<T> const& rhs)`)
// elided by the dump.
169{
170 Long n = m_data.size();
 // Both vectors must own the same number of local rows.
171 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
172 T* dst = m_data.data();
173 T const* src = rhs.data();
 // Device-to-device memcpy on GPU builds; presumably a plain memcpy
 // fallback in CPU builds -- see AMReX_GpuDevice.H.
174 Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
175}
176
177template <typename T, typename Allocator>
// Blocking elementwise this += rhs.
// NOTE(review): lines 178 (signature) and 181 (presumably
// `Gpu::streamSynchronize();`) were elided by the dump.
179{
180 plusAsync(rhs);
182}
183
184template <typename T, typename Allocator>
// Asynchronous elementwise this += rhs over the local rows.
// NOTE(review): signature line 185 (`plusAsync (AlgVector<T> const& rhs)`)
// elided by the dump.
186{
187 Long n = m_data.size();
188 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
189 T* dst = m_data.data();
190 T const* src = rhs.data();
191 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
192}
193
194template <typename T, typename Allocator>
// Blocking elementwise multiply of all local elements by scale_factor.
195void AlgVector<T,Allocator>::scale (T scale_factor)
196{
197 scaleAsync(scale_factor);
 // NOTE(review): line 198 was elided by the dump -- presumably
 // `Gpu::streamSynchronize();`, matching the other blocking wrappers.
199}
200
201template <typename T, typename Allocator>
// Asynchronous in-place scaling of the local data.
// NOTE(review): signature line 202 (`scaleAsync (T scale_factor)`) elided.
203{
204 Long n = m_data.size();
205 T* p = m_data.data();
206 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
207}
208
209template <typename T, typename Allocator>
// Sum of elements.  With local == true only this rank's rows are summed;
// otherwise the result is reduced across ranks.
210T AlgVector<T,Allocator>::sum (bool local) const
211{
212 Long n = m_data.size();
213 T const* p = m_data.data();
214 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
215 {
216 return p[i];
217 });
218 if (!local) {
 // NOTE(review): line 219 was elided -- presumably
 // `ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());`
 // per the member index entries for ParallelReduce/CommunicatorSub.
220 }
221 return r;
222}
223
224template <typename T, typename Allocator>
// Infinity norm: max absolute value over local rows, reduced across ranks
// unless local == true.
// NOTE(review): signature line 225 (`norminf (bool local) const`) elided.
226{
227 Long n = m_data.size();
228 T const* p = m_data.data();
229 T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
230 {
231 return amrex::Math::abs(p[i]);
232 });
233 if (!local) {
 // NOTE(review): line 234 elided -- presumably a ParallelAllReduce::Max
 // over ParallelContext::CommunicatorSub().  Verify.
235 }
236 return r;
237}
238
239template <typename T, typename Allocator>
// 2-norm: sqrt of the sum of squares; global unless local == true.
// NOTE(review): signature line 240 (`norm2 (bool local) const`) elided.
241{
242 Long n = m_data.size();
243 T const* p = m_data.data();
244 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
245 {
246 return p[i]*p[i];
247 });
248 if (!local) {
 // NOTE(review): line 249 elided -- presumably a cross-rank
 // ParallelAllReduce::Sum of the squared partial sums before the sqrt.
250 }
 // sqrt is applied to the (possibly globally reduced) sum of squares.
251 return std::sqrt(r);
252}
253
254template <typename T, typename Allocator>
255template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
256 std::is_same_v<T,typename FAB::value_type>, int> >
// Gather valid-box data from the FabArray into this vector's local storage,
// one value per cell, boxes laid out consecutively in MFIter order.
// NOTE(review): lines 257 (the `copyFrom (FabArray<FAB> const& fa)`
// signature) and 259 were elided by the dump; 259 is presumably an
// assertion (the member index references FabArrayBase::is_cell_centered).
258{
260
 // First pass: compute, per box, the offset into m_data where that box's
 // cells will land.
261 LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
262 T* p = m_data.data();
263 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
264 dptrs[mfi] = p;
265 p += mfi.validbox().numPts();
266 }
267
 // Second pass: copy each FAB's valid region into its slot.  OpenMP is only
 // used for CPU builds; on GPU the copy runs on the device stream.
268#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
269#pragma omp parallel
270#endif
271 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
272 fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
273 }
274}
275
276template <typename T, typename Allocator>
277template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
278 std::is_same_v<T,typename FAB::value_type>, int> >
// Scatter this vector's values back into the FabArray's valid boxes, the
// inverse of copyFrom (same MFIter ordering).
// NOTE(review): lines 279 (the `copyTo (FabArray<FAB> & fa) const`
// signature), 281, and 283 were elided by the dump; line 283 must declare
// `dptrs` (presumably `LayoutData<T const*> dptrs(fa.boxArray(),
// fa.DistributionMap());` mirroring copyFrom) -- verify.
280{
282
284 T const* p = m_data.data();
285 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
286 dptrs[mfi] = p;
287 p += mfi.validbox().numPts();
288 }
289
290#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
291#pragma omp parallel
292#endif
293 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
294 fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
295 }
296}
297
298template <typename T, typename Allocator>
// Debug dump: writes "<file>.<rank>" containing the local [begin, end) range
// followed by one "global_index value" pair per local row.
299void AlgVector<T,Allocator>::printToFile (std::string const& file) const
300{
301 std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
302 ofs << m_begin << " " << m_end << "\n";
303#ifdef AMREX_USE_GPU
 // Stage device data into pinned host memory before printing.
304 Gpu::PinnedVector<T> hv(m_data.size());
305 Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
 // NOTE(review): line 306 was elided by the dump -- presumably
 // `Gpu::streamSynchronize();`, required before reading hv on the host.
307 T const* p = hv.data();
308#else
 // NOTE(review): `m_data` here looks like it should be `m_data.data()`
 // (as in the GPU branch) unless Vec converts to T const* -- verify.
309 T const* p = m_data;
310#endif
311 for (Long i = 0, N = m_data.size(); i < N; ++i) {
312 ofs << i+m_begin << " " << p[i] << "\n";
313 }
314}
315
316template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
317//
318template <class V>
319struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::value_type,
320 typename V::allocator_type>,
321 V> > >
322 : std::true_type {};
323
324template <typename V1, typename F>
325std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
326ForEach (V1 & x, F const& f)
327{
328 Long n = x.numLocalRows();
329 auto* px = x.data();
330 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
331 {
332 f(px[i]);
333 });
334}
335
336template <typename V1, typename V2, typename F>
337std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
338 IsAlgVector<std::decay_t<V2> >::value>
339ForEach (V1 & x, V2 & y, F const& f)
340{
341 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
342 Long n = x.numLocalRows();
343 auto* AMREX_RESTRICT px = x.data();
344 auto* AMREX_RESTRICT py = y.data();
345 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
346 {
347 f(px[i], py[i]);
348 });
349}
350
351template <typename V1, typename V2, typename V3, typename F>
352std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
353 IsAlgVector<std::decay_t<V2> >::value &&
354 IsAlgVector<std::decay_t<V3> >::value>
355ForEach (V1 & x, V2 & y, V3 & z, F const& f)
356{
357 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
358 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
359 Long n = x.numLocalRows();
360 auto* AMREX_RESTRICT px = x.data();
361 auto* AMREX_RESTRICT py = y.data();
362 auto* AMREX_RESTRICT pz = z.data();
363 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
364 {
365 f(px[i], py[i], pz[i]);
366 });
367}
368
369template <typename V1, typename V2, typename V3, typename V4, typename F>
370std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
371 IsAlgVector<std::decay_t<V2> >::value &&
372 IsAlgVector<std::decay_t<V3> >::value &&
373 IsAlgVector<std::decay_t<V4> >::value>
374ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
375{
376 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
377 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
378 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
379 Long n = x.numLocalRows();
380 auto* AMREX_RESTRICT px = x.data();
381 auto* AMREX_RESTRICT py = y.data();
382 auto* AMREX_RESTRICT pz = z.data();
383 auto* AMREX_RESTRICT pa = a.data();
384 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
385 {
386 f(px[i], py[i], pz[i], pa[i]);
387 });
388}
389
390template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
391std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
392 IsAlgVector<std::decay_t<V2> >::value &&
393 IsAlgVector<std::decay_t<V3> >::value &&
394 IsAlgVector<std::decay_t<V4> >::value &&
395 IsAlgVector<std::decay_t<V5> >::value>
396ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
397{
398 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
399 AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
400 AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
401 AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
402 Long n = x.numLocalRows();
403 auto* AMREX_RESTRICT px = x.data();
404 auto* AMREX_RESTRICT py = y.data();
405 auto* AMREX_RESTRICT pz = z.data();
406 auto* AMREX_RESTRICT pa = a.data();
407 auto* AMREX_RESTRICT pb = b.data();
408 ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
409 {
410 f(px[i], py[i], pz[i], pa[i], pb[i]);
411 });
412}
413
414template <typename T>
// Dot product of two AlgVectors over their local rows; with local == false
// the partial sums are reduced across ranks.
415T Dot (AlgVector<T> const& x, AlgVector<T> const& y, bool local = false)
416{
417 AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
418 Long n = x.numLocalRows();
419 auto const* px = x.data();
420 auto const* py = y.data();
421 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
422 {
423 return px[i] * py[i];
424 });
425 if (!local) {
 // NOTE(review): line 426 was elided by the dump -- presumably
 // `ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());`.
427 }
428 return r;
429}
430
431template <typename T>
432void Axpy (AlgVector<T>& y, T a, AlgVector<T> const& x, bool async = false)
433{
434 ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
435 if (!async) { Gpu::streamSynchronize(); }
436}
437
438template <typename T>
439void LinComb (AlgVector<T>& y, T a, AlgVector<T> const& xa, T b, AlgVector<T> const& xb, bool async = false)
440{
441 ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
442 yi = a*xai + b*xbi;
443 });
444 if (!async) { Gpu::streamSynchronize(); }
445}
446
447}
448
449#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_RESTRICT
Definition AMReX_Extension.H:37
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Definition AMReX_AlgPartition.H:14
Long numGlobalRows() const
Definition AMReX_AlgPartition.H:28
bool empty() const
Definition AMReX_AlgPartition.H:25
Definition AMReX_AlgVector.H:19
void setVal(T val)
Definition AMReX_AlgVector.H:146
Long numLocalRows() const
Definition AMReX_AlgVector.H:45
T norminf(bool local=false) const
Definition AMReX_AlgVector.H:225
Table1D< T, Long > view()
Definition AMReX_AlgVector.H:67
AlgPartition const & partition() const
Definition AMReX_AlgVector.H:43
Table1D< T const, Long > const_view() const
Definition AMReX_AlgVector.H:62
void setValAsync(T val)
Definition AMReX_AlgVector.H:153
Long globalBegin() const
Inclusive global index begin.
Definition AMReX_AlgVector.H:49
bool empty() const
Definition AMReX_AlgVector.H:41
void plus(AlgVector< T > const &rhs)
Definition AMReX_AlgVector.H:178
T const * data() const
Definition AMReX_AlgVector.H:53
AlgPartition m_partition
Definition AMReX_AlgVector.H:101
AlgVector(AlgVector< T, Allocator > &&) noexcept=default
Long m_begin
Definition AMReX_AlgVector.H:102
Long m_end
Definition AMReX_AlgVector.H:103
void scale(T scale_factor)
Definition AMReX_AlgVector.H:195
void copy(AlgVector< T > const &rhs)
Definition AMReX_AlgVector.H:161
Long globalEnd() const
Exclusive global index end.
Definition AMReX_AlgVector.H:51
void copyFrom(FabArray< FAB > const &fa)
Definition AMReX_AlgVector.H:257
T * data()
Definition AMReX_AlgVector.H:54
Long numGlobalRows() const
Definition AMReX_AlgVector.H:46
void plusAsync(AlgVector< T > const &rhs)
Definition AMReX_AlgVector.H:185
AlgVector()=default
T norm2(bool local=false) const
Definition AMReX_AlgVector.H:240
AlgVector & operator=(AlgVector< T, Allocator > const &)=delete
void define(Long global_size)
Definition AMReX_AlgVector.H:128
void scaleAsync(T scale_factor)
Definition AMReX_AlgVector.H:202
Vec m_data
Definition AMReX_AlgVector.H:104
AlgVector(AlgVector< T, Allocator > const &)=delete
T sum(bool local=false) const
Definition AMReX_AlgVector.H:210
Table1D< T const, Long > view() const
Definition AMReX_AlgVector.H:57
Allocator allocator_type
Definition AMReX_AlgVector.H:22
void copyTo(FabArray< FAB > &fa) const
Definition AMReX_AlgVector.H:279
void copyAsync(AlgVector< T > const &rhs)
Definition AMReX_AlgVector.H:168
T value_type
Definition AMReX_AlgVector.H:21
void printToFile(std::string const &file) const
Definition AMReX_AlgVector.H:299
bool is_cell_centered() const noexcept
This tests on whether the FabArray is cell-centered.
Definition AMReX_FabArrayBase.cpp:2701
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:345
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Definition AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:141
Definition AMReX_PODVector.H:297
size_type size() const noexcept
Definition AMReX_PODVector.H:637
T * data() noexcept
Definition AMReX_PODVector.H:655
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:317
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:260
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:303
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:204
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:126
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProc() noexcept
return the rank number local to the current Parallel Context
Definition AMReX_ParallelDescriptor.H:125
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
void Axpy(AlgVector< T > &y, T a, AlgVector< T > const &x, bool async=false)
Definition AMReX_AlgVector.H:432
void LinComb(MF &dst, typename MF::value_type a, MF const &src_a, int acomp, typename MF::value_type b, MF const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition AMReX_FabArrayUtility.H:1957
constexpr void ForEach(TypeList< Ts... >, F &&f)
For each type t in TypeList, call f(t)
Definition AMReX_TypeList.H:78
FAB::value_type Dot(FabArray< FAB > const &x, int xcomp, FabArray< FAB > const &y, int ycomp, int ncomp, IntVect const &nghost, bool local=false)
Compute dot products of two FabArrays.
Definition AMReX_FabArrayUtility.H:1621
Definition AMReX_AlgVector.H:316
Definition AMReX_TableData.H:20