AMReX_AlgVector.H
#ifndef AMREX_ALG_VECTOR_H_
#define AMREX_ALG_VECTOR_H_
#include <AMReX_Config.H>

#include <AMReX_AlgPartition.H>
#include <AMReX_FabArray.H>
#include <AMReX_INT.H>
#include <AMReX_LayoutData.H>
#include <AMReX_TableData.H>

#include <fstream>
#include <string>
#include <type_traits>

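// AlgVector<T> is a distributed vector for algebraic solvers.  Each MPI rank
// owns the contiguous global rows [globalBegin(), globalEnd()), as prescribed
// by an AlgPartition, and stores them contiguously in a PODVector, so data()
// can be handed directly to external linear-algebra libraries.  T must be a
// floating-point type (enforced by a static_assert in the constructors).
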
namespace amrex {

template <typename T, typename Allocator = DefaultAllocator<T> >
class AlgVector
{
public:
    using value_type = T;
    using allocator_type = Allocator;

    using Vec = PODVector<T,Allocator>;

    AlgVector () = default;
    explicit AlgVector (Long global_size);
    explicit AlgVector (AlgPartition partition);

    AlgVector (AlgVector<T, Allocator> const&) = delete;
    AlgVector& operator= (AlgVector<T, Allocator> const&) = delete;

    AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
    AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;

    ~AlgVector () = default;

    void define (Long global_size);
    void define (AlgPartition partition);

    [[nodiscard]] bool empty () const { return m_partition.empty(); }

    [[nodiscard]] AlgPartition const& partition () const { return m_partition; }

    [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
    [[nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }

    //! Inclusive global index begin.
    [[nodiscard]] Long globalBegin () const { return m_begin; }
    //! Exclusive global index end.
    [[nodiscard]] Long globalEnd () const { return m_end; }

    [[nodiscard]] T const* data () const { return m_data.data(); }
    [[nodiscard]] T      * data ()       { return m_data.data(); }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T const, Long> view () const {
        return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T const, Long> const_view () const {
        return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Table1D<T, Long> view () {
        return Table1D<T, Long>{m_data.data(), m_begin, m_end};
    }

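    // The Table1D views are indexed with global row indices: for a view v,
    // valid indices satisfy globalBegin() <= i < globalEnd(), and v(i)
    // addresses local element i - globalBegin().
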
    void setVal (T val);
    void setValAsync (T val);

    void copy (AlgVector<T> const& rhs);
    void copyAsync (AlgVector<T> const& rhs);

    void plus (AlgVector<T> const& rhs);
    void plusAsync (AlgVector<T> const& rhs);

    void scale (T scale_factor);
    void scaleAsync (T scale_factor);

    [[nodiscard]] T sum (bool local = false) const;

    [[nodiscard]] T norminf (bool local = false) const;
    [[nodiscard]] T norm2 (bool local = false) const;

    template <typename FAB,
              std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                               std::is_same_v<T,typename FAB::value_type>, int> = 0>
    void copyFrom (FabArray<FAB> const& fa);

    template <typename FAB,
              std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                               std::is_same_v<T,typename FAB::value_type>,int> = 0>
    void copyTo (FabArray<FAB> & fa) const;

    void printToFile (std::string const& file) const;

private:
    AlgPartition m_partition;
    Long m_begin = 0;
    Long m_end = 0;
    Vec m_data;
};

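// A minimal usage sketch (assumes AMReX has been initialized; n_global is a
// placeholder for the desired global length):
//
//     AlgVector<Real> x(n_global);       // rows are split across MPI ranks
//     AlgVector<Real> y(x.partition());  // y uses the same row distribution
//     x.setVal(1.0);
//     y.setVal(2.0);
//     y.plus(x);                         // y += x, element-wise
//     Real r = Dot(x, y);                // globally reduced dot product
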
template <typename T, typename Allocator>
AlgVector<T,Allocator>::AlgVector (Long global_size)
    : m_partition(global_size),
      m_begin(m_partition[ParallelDescriptor::MyProc()]),
      m_end(m_partition[ParallelDescriptor::MyProc()+1]),
      m_data(m_end-m_begin)
{
    static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
}

template <typename T, typename Allocator>
AlgVector<T,Allocator>::AlgVector (AlgPartition partition)
    : m_partition(std::move(partition)),
      m_begin(m_partition[ParallelDescriptor::MyProc()]),
      m_end(m_partition[ParallelDescriptor::MyProc()+1]),
      m_data(m_end-m_begin)
{
    static_assert(std::is_floating_point<T>::value, "AlgVector is for floating point type only");
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::define (Long global_size)
{
    m_partition.define(global_size);
    m_begin = m_partition[ParallelDescriptor::MyProc()];
    m_end = m_partition[ParallelDescriptor::MyProc()+1];
    m_data.resize(m_end-m_begin);
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::define (AlgPartition partition)
{
    m_partition = std::move(partition);
    m_begin = m_partition[ParallelDescriptor::MyProc()];
    m_end = m_partition[ParallelDescriptor::MyProc()+1];
    m_data.resize(m_end-m_begin);
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::setVal (T val)
{
    setValAsync(val);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::setValAsync (T val)
{
    Long n = m_data.size();
    T* p = m_data.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
}

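// Each mutating operation comes in a synchronous and an *Async flavor.  The
// Async versions only enqueue work on the current GPU stream (on CPU builds
// they run immediately); the plain versions additionally call
// Gpu::streamSynchronize() before returning, as setVal() does above.
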
template <typename T, typename Allocator>
void AlgVector<T,Allocator>::copy (AlgVector<T> const& rhs)
{
    copyAsync(rhs);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::copyAsync (AlgVector<T> const& rhs)
{
    Long n = m_data.size();
    AMREX_ASSERT(m_data.size() == rhs.m_data.size());
    T* dst = m_data.data();
    T const* src = rhs.data();
#ifdef AMREX_USE_GPU
    Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
#else
    std::memcpy(dst, src, n*sizeof(T));
#endif
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::plus (AlgVector<T> const& rhs)
{
    plusAsync(rhs);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::plusAsync (AlgVector<T> const& rhs)
{
    Long n = m_data.size();
    AMREX_ASSERT(m_data.size() == rhs.m_data.size());
    T* dst = m_data.data();
    T const* src = rhs.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::scale (T scale_factor)
{
    scaleAsync(scale_factor);
    Gpu::streamSynchronize();
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::scaleAsync (T scale_factor)
{
    Long n = m_data.size();
    T* p = m_data.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
}

template <typename T, typename Allocator>
T AlgVector<T,Allocator>::sum (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return p[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return r;
}

template <typename T, typename Allocator>
T AlgVector<T,Allocator>::norminf (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return amrex::Math::abs(p[i]);
    });
    if (!local) {
        ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
    }
    return r;
}

template <typename T, typename Allocator>
T AlgVector<T,Allocator>::norm2 (bool local) const
{
    Long n = m_data.size();
    T const* p = m_data.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return p[i]*p[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return std::sqrt(r);
}
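
// sum(), norminf(), and norm2() above, like Dot() later in this file, first
// reduce over the locally owned rows on the device.  With local = false (the
// default) the partial result is then combined across ranks with an MPI
// all-reduce on ParallelContext::CommunicatorSub(); norm2() takes the square
// root only after the global sum of squares, so the result is a true 2-norm.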

template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                         std::is_same_v<T,typename FAB::value_type>, int> >
void AlgVector<T,Allocator>::copyFrom (FabArray<FAB> const& fa)
{
    AMREX_ALWAYS_ASSERT(fa.is_cell_centered());

    LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
    T* p = m_data.data();
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        dptrs[mfi] = p;
        p += mfi.validbox().numPts();
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
#pragma omp parallel
#endif
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
    }
}

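// Both copyFrom() above and copyTo() below identify vector rows with the
// cells of the FabArray's valid boxes, laid out box by box in MFIter order
// with the cells of each box in the ordering used by copyToMem/copyFromMem.
// The caller is responsible for constructing the vector with a partition
// whose local size matches the number of locally owned valid cells.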
template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                         std::is_same_v<T,typename FAB::value_type>, int> >
void AlgVector<T,Allocator>::copyTo (FabArray<FAB> & fa) const
{
    AMREX_ALWAYS_ASSERT(fa.is_cell_centered());

    LayoutData<T const*> dptrs(fa.boxArray(), fa.DistributionMap());
    T const* p = m_data.data();
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        dptrs[mfi] = p;
        p += mfi.validbox().numPts();
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
#pragma omp parallel
#endif
    for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
        fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
    }
}

template <typename T, typename Allocator>
void AlgVector<T,Allocator>::printToFile (std::string const& file) const
{
    std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
    ofs << m_begin << " " << m_end << "\n";
#ifdef AMREX_USE_GPU
    Gpu::PinnedVector<T> hv(m_data.size());
    Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
    Gpu::streamSynchronize();
    T const* p = hv.data();
#else
    T const* p = m_data.data();
#endif
    for (Long i = 0, N = m_data.size(); i < N; ++i) {
        ofs << i+m_begin << " " << p[i] << "\n";
    }
}

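// printToFile("vec") writes one file per rank ("vec.0", "vec.1", ...), each
// holding the owned range [m_begin, m_end) on its first line followed by one
// "global_index value" pair per row.
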
template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
//
template <class V>
struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::value_type,
                                                                typename V::allocator_type>,
                                                      V> > >
    : std::true_type {};

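// The ForEach overloads below apply a functor element-wise to one to five
// AlgVectors of equal local length.  They launch asynchronously through
// ParallelFor; call Gpu::streamSynchronize() before using the results if a
// GPU backend is enabled.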
template <typename V1, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
ForEach (V1 & x, F const& f)
{
    Long n = x.numLocalRows();
    auto* px = x.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i]);
    });
}

template <typename V1, typename V2, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value>
ForEach (V1 & x, V2 & y, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i]);
    });
}

template <typename V1, typename V2, typename V3, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value>
ForEach (V1 & x, V2 & y, V3 & z, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i]);
    });
}

template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                 IsAlgVector<std::decay_t<V2> >::value &&
                 IsAlgVector<std::decay_t<V3> >::value &&
                 IsAlgVector<std::decay_t<V4> >::value &&
                 IsAlgVector<std::decay_t<V5> >::value>
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
    Long n = x.numLocalRows();
    auto* AMREX_RESTRICT px = x.data();
    auto* AMREX_RESTRICT py = y.data();
    auto* AMREX_RESTRICT pz = z.data();
    auto* AMREX_RESTRICT pa = a.data();
    auto* AMREX_RESTRICT pb = b.data();
    ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        f(px[i], py[i], pz[i], pa[i], pb[i]);
    });
}

template <typename T>
T Dot (AlgVector<T> const& x, AlgVector<T> const& y, bool local = false)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long n = x.numLocalRows();
    auto const* px = x.data();
    auto const* py = y.data();
    T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
    {
        return px[i] * py[i];
    });
    if (!local) {
        ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub());
    }
    return r;
}

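// For T = Real, the update y += a*x can be written directly with ForEach:
//
//     ForEach(y, x, [=] AMREX_GPU_DEVICE (Real& yi, Real const& xi) {
//         yi += a*xi;
//     });
//
// Axpy() below wraps exactly this pattern and adds an optional stream sync.
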
template <typename T>
void Axpy (AlgVector<T>& y, T a, AlgVector<T> const& x, bool async = false)
{
    ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
    if (!async) { Gpu::streamSynchronize(); }
}

template <typename T>
void LinComb (AlgVector<T>& y, T a, AlgVector<T> const& xa, T b, AlgVector<T> const& xb, bool async = false)
{
    ForEach(y, xa, xb, [=] AMREX_GPU_DEVICE (T& yi, T const& xai, T const& xbi) {
        yi = a*xai + b*xbi;
    });
    if (!async) { Gpu::streamSynchronize(); }
}

}

#endif