Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_AlgVector.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VECTOR_H_
2#define AMREX_ALG_VECTOR_H_
#include <AMReX_Config.H>

#include <AMReX_AlgPartition.H>
#include <AMReX_FabArray.H>
#include <AMReX_INT.H>
#include <AMReX_LayoutData.H>
#include <AMReX_TableData.H>

#include <array>
#include <cmath>
#include <fstream>
#include <string>
#include <type_traits>
#include <utility>
15
16namespace amrex {
17
27template <typename T, typename Allocator = DefaultAllocator<T> >
29{
30public:
31 using value_type = T;
32 using allocator_type = Allocator;
33
35
37 AlgVector () = default;
39 explicit AlgVector (Long global_size);
42
45
46 AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
47 AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;
48
49 ~AlgVector () = default;
50
59 void define (Long global_size);
66
68 [[nodiscard]] bool empty () const { return m_partition.empty(); }
69
71 [[nodiscard]] AlgPartition const& partition () const { return m_partition; }
72
74 [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
76 [[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }
77
79 [[nodiscard]] Long globalBegin () const { return m_begin; }
81 [[nodiscard]] Long globalEnd () const { return m_end; }
82
85 [[nodiscard]] T const* data () const { return m_data.data(); }
88 [[nodiscard]] T * data () { return m_data.data(); }
89
93 [[nodiscard]] AMREX_FORCE_INLINE
95 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
96 }
97
101 [[nodiscard]] AMREX_FORCE_INLINE
103 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
104 }
105
109 [[nodiscard]] AMREX_FORCE_INLINE
111 return Table1D<T, Long>{m_data.data(), m_begin, m_end};
112 }
113
116 void setVal (T val);
117
120 void setValAsync (T val);
121
124 void copy (AlgVector<T,Allocator> const& rhs);
125
129
132 void plus (AlgVector<T,Allocator> const& rhs);
133
137
140 void scale (T scale_factor);
141
144 void scaleAsync (T scale_factor);
145
156 [[nodiscard]] T sum (bool local = false) const;
157
168 [[nodiscard]] T norminf (bool local = false) const;
169
180 [[nodiscard]] T norm1 (bool local = false) const;
181
192 [[nodiscard]] T norm2 (bool local = false) const;
193
204 [[nodiscard]] std::pair<T,T> norm1and2 (bool local = false) const;
205
206 template <typename FAB,
207 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
208 std::is_same_v<T,typename FAB::value_type>, int> = 0>
209 void copyFrom (FabArray<FAB> const& fa);
210
211 template <typename FAB,
212 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
213 std::is_same_v<T,typename FAB::value_type>,int> = 0>
214 void copyTo (FabArray<FAB> & fa) const;
215
216 /*
217 * \brief Print the vector to files.
218 *
219 * Each process writes its local portion of the vector to a separate
220 * file. This function is provided for debugging purpose. Using it in
221 * large-scale runs may overwhelm the file system.
222 *
223 * File format:
224 * - The first line contains two integers describing the local vector:
225 * the global row begin index and the global row end index.
226 * - This is followed by one line per element. Each line contains the
227 * global row index and vector value.
228 *
229 * \param file Base file name. The full name on process `i` is `{file}.{i}`.
230 */
231 void printToFile (std::string const& file) const;
232
233private:
234 AlgPartition m_partition;
235 Long m_begin = 0;
236 Long m_end = 0;
237 Vec m_data;
238};
239
240template <typename T, typename Allocator>
242 : m_partition(global_size),
243 m_begin(m_partition[ParallelDescriptor::MyProc()]),
244 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
245 m_data(m_end-m_begin)
246{}
247
248template <typename T, typename Allocator>
250 : m_partition(std::move(partition)),
251 m_begin(m_partition[ParallelDescriptor::MyProc()]),
252 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
253 m_data(m_end-m_begin)
254{}
255
256template <typename T, typename Allocator>
258{
259 m_partition.define(global_size);
260 m_begin = m_partition[ParallelDescriptor::MyProc()];
261 m_end = m_partition[ParallelDescriptor::MyProc()+1];
262 m_data.resize(m_end-m_begin);
263}
264
265template <typename T, typename Allocator>
267{
268 m_partition = std::move(partition);
269 m_begin = m_partition[ParallelDescriptor::MyProc()];
270 m_end = m_partition[ParallelDescriptor::MyProc()+1];
271 m_data.resize(m_end-m_begin);
272}
273
274template <typename T, typename Allocator>
276{
277 setValAsync(val);
279}
280
281template <typename T, typename Allocator>
283{
284 Long n = m_data.size();
285 T* p = m_data.data();
286 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
287}
288
289template <typename T, typename Allocator>
291{
292 copyAsync(rhs);
294}
295
296template <typename T, typename Allocator>
298{
299 Long n = m_data.size();
300 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
301 T* dst = m_data.data();
302 T const* src = rhs.data();
303 Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
304}
305
306template <typename T, typename Allocator>
308{
309 plusAsync(rhs);
311}
312
313template <typename T, typename Allocator>
315{
316 Long n = m_data.size();
317 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
318 T* dst = m_data.data();
319 T const* src = rhs.data();
320 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
321}
322
323template <typename T, typename Allocator>
324void AlgVector<T,Allocator>::scale (T scale_factor)
325{
326 scaleAsync(scale_factor);
328}
329
330template <typename T, typename Allocator>
332{
333 Long n = m_data.size();
334 T* p = m_data.data();
335 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
336}
337
338template <typename T, typename Allocator>
339T AlgVector<T,Allocator>::sum (bool local) const
340{
341 Long n = m_data.size();
342 T const* p = m_data.data();
343 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
344 {
345 return p[i];
346 });
347 if (!local) {
349 }
350 return r;
351}
352
353template <typename T, typename Allocator>
355{
356 Long n = m_data.size();
357 T const* p = m_data.data();
358 T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
359 {
360 return amrex::Math::abs(p[i]);
361 });
362 if (!local) {
364 }
365 return r;
366}
367
368template <typename T, typename Allocator>
370{
371 Long n = m_data.size();
372 T const* p = m_data.data();
373 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
374 {
375 return amrex::Math::abs(p[i]);
376 });
377 if (!local) {
379 }
380 return r;
381}
382
383template <typename T, typename Allocator>
385{
386 Long n = m_data.size();
387 T const* p = m_data.data();
388 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
389 {
390 return p[i]*p[i];
391 });
392 if (!local) {
394 }
395 return std::sqrt(r);
396}
397
398template <typename T, typename Allocator>
399std::pair<T,T> AlgVector<T,Allocator>::norm1and2 (bool local) const
400{
401 Long n = m_data.size();
402 T const* p = m_data.data();
404 ReduceData<T,T> reduce_data(reduce_op);
405 using ReduceTuple = typename decltype(reduce_data)::Type;
406 reduce_op.eval(n, reduce_data, [=] AMREX_GPU_DEVICE (Long i) -> ReduceTuple
407 {
408 return {amrex::Math::abs(p[i]), p[i]*p[i]};
409 });
410 auto hv = reduce_data.value(reduce_op);
411 if (local) {
412 return std::make_pair(amrex::get<0>(hv), std::sqrt(amrex::get<1>(hv)));
413 } else {
414 std::array<T,2> a{amrex::get<0>(hv), amrex::get<1>(hv)};
416 return std::make_pair(a[0], std::sqrt(a[1]));
417 }
418}
419
425template <typename T, typename Allocator>
426template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
427 std::is_same_v<T,typename FAB::value_type>, int> >
429{
431
432 LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
433 T* p = m_data.data();
434 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
435 dptrs[mfi] = p;
436 p += mfi.validbox().numPts();
437 }
438
439#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
440#pragma omp parallel
441#endif
442 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
443 fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
444 }
445}
446
452template <typename T, typename Allocator>
453template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
454 std::is_same_v<T,typename FAB::value_type>, int> >
456{
458
460 T const* p = m_data.data();
461 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
462 dptrs[mfi] = p;
463 p += mfi.validbox().numPts();
464 }
465
466#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
467#pragma omp parallel
468#endif
469 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
470 fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
471 }
472}
473
474template <typename T, typename Allocator>
475void AlgVector<T,Allocator>::printToFile (std::string const& file) const
476{
477 std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
478 ofs << m_begin << " " << m_end << "\n";
479#ifdef AMREX_USE_GPU
480 Gpu::PinnedVector<T> hv(m_data.size());
481 Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
483 T const* p = hv.data();
484#else
485 T const* p = m_data.data();
486#endif
487 for (Long i = 0, N = m_data.size(); i < N; ++i) {
488 ofs << i+m_begin << " " << p[i] << "\n";
489 }
490}
491
492}
493
494#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Definition AMReX_AlgPartition.H:21
Long numGlobalRows() const
Total number of rows covered by the partition.
Definition AMReX_AlgPartition.H:47
bool empty() const
True if the partition contains no rows.
Definition AMReX_AlgPartition.H:40
Distributed dense vector that mirrors the layout of an AlgPartition.
Definition AMReX_AlgVector.H:29
void setVal(T val)
Definition AMReX_AlgVector.H:275
Long numLocalRows() const
Number of entries stored on this rank.
Definition AMReX_AlgVector.H:74
T norminf(bool local=false) const
Return the infinity norm.
Definition AMReX_AlgVector.H:354
Table1D< T, Long > view()
Definition AMReX_AlgVector.H:110
AlgPartition const & partition() const
Partition describing the global layout of the vector.
Definition AMReX_AlgVector.H:71
void copy(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:290
Table1D< T const, Long > const_view() const
Definition AMReX_AlgVector.H:102
void setValAsync(T val)
Definition AMReX_AlgVector.H:282
Long globalBegin() const
Inclusive global index begin.
Definition AMReX_AlgVector.H:79
bool empty() const
True if the local storage holds zero entries.
Definition AMReX_AlgVector.H:68
std::pair< T, T > norm1and2(bool local=false) const
Return the 1-norm and 2-norm as a pair.
Definition AMReX_AlgVector.H:399
T const * data() const
Definition AMReX_AlgVector.H:85
AlgVector(AlgPartition partition)
Adopt an explicit partition that describes the data layout.
Definition AMReX_AlgVector.H:249
AlgVector(AlgVector< T, Allocator > &&) noexcept=default
void plusAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:314
void scale(T scale_factor)
Definition AMReX_AlgVector.H:324
AlgVector(Long global_size)
Allocate and partition global_size rows across MPI ranks.
Definition AMReX_AlgVector.H:241
Long globalEnd() const
Exclusive global index end.
Definition AMReX_AlgVector.H:81
void copyFrom(FabArray< FAB > const &fa)
Flatten a cell-centered FabArray into this vector's storage.
Definition AMReX_AlgVector.H:428
T * data()
Definition AMReX_AlgVector.H:88
Long numGlobalRows() const
Global number of rows.
Definition AMReX_AlgVector.H:76
AlgVector()=default
Create an empty vector; call define() before use.
T norm2(bool local=false) const
Return the 2-norm.
Definition AMReX_AlgVector.H:384
AlgVector & operator=(AlgVector< T, Allocator > const &)=delete
void copyAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:297
void define(Long global_size)
Resize/repartition the vector to span global_size rows.
Definition AMReX_AlgVector.H:257
void scaleAsync(T scale_factor)
Definition AMReX_AlgVector.H:331
AlgVector(AlgVector< T, Allocator > const &)=delete
void plus(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:307
T sum(bool local=false) const
Return the sum of elements.
Definition AMReX_AlgVector.H:339
Table1D< T const, Long > view() const
Definition AMReX_AlgVector.H:94
Allocator allocator_type
Definition AMReX_AlgVector.H:32
void copyTo(FabArray< FAB > &fa) const
Scatter this vector back into a cell-centered FabArray.
Definition AMReX_AlgVector.H:455
T norm1(bool local=false) const
Return the 1-norm.
Definition AMReX_AlgVector.H:369
T value_type
Definition AMReX_AlgVector.H:31
void printToFile(std::string const &file) const
Definition AMReX_AlgVector.H:475
bool is_cell_centered() const noexcept
This tests on whether the FabArray is cell-centered.
Definition AMReX_FabArrayBase.cpp:2698
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:350
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:85
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:169
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
T * data() noexcept
Definition AMReX_PODVector.H:666
Definition AMReX_Reduce.H:453
Type value()
Definition AMReX_Reduce.H:488
Definition AMReX_Reduce.H:612
std::enable_if_t< IsFabArray< MF >::value > eval(MF const &mf, IntVect const &nghost, D &reduce_data, F &&f)
Definition AMReX_Reduce.H:746
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:319
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:133
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:449
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:435
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
Definition AMReX_Amr.cpp:49
Definition AMReX_TableData.H:20