Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_AlgVector.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VECTOR_H_
2#define AMREX_ALG_VECTOR_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_FabArray.H>
7#include <AMReX_INT.H>
8#include <AMReX_LayoutData.H>
9#include <AMReX_TableData.H>
10
11#include <fstream>
12#include <string>
13#include <type_traits>
14#include <utility>
15
16namespace amrex {
17
27template <typename T, typename Allocator = DefaultAllocator<T> >
29{
30public:
31 using value_type = T;
32 using allocator_type = Allocator;
33
35
37 AlgVector () = default;
39 explicit AlgVector (Long global_size);
42
45
46 AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
47 AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;
48
49 ~AlgVector () = default;
50
59 void define (Long global_size);
66
68 [[nodiscard]] bool empty () const { return m_partition.empty(); }
69
71 [[nodiscard]] AlgPartition const& partition () const { return m_partition; }
72
74 [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
76 [[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }
77
79 [[nodiscard]] Long globalBegin () const { return m_begin; }
81 [[nodiscard]] Long globalEnd () const { return m_end; }
82
85 [[nodiscard]] T const* data () const { return m_data.data(); }
88 [[nodiscard]] T * data () { return m_data.data(); }
89
93 [[nodiscard]] AMREX_FORCE_INLINE
95 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
96 }
97
101 [[nodiscard]] AMREX_FORCE_INLINE
103 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
104 }
105
109 [[nodiscard]] AMREX_FORCE_INLINE
111 return Table1D<T, Long>{m_data.data(), m_begin, m_end};
112 }
113
116 void setVal (T val);
117
120 void setValAsync (T val);
121
124 void copy (AlgVector<T,Allocator> const& rhs);
125
129
132 void plus (AlgVector<T,Allocator> const& rhs);
133
137
140 void scale (T scale_factor);
141
144 void scaleAsync (T scale_factor);
145
156 [[nodiscard]] T sum (bool local = false) const;
157
168 [[nodiscard]] T norminf (bool local = false) const;
169
180 [[nodiscard]] T norm1 (bool local = false) const;
181
192 [[nodiscard]] T norm2 (bool local = false) const;
193
204 [[nodiscard]] std::pair<T,T> norm1and2 (bool local = false) const;
205
206 template <BaseFabType FAB>
207 requires (std::same_as<T,typename FAB::value_type>)
208 void copyFrom (FabArray<FAB> const& fa);
209
210 template <BaseFabType FAB>
211 requires (std::same_as<T,typename FAB::value_type>)
212 void copyTo (FabArray<FAB> & fa) const;
213
214 /*
215 * \brief Print the vector to files.
216 *
217 * Each process writes its local portion of the vector to a separate
218 * file. This function is provided for debugging purpose. Using it in
219 * large-scale runs may overwhelm the file system.
220 *
221 * File format:
222 * - The first line contains two integers describing the local vector:
223 * the global row begin index and the global row end index.
224 * - This is followed by one line per element. Each line contains the
225 * global row index and vector value.
226 *
227 * \param file Base file name. The full name on process `i` is `{file}.{i}`.
228 */
229 void printToFile (std::string const& file) const;
230
231private:
232 AlgPartition m_partition;
233 Long m_begin = 0;
234 Long m_end = 0;
235 Vec m_data;
236};
237
238template <typename T, typename Allocator>
240 : m_partition(global_size),
241 m_begin(m_partition[ParallelDescriptor::MyProc()]),
242 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
243 m_data(m_end-m_begin)
244{}
245
246template <typename T, typename Allocator>
248 : m_partition(std::move(partition)),
249 m_begin(m_partition[ParallelDescriptor::MyProc()]),
250 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
251 m_data(m_end-m_begin)
252{}
253
254template <typename T, typename Allocator>
256{
257 m_partition.define(global_size);
258 m_begin = m_partition[ParallelDescriptor::MyProc()];
259 m_end = m_partition[ParallelDescriptor::MyProc()+1];
260 m_data.resize(m_end-m_begin);
261}
262
263template <typename T, typename Allocator>
265{
266 m_partition = std::move(partition);
267 m_begin = m_partition[ParallelDescriptor::MyProc()];
268 m_end = m_partition[ParallelDescriptor::MyProc()+1];
269 m_data.resize(m_end-m_begin);
270}
271
272template <typename T, typename Allocator>
274{
275 setValAsync(val);
277}
278
279template <typename T, typename Allocator>
281{
282 Long n = m_data.size();
283 T* p = m_data.data();
284 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
285}
286
287template <typename T, typename Allocator>
289{
290 copyAsync(rhs);
292}
293
294template <typename T, typename Allocator>
296{
297 Long n = m_data.size();
298 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
299 T* dst = m_data.data();
300 T const* src = rhs.data();
301 Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
302}
303
304template <typename T, typename Allocator>
306{
307 plusAsync(rhs);
309}
310
311template <typename T, typename Allocator>
313{
314 Long n = m_data.size();
315 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
316 T* dst = m_data.data();
317 T const* src = rhs.data();
318 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
319}
320
321template <typename T, typename Allocator>
322void AlgVector<T,Allocator>::scale (T scale_factor)
323{
324 scaleAsync(scale_factor);
326}
327
328template <typename T, typename Allocator>
330{
331 Long n = m_data.size();
332 T* p = m_data.data();
333 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
334}
335
336template <typename T, typename Allocator>
337T AlgVector<T,Allocator>::sum (bool local) const
338{
339 Long n = m_data.size();
340 T const* p = m_data.data();
341 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
342 {
343 return p[i];
344 });
345 if (!local) {
347 }
348 return r;
349}
350
351template <typename T, typename Allocator>
353{
354 Long n = m_data.size();
355 T const* p = m_data.data();
356 T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
357 {
358 return amrex::Math::abs(p[i]);
359 });
360 if (!local) {
362 }
363 return r;
364}
365
366template <typename T, typename Allocator>
368{
369 Long n = m_data.size();
370 T const* p = m_data.data();
371 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
372 {
373 return amrex::Math::abs(p[i]);
374 });
375 if (!local) {
377 }
378 return r;
379}
380
381template <typename T, typename Allocator>
383{
384 Long n = m_data.size();
385 T const* p = m_data.data();
386 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
387 {
388 return p[i]*p[i];
389 });
390 if (!local) {
392 }
393 return std::sqrt(r);
394}
395
396template <typename T, typename Allocator>
397std::pair<T,T> AlgVector<T,Allocator>::norm1and2 (bool local) const
398{
399 Long n = m_data.size();
400 T const* p = m_data.data();
402 ReduceData<T,T> reduce_data(reduce_op);
403 using ReduceTuple = typename decltype(reduce_data)::Type;
404 reduce_op.eval(n, reduce_data, [=] AMREX_GPU_DEVICE (Long i) -> ReduceTuple
405 {
406 return {amrex::Math::abs(p[i]), p[i]*p[i]};
407 });
408 auto hv = reduce_data.value(reduce_op);
409 if (local) {
410 return std::make_pair(amrex::get<0>(hv), std::sqrt(amrex::get<1>(hv)));
411 } else {
412 std::array<T,2> a{amrex::get<0>(hv), amrex::get<1>(hv)};
414 return std::make_pair(a[0], std::sqrt(a[1]));
415 }
416}
417
423template <typename T, typename Allocator>
424template <BaseFabType FAB>
425requires (std::same_as<T,typename FAB::value_type>)
427{
429
430 LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
431 T* p = m_data.data();
432 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
433 dptrs[mfi] = p;
434 p += mfi.validbox().numPts();
435 }
436
437#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
438#pragma omp parallel
439#endif
440 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
441 fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
442 }
443}
444
450template <typename T, typename Allocator>
451template <BaseFabType FAB>
452requires (std::same_as<T,typename FAB::value_type>)
454{
456
458 T const* p = m_data.data();
459 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
460 dptrs[mfi] = p;
461 p += mfi.validbox().numPts();
462 }
463
464#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
465#pragma omp parallel
466#endif
467 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
468 fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
469 }
470}
471
472template <typename T, typename Allocator>
473void AlgVector<T,Allocator>::printToFile (std::string const& file) const
474{
475 std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
476 ofs << m_begin << " " << m_end << "\n";
477#ifdef AMREX_USE_GPU
478 Gpu::PinnedVector<T> hv(m_data.size());
479 Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
481 T const* p = hv.data();
482#else
483 T const* p = m_data.data();
484#endif
485 for (Long i = 0, N = m_data.size(); i < N; ++i) {
486 ofs << i+m_begin << " " << p[i] << "\n";
487 }
488}
489
490}
491
492#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Definition AMReX_AlgPartition.H:21
Long numGlobalRows() const
Total number of rows covered by the partition.
Definition AMReX_AlgPartition.H:47
bool empty() const
True if the partition contains no rows.
Definition AMReX_AlgPartition.H:40
Distributed dense vector that mirrors the layout of an AlgPartition.
Definition AMReX_AlgVector.H:29
void setVal(T val)
Definition AMReX_AlgVector.H:273
Long numLocalRows() const
Number of entries stored on this rank.
Definition AMReX_AlgVector.H:74
T norminf(bool local=false) const
Return the infinity norm.
Definition AMReX_AlgVector.H:352
void copyTo(FabArray< FAB > &fa) const
Scatter this vector back into a cell-centered FabArray.
Definition AMReX_AlgVector.H:453
Table1D< T, Long > view()
Definition AMReX_AlgVector.H:110
AlgPartition const & partition() const
Partition describing the global layout of the vector.
Definition AMReX_AlgVector.H:71
void copy(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:288
Table1D< T const, Long > const_view() const
Definition AMReX_AlgVector.H:102
void setValAsync(T val)
Definition AMReX_AlgVector.H:280
Long globalBegin() const
Inclusive global index begin.
Definition AMReX_AlgVector.H:79
bool empty() const
True if the local storage holds zero entries.
Definition AMReX_AlgVector.H:68
void copyFrom(FabArray< FAB > const &fa)
Flatten a cell-centered FabArray into this vector's storage.
Definition AMReX_AlgVector.H:426
std::pair< T, T > norm1and2(bool local=false) const
Return the 1-norm and 2-norm as a pair.
Definition AMReX_AlgVector.H:397
T const * data() const
Definition AMReX_AlgVector.H:85
AlgVector(AlgPartition partition)
Adopt an explicit partition that describes the data layout.
Definition AMReX_AlgVector.H:247
AlgVector(AlgVector< T, Allocator > &&) noexcept=default
void plusAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:312
void scale(T scale_factor)
Definition AMReX_AlgVector.H:322
AlgVector(Long global_size)
Allocate and partition global_size rows across MPI ranks.
Definition AMReX_AlgVector.H:239
Long globalEnd() const
Exclusive global index end.
Definition AMReX_AlgVector.H:81
T * data()
Definition AMReX_AlgVector.H:88
Long numGlobalRows() const
Global number of rows.
Definition AMReX_AlgVector.H:76
AlgVector()=default
Create an empty vector; call define() before use.
T norm2(bool local=false) const
Return the 2-norm.
Definition AMReX_AlgVector.H:382
AlgVector & operator=(AlgVector< T, Allocator > const &)=delete
void copyAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:295
void define(Long global_size)
Resize/repartition the vector to span global_size rows.
Definition AMReX_AlgVector.H:255
void scaleAsync(T scale_factor)
Definition AMReX_AlgVector.H:329
AlgVector(AlgVector< T, Allocator > const &)=delete
void plus(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:305
T sum(bool local=false) const
Return the sum of elements.
Definition AMReX_AlgVector.H:337
Table1D< T const, Long > view() const
Definition AMReX_AlgVector.H:94
Allocator allocator_type
Definition AMReX_AlgVector.H:32
T norm1(bool local=false) const
Return the 1-norm.
Definition AMReX_AlgVector.H:367
T value_type
Definition AMReX_AlgVector.H:31
void printToFile(std::string const &file) const
Definition AMReX_AlgVector.H:473
bool is_cell_centered() const noexcept
This tests on whether the FabArray is cell-centered.
Definition AMReX_FabArrayBase.cpp:2699
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:344
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
T * data() noexcept
Definition AMReX_PODVector.H:666
Definition AMReX_Reduce.H:438
Type value()
Definition AMReX_Reduce.H:473
Definition AMReX_Reduce.H:597
void eval(MF const &mf, IntVect const &nghost, D &reduce_data, F &&f)
Definition AMReX_Reduce.H:731
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:327
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:133
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:449
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:435
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
Definition AMReX_Amr.cpp:50
Definition AMReX_TableData.H:20