Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_AlgVector.H
Go to the documentation of this file.
1#ifndef AMREX_ALG_VECTOR_H_
2#define AMREX_ALG_VECTOR_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_FabArray.H>
7#include <AMReX_INT.H>
8#include <AMReX_LayoutData.H>
9#include <AMReX_TableData.H>
10
11#include <fstream>
12#include <string>
13#include <type_traits>
14#include <utility>
15
16namespace amrex {
17
18template <typename T, typename Allocator = DefaultAllocator<T> >
20{
21public:
22 using value_type = T;
23 using allocator_type = Allocator;
24
26
27 AlgVector () = default;
28 explicit AlgVector (Long global_size);
30
33
34 AlgVector (AlgVector<T, Allocator> &&) noexcept = default;
35 AlgVector& operator= (AlgVector<T, Allocator> &&) noexcept = default;
36
37 ~AlgVector () = default;
38
39 void define (Long global_size);
41
42 [[nodiscard]] bool empty () const { return m_partition.empty(); }
43
44 [[nodiscard]] AlgPartition const& partition () const { return m_partition; }
45
46 [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
47 [[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }
48
50 [[nodiscard]] Long globalBegin () const { return m_begin; }
52 [[nodiscard]] Long globalEnd () const { return m_end; }
53
56 [[nodiscard]] T const* data () const { return m_data.data(); }
59 [[nodiscard]] T * data () { return m_data.data(); }
60
64 [[nodiscard]] AMREX_FORCE_INLINE
66 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
67 }
68
72 [[nodiscard]] AMREX_FORCE_INLINE
74 return Table1D<T const, Long>{m_data.data(), m_begin, m_end};
75 }
76
80 [[nodiscard]] AMREX_FORCE_INLINE
82 return Table1D<T, Long>{m_data.data(), m_begin, m_end};
83 }
84
87 void setVal (T val);
88
91 void setValAsync (T val);
92
95 void copy (AlgVector<T,Allocator> const& rhs);
96
100
103 void plus (AlgVector<T,Allocator> const& rhs);
104
108
111 void scale (T scale_factor);
112
115 void scaleAsync (T scale_factor);
116
127 [[nodiscard]] T sum (bool local = false) const;
128
139 [[nodiscard]] T norminf (bool local = false) const;
140
151 [[nodiscard]] T norm1 (bool local = false) const;
152
163 [[nodiscard]] T norm2 (bool local = false) const;
164
175 [[nodiscard]] std::pair<T,T> norm1and2 (bool local = false) const;
176
177 template <typename FAB,
178 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
179 std::is_same_v<T,typename FAB::value_type>, int> = 0>
180 void copyFrom (FabArray<FAB> const& fa);
181
182 template <typename FAB,
183 std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
184 std::is_same_v<T,typename FAB::value_type>,int> = 0>
185 void copyTo (FabArray<FAB> & fa) const;
186
187 /*
188 * \brief Print the vector to files.
189 *
190 * Each process writes its local portion of the vector to a separate
191 * file. This function is provided for debugging purpose. Using it in
192 * large-scale runs may overwhelm the file system.
193 *
194 * File format:
195 * - The first line contains two integers describing the local vector:
196 * the global row begin index and the global row end index.
197 * - This is followed by one line per element. Each line contains the
198 * global row index and vector value.
199 *
200 * \param file Base file name. The full name on process `i` is `{file}.{i}`.
201 */
202 void printToFile (std::string const& file) const;
203
204private:
205 AlgPartition m_partition;
206 Long m_begin = 0;
207 Long m_end = 0;
208 Vec m_data;
209};
210
211template <typename T, typename Allocator>
213 : m_partition(global_size),
214 m_begin(m_partition[ParallelDescriptor::MyProc()]),
215 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
216 m_data(m_end-m_begin)
217{}
218
219template <typename T, typename Allocator>
221 : m_partition(std::move(partition)),
222 m_begin(m_partition[ParallelDescriptor::MyProc()]),
223 m_end(m_partition[ParallelDescriptor::MyProc()+1]),
224 m_data(m_end-m_begin)
225{}
226
227template <typename T, typename Allocator>
229{
230 m_partition.define(global_size);
231 m_begin = m_partition[ParallelDescriptor::MyProc()];
232 m_end = m_partition[ParallelDescriptor::MyProc()+1];
233 m_data.resize(m_end-m_begin);
234}
235
236template <typename T, typename Allocator>
238{
239 m_partition = std::move(partition);
240 m_begin = m_partition[ParallelDescriptor::MyProc()];
241 m_end = m_partition[ParallelDescriptor::MyProc()+1];
242 m_data.resize(m_end-m_begin);
243}
244
245template <typename T, typename Allocator>
247{
248 setValAsync(val);
250}
251
252template <typename T, typename Allocator>
254{
255 Long n = m_data.size();
256 T* p = m_data.data();
257 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] = val; });
258}
259
260template <typename T, typename Allocator>
262{
263 copyAsync(rhs);
265}
266
267template <typename T, typename Allocator>
269{
270 Long n = m_data.size();
271 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
272 T* dst = m_data.data();
273 T const* src = rhs.data();
274 Gpu::dtod_memcpy_async(dst, src, n*sizeof(T));
275}
276
277template <typename T, typename Allocator>
279{
280 plusAsync(rhs);
282}
283
284template <typename T, typename Allocator>
286{
287 Long n = m_data.size();
288 AMREX_ASSERT(m_data.size() == rhs.m_data.size());
289 T* dst = m_data.data();
290 T const* src = rhs.data();
291 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { dst[i] += src[i]; });
292}
293
294template <typename T, typename Allocator>
295void AlgVector<T,Allocator>::scale (T scale_factor)
296{
297 scaleAsync(scale_factor);
299}
300
301template <typename T, typename Allocator>
303{
304 Long n = m_data.size();
305 T* p = m_data.data();
306 ParallelForOMP(n, [=] AMREX_GPU_DEVICE (Long i) noexcept { p[i] *= scale_factor; });
307}
308
309template <typename T, typename Allocator>
310T AlgVector<T,Allocator>::sum (bool local) const
311{
312 Long n = m_data.size();
313 T const* p = m_data.data();
314 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
315 {
316 return p[i];
317 });
318 if (!local) {
320 }
321 return r;
322}
323
324template <typename T, typename Allocator>
326{
327 Long n = m_data.size();
328 T const* p = m_data.data();
329 T r = Reduce::Max<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
330 {
331 return amrex::Math::abs(p[i]);
332 });
333 if (!local) {
335 }
336 return r;
337}
338
339template <typename T, typename Allocator>
341{
342 Long n = m_data.size();
343 T const* p = m_data.data();
344 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
345 {
346 return amrex::Math::abs(p[i]);
347 });
348 if (!local) {
350 }
351 return r;
352}
353
354template <typename T, typename Allocator>
356{
357 Long n = m_data.size();
358 T const* p = m_data.data();
359 T r = Reduce::Sum<T>(n, [=] AMREX_GPU_DEVICE (Long i) noexcept
360 {
361 return p[i]*p[i];
362 });
363 if (!local) {
365 }
366 return std::sqrt(r);
367}
368
369template <typename T, typename Allocator>
370std::pair<T,T> AlgVector<T,Allocator>::norm1and2 (bool local) const
371{
372 Long n = m_data.size();
373 T const* p = m_data.data();
375 ReduceData<T,T> reduce_data(reduce_op);
376 using ReduceTuple = typename decltype(reduce_data)::Type;
377 reduce_op.eval(n, reduce_data, [=] AMREX_GPU_DEVICE (Long i) -> ReduceTuple
378 {
379 return {amrex::Math::abs(p[i]), p[i]*p[i]};
380 });
381 auto hv = reduce_data.value(reduce_op);
382 if (local) {
383 return std::make_pair(amrex::get<0>(hv), std::sqrt(amrex::get<1>(hv)));
384 } else {
385 std::array<T,2> a{amrex::get<0>(hv), amrex::get<1>(hv)};
387 return std::make_pair(a[0], std::sqrt(a[1]));
388 }
389}
390
391template <typename T, typename Allocator>
392template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
393 std::is_same_v<T,typename FAB::value_type>, int> >
395{
397
398 LayoutData<T*> dptrs(fa.boxArray(), fa.DistributionMap());
399 T* p = m_data.data();
400 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
401 dptrs[mfi] = p;
402 p += mfi.validbox().numPts();
403 }
404
405#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
406#pragma omp parallel
407#endif
408 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
409 fa[mfi].template copyToMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
410 }
411}
412
413template <typename T, typename Allocator>
414template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
415 std::is_same_v<T,typename FAB::value_type>, int> >
417{
419
421 T const* p = m_data.data();
422 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
423 dptrs[mfi] = p;
424 p += mfi.validbox().numPts();
425 }
426
427#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
428#pragma omp parallel
429#endif
430 for (MFIter mfi(fa); mfi.isValid(); ++mfi) {
431 fa[mfi].template copyFromMem<RunOn::Device>(mfi.validbox(), 0, 1, dptrs[mfi]);
432 }
433}
434
435template <typename T, typename Allocator>
436void AlgVector<T,Allocator>::printToFile (std::string const& file) const
437{
438 std::ofstream ofs(file+"."+std::to_string(ParallelDescriptor::MyProc()));
439 ofs << m_begin << " " << m_end << "\n";
440#ifdef AMREX_USE_GPU
441 Gpu::PinnedVector<T> hv(m_data.size());
442 Gpu::dtoh_memcpy_async(hv.data(), m_data.data(), m_data.size()*sizeof(T));
444 T const* p = hv.data();
445#else
446 T const* p = m_data.data();
447#endif
448 for (Long i = 0, N = m_data.size(); i < N; ++i) {
449 ofs << i+m_begin << " " << p[i] << "\n";
450 }
451}
452
453}
454
455#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Definition AMReX_AlgPartition.H:14
Long numGlobalRows() const
Definition AMReX_AlgPartition.H:28
bool empty() const
Definition AMReX_AlgPartition.H:25
Definition AMReX_AlgVector.H:20
void setVal(T val)
Definition AMReX_AlgVector.H:246
Long numLocalRows() const
Definition AMReX_AlgVector.H:46
T norminf(bool local=false) const
Return the infinity norm.
Definition AMReX_AlgVector.H:325
Table1D< T, Long > view()
Definition AMReX_AlgVector.H:81
AlgPartition const & partition() const
Definition AMReX_AlgVector.H:44
void copy(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:261
Table1D< T const, Long > const_view() const
Definition AMReX_AlgVector.H:73
void setValAsync(T val)
Definition AMReX_AlgVector.H:253
Long globalBegin() const
Inclusive global index begin.
Definition AMReX_AlgVector.H:50
bool empty() const
Definition AMReX_AlgVector.H:42
std::pair< T, T > norm1and2(bool local=false) const
Return the 1-norm and 2-norm as a pair.
Definition AMReX_AlgVector.H:370
T const * data() const
Definition AMReX_AlgVector.H:56
AlgVector(AlgPartition partition)
Definition AMReX_AlgVector.H:220
AlgVector(AlgVector< T, Allocator > &&) noexcept=default
void plusAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:285
void scale(T scale_factor)
Definition AMReX_AlgVector.H:295
AlgVector(Long global_size)
Definition AMReX_AlgVector.H:212
Long globalEnd() const
Exclusive global index end.
Definition AMReX_AlgVector.H:52
void copyFrom(FabArray< FAB > const &fa)
Definition AMReX_AlgVector.H:394
T * data()
Definition AMReX_AlgVector.H:59
Long numGlobalRows() const
Definition AMReX_AlgVector.H:47
AlgVector()=default
T norm2(bool local=false) const
Return the 2-norm.
Definition AMReX_AlgVector.H:355
AlgVector & operator=(AlgVector< T, Allocator > const &)=delete
void copyAsync(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:268
void define(Long global_size)
Definition AMReX_AlgVector.H:228
void scaleAsync(T scale_factor)
Definition AMReX_AlgVector.H:302
AlgVector(AlgVector< T, Allocator > const &)=delete
void plus(AlgVector< T, Allocator > const &rhs)
Definition AMReX_AlgVector.H:278
T sum(bool local=false) const
Return the sum of elements.
Definition AMReX_AlgVector.H:310
Table1D< T const, Long > view() const
Definition AMReX_AlgVector.H:65
Allocator allocator_type
Definition AMReX_AlgVector.H:23
void copyTo(FabArray< FAB > &fa) const
Definition AMReX_AlgVector.H:416
T norm1(bool local=false) const
Return the 1-norm.
Definition AMReX_AlgVector.H:340
T value_type
Definition AMReX_AlgVector.H:22
void printToFile(std::string const &file) const
Definition AMReX_AlgVector.H:436
bool is_cell_centered() const noexcept
This tests on whether the FabArray is cell-centered.
Definition AMReX_FabArrayBase.cpp:2701
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:347
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:85
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:169
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
T * data() noexcept
Definition AMReX_PODVector.H:666
Definition AMReX_Reduce.H:257
Type value()
Definition AMReX_Reduce.H:289
Definition AMReX_Reduce.H:389
std::enable_if_t< IsFabArray< MF >::value > eval(MF const &mf, IntVect const &nghost, D &reduce_data, F &&f)
Definition AMReX_Reduce.H:458
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:243
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:133
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:329
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:315
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
Definition AMReX_Amr.cpp:49
Definition AMReX_TableData.H:20