Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_PCI.H
Go to the documentation of this file.
1#ifndef AMREX_PCI_H_
2#define AMREX_PCI_H_
3
4template <class FAB>
5void
6FabArray<FAB>::PC_local_cpu (const CPC& thecpc, FabArray<FAB> const& src,
7 int scomp, int dcomp, int ncomp, CpOp op)
8{
9 auto const N_locs = static_cast<int>(thecpc.m_LocTags->size());
10 if (N_locs == 0) { return; }
11 bool is_thread_safe = thecpc.m_threadsafe_loc;
12
13 if (is_thread_safe)
14 {
15#ifdef AMREX_USE_OMP
16#pragma omp parallel for
17#endif
18 for (int i = 0; i < N_locs; ++i)
19 {
20 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
21 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
22 // avoid self copy or plus
23 const FAB* sfab = &(src[tag.srcIndex]);
24 FAB* dfab = &(get(tag.dstIndex));
25 if (op == FabArrayBase::COPY)
26 {
27 dfab->template copy<RunOn::Host>(*sfab, tag.sbox, scomp, tag.dbox, dcomp, ncomp);
28 }
29 else
30 {
31 dfab->template plus<RunOn::Host>(*sfab, tag.sbox, tag.dbox, scomp, dcomp, ncomp);
32 }
33 }
34 }
35 }
36 else
37 {
39 for (int i = 0; i < N_locs; ++i)
40 {
41 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
42 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
43 loc_copy_tags[tag.dstIndex].push_back
44 ({src.fabPtr(tag.srcIndex), tag.dbox, tag.sbox.smallEnd()-tag.dbox.smallEnd()});
45 }
46 }
47
48#ifdef AMREX_USE_OMP
49#pragma omp parallel
50#endif
51 for (MFIter mfi(*this); mfi.isValid(); ++mfi)
52 {
53 const auto& tags = loc_copy_tags[mfi];
54 auto dfab = this->array(mfi);
55 if (op == FabArrayBase::COPY)
56 {
57 for (auto const & tag : tags)
58 {
59 auto const sfab = tag.sfab->array();
60 Dim3 offset = tag.offset.dim3();
61 amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
62 [=] (int i, int j, int k, int n) noexcept
63 {
64 dfab(i,j,k,dcomp+n) = sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
65 });
66 }
67 }
68 else
69 {
70 for (auto const & tag : tags)
71 {
72 auto const sfab = tag.sfab->array();
73 Dim3 offset = tag.offset.dim3();
74 amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
75 [=] (int i, int j, int k, int n) noexcept
76 {
77 dfab(i,j,k,dcomp+n) += sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
78 });
79 }
80 }
81 }
82 }
83}
84
85#ifdef AMREX_USE_GPU
86template <class FAB>
87void
89 int scomp, int dcomp, int ncomp, CpOp op)
90{
91 int N_locs = thecpc.m_LocTags->size();
92 if (N_locs == 0) { return; }
93 bool is_thread_safe = thecpc.m_threadsafe_loc;
94
95 using TagType = Array4CopyTag<value_type>;
96 Vector<TagType> loc_copy_tags;
97 loc_copy_tags.reserve(N_locs);
98
99 Vector<BaseFab<int> > maskfabs;
100 Vector<Array4Tag<int> > masks;
101 if (!is_thread_safe)
102 {
103 if ((op == FabArrayBase::COPY && !amrex::IsStoreAtomic<value_type>::value) ||
104 (op == FabArrayBase::ADD && !amrex::HasAtomicAdd <value_type>::value))
105 {
106 maskfabs.resize(this->local_size());
107 masks.reserve(N_locs);
108 }
109 }
110
111 for (int i = 0; i < N_locs; ++i)
112 {
113 const CopyComTag& tag = (*thecpc.m_LocTags)[i];
114 if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
115 int li = this->localindex(tag.dstIndex);
116 loc_copy_tags.push_back
117 ({this->atLocalIdx(li).array(),
118 src.fabPtr(tag.srcIndex)->const_array(),
119 tag.dbox,
120 (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
121
122 if (maskfabs.size() > 0) {
123 if (!maskfabs[li].isAllocated()) {
124 maskfabs[li].resize(this->atLocalIdx(li).box());
125 }
126 masks.emplace_back(Array4Tag<int>{maskfabs[li].array()});
127 }
128 }
129 }
130
131 if (maskfabs.size() > 0) {
132 amrex::ParallelFor(masks,
133 [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4Tag<int> const& msk) noexcept
134 {
135 msk.dfab(i,j,k) = 0;
136 });
137 }
138
139 if (op == FabArrayBase::COPY)
140 {
141 if (is_thread_safe) {
142 detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
144 } else {
145 detail::fab_to_fab_atomic_cpy<value_type, value_type>(
146 loc_copy_tags, scomp, dcomp, ncomp, masks);
147 }
148 }
149 else
150 {
151 if (is_thread_safe) {
152 detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
154 } else {
155 detail::fab_to_fab_atomic_add<value_type, value_type>(
156 loc_copy_tags, scomp, dcomp, ncomp, masks);
157 }
158 }
159}
160#endif
161
162#endif
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & smallEnd() const &noexcept
Get the smallend of the BoxND.
Definition AMReX_Box.H:105
CpOp
parallel copy or add
Definition AMReX_FabArrayBase.H:393
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:344
FAB * fabPtr(const MFIter &mfi) noexcept
Return pointer to FAB.
Definition AMReX_FabArray.H:1560
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Definition AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:141
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:378
AMREX_GPU_HOST_DEVICE constexpr GpuTupleElement< I, GpuTuple< Ts... > >::type & get(GpuTuple< Ts... > &tup) noexcept
Definition AMReX_Tuple.H:179
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_TagParallelFor.H:26
Definition AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition AMReX_TagParallelFor.H:50
Definition AMReX_Dim3.H:12
parallel copy or add
Definition AMReX_FabArrayBase.H:536
bool m_threadsafe_loc
Definition AMReX_FabArrayBase.H:473
std::unique_ptr< CopyComTagsContainer > m_LocTags
Definition AMReX_FabArrayBase.H:475
Used by a bunch of routines when communicating via MPI.
Definition AMReX_FabArrayBase.H:194
Box sbox
Definition AMReX_FabArrayBase.H:196
int srcIndex
Definition AMReX_FabArrayBase.H:198
Box dbox
Definition AMReX_FabArrayBase.H:195
int dstIndex
Definition AMReX_FabArrayBase.H:197
Definition AMReX_TypeTraits.H:266
Definition AMReX_FBI.H:32
Definition AMReX_FBI.H:22