Block-Structured AMR Software Framework
AMReX_PCI.H
Go to the documentation of this file.
1 #ifndef AMREX_PCI_H_
2 #define AMREX_PCI_H_
3 
4 template <class FAB>
5 void
6 FabArray<FAB>::PC_local_cpu (const CPC& thecpc, FabArray<FAB> const& src,
7  int scomp, int dcomp, int ncomp, CpOp op)
8 {
9  auto const N_locs = static_cast<int>(thecpc.m_LocTags->size());
10  if (N_locs == 0) { return; }
11  bool is_thread_safe = thecpc.m_threadsafe_loc;
12 
13  if (is_thread_safe)
14  {
15 #ifdef AMREX_USE_OMP
16 #pragma omp parallel for
17 #endif
18  for (int i = 0; i < N_locs; ++i)
19  {
20  const CopyComTag& tag = (*thecpc.m_LocTags)[i];
21  if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
22  // avoid self copy or plus
23  const FAB* sfab = &(src[tag.srcIndex]);
24  FAB* dfab = &(get(tag.dstIndex));
25  if (op == FabArrayBase::COPY)
26  {
27  dfab->template copy<RunOn::Host>(*sfab, tag.sbox, scomp, tag.dbox, dcomp, ncomp);
28  }
29  else
30  {
31  dfab->template plus<RunOn::Host>(*sfab, tag.sbox, tag.dbox, scomp, dcomp, ncomp);
32  }
33  }
34  }
35  }
36  else
37  {
39  for (int i = 0; i < N_locs; ++i)
40  {
41  const CopyComTag& tag = (*thecpc.m_LocTags)[i];
42  if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
43  loc_copy_tags[tag.dstIndex].push_back
44  ({src.fabPtr(tag.srcIndex), tag.dbox, tag.sbox.smallEnd()-tag.dbox.smallEnd()});
45  }
46  }
47 
48 #ifdef AMREX_USE_OMP
49 #pragma omp parallel
50 #endif
51  for (MFIter mfi(*this); mfi.isValid(); ++mfi)
52  {
53  const auto& tags = loc_copy_tags[mfi];
54  auto dfab = this->array(mfi);
55  if (op == FabArrayBase::COPY)
56  {
57  for (auto const & tag : tags)
58  {
59  auto const sfab = tag.sfab->array();
60  Dim3 offset = tag.offset.dim3();
61  amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
62  [=] (int i, int j, int k, int n) noexcept
63  {
64  dfab(i,j,k,dcomp+n) = sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
65  });
66  }
67  }
68  else
69  {
70  for (auto const & tag : tags)
71  {
72  auto const sfab = tag.sfab->array();
73  Dim3 offset = tag.offset.dim3();
74  amrex::LoopConcurrentOnCpu (tag.dbox, ncomp,
75  [=] (int i, int j, int k, int n) noexcept
76  {
77  dfab(i,j,k,dcomp+n) += sfab(i+offset.x,j+offset.y,k+offset.z,scomp+n);
78  });
79  }
80  }
81  }
82  }
83 }
84 
85 #ifdef AMREX_USE_GPU
86 template <class FAB>
87 void
88 FabArray<FAB>::PC_local_gpu (const CPC& thecpc, FabArray<FAB> const& src,
89  int scomp, int dcomp, int ncomp, CpOp op)
90 {
91  int N_locs = thecpc.m_LocTags->size();
92  if (N_locs == 0) { return; }
93  bool is_thread_safe = thecpc.m_threadsafe_loc;
94 
95  using TagType = Array4CopyTag<value_type>;
96  Vector<TagType> loc_copy_tags;
97  loc_copy_tags.reserve(N_locs);
98 
99  Vector<BaseFab<int> > maskfabs;
100  Vector<Array4Tag<int> > masks;
101  if (!is_thread_safe)
102  {
103  if ((op == FabArrayBase::COPY && !amrex::IsStoreAtomic<value_type>::value) ||
104  (op == FabArrayBase::ADD && !amrex::HasAtomicAdd <value_type>::value))
105  {
106  maskfabs.resize(this->local_size());
107  masks.reserve(N_locs);
108  }
109  }
110 
111  for (int i = 0; i < N_locs; ++i)
112  {
113  const CopyComTag& tag = (*thecpc.m_LocTags)[i];
114  if (this != &src || tag.dstIndex != tag.srcIndex || tag.sbox != tag.dbox) {
115  int li = this->localindex(tag.dstIndex);
116  loc_copy_tags.push_back
117  ({this->atLocalIdx(li).array(),
118  src.fabPtr(tag.srcIndex)->const_array(),
119  tag.dbox,
120  (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
121 
122  if (maskfabs.size() > 0) {
123  if (!maskfabs[li].isAllocated()) {
124  maskfabs[li].resize(this->atLocalIdx(li).box());
125  }
126  masks.emplace_back(Array4Tag<int>{maskfabs[li].array()});
127  }
128  }
129  }
130 
131  if (maskfabs.size() > 0) {
132  amrex::ParallelFor(masks,
133  [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4Tag<int> const& msk) noexcept
134  {
135  msk.dfab(i,j,k) = 0;
136  });
137  }
138 
139  if (op == FabArrayBase::COPY)
140  {
141  if (is_thread_safe) {
142  detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
144  } else {
145  detail::fab_to_fab_atomic_cpy<value_type, value_type>(
146  loc_copy_tags, scomp, dcomp, ncomp, masks);
147  }
148  }
149  else
150  {
151  if (is_thread_safe) {
152  detail::fab_to_fab<value_type, value_type>(loc_copy_tags, scomp,
154  } else {
155  detail::fab_to_fab_atomic_add<value_type, value_type>(
156  loc_copy_tags, scomp, dcomp, ncomp, masks);
157  }
158  }
159 }
160 #endif
161 
162 #endif
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & smallEnd() const &noexcept
Get the smallend of the BoxND.
Definition: AMReX_Box.H:105
CpOp
parallel copy or add
Definition: AMReX_FabArrayBase.H:393
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:343
FAB * fabPtr(const MFIter &mfi) noexcept
Return pointer to FAB.
Definition: AMReX_FabArray.H:1497
a one-thingy-per-box distributed object
Definition: AMReX_LayoutData.H:13
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
@ FAB
Definition: AMReX_AmrvisConstants.H:86
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:377
constexpr AMREX_GPU_HOST_DEVICE GpuTupleElement< I, GpuTuple< Ts... > >::type & get(GpuTuple< Ts... > &tup) noexcept
Definition: AMReX_Tuple.H:179
BoxArray const & boxArray(FabArrayBase const &fa)
Definition: AMReX_TagParallelFor.H:26
Definition: AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition: AMReX_TagParallelFor.H:50
Definition: AMReX_Dim3.H:12
parallel copy or add
Definition: AMReX_FabArrayBase.H:536
bool m_threadsafe_loc
Definition: AMReX_FabArrayBase.H:473
std::unique_ptr< CopyComTagsContainer > m_LocTags
Definition: AMReX_FabArrayBase.H:475
Used by a bunch of routines when communicating via MPI.
Definition: AMReX_FabArrayBase.H:194
Box sbox
Definition: AMReX_FabArrayBase.H:196
int srcIndex
Definition: AMReX_FabArrayBase.H:198
Box dbox
Definition: AMReX_FabArrayBase.H:195
int dstIndex
Definition: AMReX_FabArrayBase.H:197
Definition: AMReX_TypeTraits.H:56
Definition: AMReX_TypeTraits.H:266
Definition: AMReX_FBI.H:32
Definition: AMReX_FBI.H:22