Block-Structured AMR Software Framework
AMReX_NeighborParticlesGPUImpl.H
Go to the documentation of this file.
1 #ifndef AMREX_NEIGHBORPARTICLESGPUIMPL_H_
2 #define AMREX_NEIGHBORPARTICLESGPUIMPL_H_
3 #include <AMReX_Config.H>
4 
5 namespace detail
6 {
7  inline Vector<Box> getBoundaryBoxes(const Box& box, int ncells)
8  {
9  AMREX_ASSERT_WITH_MESSAGE(box.size() > 2*IntVect(AMREX_D_DECL(ncells, ncells, ncells)),
10  "Too many cells requested in getBoundaryBoxes");
11 
12  AMREX_ASSERT_WITH_MESSAGE(box.ixType().cellCentered(),
13  "Box must be cell-centered");
14 
15  Vector<Box> bl;
16  for (int i = 0; i < AMREX_SPACEDIM; ++i) {
17  BoxList face_boxes;
18  Box hi_face_box = adjCellHi(box, i, ncells);
19  Box lo_face_box = adjCellLo(box, i, ncells);
20  face_boxes.push_back(hi_face_box); bl.push_back(hi_face_box);
21  face_boxes.push_back(lo_face_box); bl.push_back(lo_face_box);
22  for (auto face_box : face_boxes) {
23  for (int j = 0; j < AMREX_SPACEDIM; ++j) {
24  if (i == j) { continue; }
25  BoxList edge_boxes;
26  Box hi_edge_box = adjCellHi(face_box, j, ncells);
27  Box lo_edge_box = adjCellLo(face_box, j, ncells);
28  edge_boxes.push_back(hi_edge_box); bl.push_back(hi_edge_box);
29  edge_boxes.push_back(lo_edge_box); bl.push_back(lo_edge_box);
30  for (auto edge_box : edge_boxes) {
31  for (int k = 0; k < AMREX_SPACEDIM; ++k) {
32  if ((j == k) || (i == k)) { continue; }
33  Box hi_corner_box = adjCellHi(edge_box, k, ncells);
34  Box lo_corner_box = adjCellLo(edge_box, k, ncells);
35  bl.push_back(hi_corner_box);
36  bl.push_back(lo_corner_box);
37  }
38  }
39  }
40  }
41  }
42 
43  RemoveDuplicates(bl);
44  return bl;
45  }
46 }
47 
48 template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
49 void
52 {
53  BL_PROFILE("NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>::buildNeighborMask");
54  m_neighbor_mask_initialized = true;
55  const int lev = 0;
56  const Geometry& geom = this->Geom(lev);
57  const BoxArray& ba = this->ParticleBoxArray(lev);
58  const DistributionMapping& dmap = this->ParticleDistributionMap(lev);
59 
60  if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
61 
62  if (m_neighbor_mask_ptr == nullptr ||
63  ! BoxArray::SameRefs(m_neighbor_mask_ptr->boxArray(), ba) ||
64  ! DistributionMapping::SameRefs(m_neighbor_mask_ptr->DistributionMap(), dmap))
65  {
66  const Periodicity& periodicity = geom.periodicity();
67  const std::vector<IntVect>& pshifts = periodicity.shiftIntVect();
68 
69  for (MFIter mfi(ba, dmap); mfi.isValid(); ++mfi)
70  {
71  int grid = mfi.index();
72 
73  std::set<NeighborTask> neighbor_grids;
74  for (auto pshift : pshifts)
75  {
76  const Box box = ba[mfi] + pshift;
77 
78  const bool first_only = false;
79  auto isecs = ba.intersections(box, first_only, m_num_neighbor_cells);
80 
81  for (auto& isec : isecs)
82  {
83  int nbor_grid = isec.first;
84  const Box isec_box = isec.second - pshift;
85  if ( (grid == nbor_grid) && (pshift == 0)) { continue; }
86  neighbor_grids.insert(NeighborTask(nbor_grid, isec_box, pshift));
87  const int global_rank = dmap[nbor_grid];
88  neighbor_procs.push_back(ParallelContext::global_to_local_rank(global_rank));
89  }
90  }
91 
92  Gpu::HostVector<Box> h_isec_boxes;
94  for (auto nbor_grid : neighbor_grids)
95  {
96  NeighborCode code;
97  code.grid_id = nbor_grid.grid_id;
98  code.periodic_shift = nbor_grid.periodic_shift;
99  h_code_arr.push_back(code);
100  h_isec_boxes.push_back(nbor_grid.box);
101  }
102 
103  m_code_array[grid].resize(h_code_arr.size());
104  Gpu::copyAsync(Gpu::hostToDevice, h_code_arr.begin(), h_code_arr.end(),
105  m_code_array[grid].begin());
106  m_isec_boxes[grid].resize(h_isec_boxes.size());
107  Gpu::copyAsync(Gpu::hostToDevice, h_isec_boxes.begin(), h_isec_boxes.end(),
108  m_isec_boxes[grid].begin());
109 
111  }
112 
113  RemoveDuplicates(neighbor_procs);
114  }
115 }
116 
117 template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
118 void
120 buildNeighborCopyOp (bool use_boundary_neighbor)
121 {
122  BL_PROFILE("NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>::buildNeighborCopyOp()");
123 
124  AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor);
125 
126  const int lev = 0;
127  const auto& geom = this->Geom(lev);
128  const auto dxi = this->Geom(lev).InvCellSizeArray();
129  const auto plo = this->Geom(lev).ProbLoArray();
130  const auto domain = this->Geom(lev).Domain();
131  auto& plev = this->GetParticles(lev);
132  auto& ba = this->ParticleBoxArray(lev);
133 
134  if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
135 
136  for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
137  {
138  int gid = mfi.index();
139  int tid = mfi.LocalTileIndex();
140  auto index = std::make_pair(gid, tid);
141 
142  auto& src_tile = plev[index];
143  auto& aos = src_tile.GetArrayOfStructs();
144  const size_t np_real = aos.numParticles();
145 
146  size_t np = np_real;
147  if (use_boundary_neighbor) {
148  np = m_boundary_particle_ids[lev][index].size();
149  }
150  else {
151  m_boundary_particle_ids.resize(1);
152  m_boundary_particle_ids[lev][index];
153  }
154 
155  const auto* p_bndry_pid = m_boundary_particle_ids[lev][index].dataPtr();
156 
157  Gpu::DeviceVector<int> counts(np + 1, 0);
158  Gpu::DeviceVector<int> offsets(np + 1);
159  auto p_counts = counts.dataPtr();
160  auto p_offsets = offsets.dataPtr();
161 
162  ParticleType* p_ptr = aos.data();
163  auto p_code_array = m_code_array[gid].dataPtr();
164  auto p_isec_boxes = m_isec_boxes[gid].dataPtr();
165  const int nisec_box = m_isec_boxes[gid].size();
166  // auto p_code_offsets = m_code_offsets[gid].dataPtr();
167 
168  AMREX_FOR_1D ( np, i,
169  {
170  // note that cannot use ternary statement here because p_bndry is not
171  // properly allocated when use_boundary_neighbor=false
172  int pid = i;
173  if (use_boundary_neighbor) {
174  pid = p_bndry_pid[i];
175  }
176  IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain);
177  for (int j=0; j<nisec_box; ++j) {
178  if (p_isec_boxes[j].contains(iv)) {
179  ++p_counts[i];
180  }
181  }
182  });
183 
184  amrex::Gpu::exclusive_scan(counts.begin(), counts.end(), offsets.begin());
185 
186  int num_copies;
187  Gpu::dtoh_memcpy_async(&num_copies, offsets.data()+np, sizeof(int));
189 
190  neighbor_copy_op.resize(gid, lev, num_copies);
191 
192  auto p_boxes = neighbor_copy_op.m_boxes[lev][gid].dataPtr();
193  auto p_levs = neighbor_copy_op.m_levels[lev][gid].dataPtr();
194  auto p_src_indices = neighbor_copy_op.m_src_indices[lev][gid].dataPtr();
195  auto p_periodic_shift = neighbor_copy_op.m_periodic_shift[lev][gid].dataPtr();
196 
198  AMREX_FOR_1D ( np, i,
199  {
200  int pid = i;
201  if (use_boundary_neighbor) {
202  pid = p_bndry_pid[i];
203  }
204  IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain);
205  int k = p_offsets[i];
206  for (int j=0; j<nisec_box; ++j) {
207  if (p_isec_boxes[j].contains(iv)) {
208  p_boxes[k] = p_code_array[j].grid_id;
209  p_levs[k] = 0;
210  p_periodic_shift[k] = p_code_array[j].periodic_shift;
211  p_src_indices[k] = pid;
212  ++k;
213  }
214  }
215  AMREX_ALWAYS_ASSERT(k == p_offsets[i+1]);
216  });
217 
219  }
220 }
221 
222 template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
223 void
226 {
227  BL_PROFILE("NeighborParticleContainer::fillNeighbors");
228 
229  AMREX_ASSERT(numParticlesOutOfRange(*this, 0) == 0);
230 
231  buildNeighborMask();
232  this->defineBufferMap();
233 
234  neighbor_copy_op.clear();
235  neighbor_copy_plan.clear();
236  buildNeighborCopyOp();
237  neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
238  ghost_real_comp, true);
239  updateNeighborsGPU(false);
240 }
241 
242 template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
243 void
245 updateNeighborsGPU (bool boundary_neighbors_only)
246 {
247  BL_PROFILE("NeighborParticleContainer::updateNeighborsGPU");
248 
249  if (boundary_neighbors_only) {
250  neighbor_copy_op.clear();
251  neighbor_copy_plan.clear();
252  buildNeighborCopyOp(true);
253  neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
254  ghost_real_comp, true);
255  }
256 
257  clearNeighbors();
258 
260  snd_buffer.setArena(The_Comms_Arena());
261  rcv_buffer.setArena(The_Comms_Arena());
262  }
263 
264  packBuffer(*this, neighbor_copy_op, neighbor_copy_plan, snd_buffer);
266  {
267  neighbor_copy_plan.buildMPIFinish(this->BufferMap());
268  communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, rcv_buffer);
269  unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
270  communicateParticlesFinish(neighbor_copy_plan);
271  unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
272  }
273  else
274  {
276  if (snd_buffer.arena()->isPinned()) {
277  neighbor_copy_plan.buildMPIFinish(this->BufferMap());
279  communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, pinned_rcv_buffer);
280  } else {
281  pinned_snd_buffer.resize(snd_buffer.size());
282  Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(), snd_buffer.dataPtr(), snd_buffer.size());
283  neighbor_copy_plan.buildMPIFinish(this->BufferMap());
285  communicateParticlesStart(*this, neighbor_copy_plan, pinned_snd_buffer, pinned_rcv_buffer);
286  }
287 
288  rcv_buffer.resize(pinned_rcv_buffer.size());
289  unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
290  communicateParticlesFinish(neighbor_copy_plan);
291  Gpu::htod_memcpy_async(rcv_buffer.dataPtr(), pinned_rcv_buffer.dataPtr(), pinned_rcv_buffer.size());
292  unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
293  }
294 
296 }
297 
298 template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
299 void
302 {
303  BL_PROFILE("NeighborParticleContainer::clearNeighborsGPU");
304 
305  this->reserveData();
306  this->resizeData();
307  for (int lev = 0; lev < this->numLevels(); ++lev)
308  {
309  for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
310  {
311  auto& ptile = this->DefineAndReturnParticleTile(lev, mfi);
312  ptile.setNumNeighbors(0);
313  }
314  }
315 }
316 
317 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
#define AMREX_FOR_1D(...)
Definition: AMReX_GpuLaunch.nolint.H:41
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
A collection of Boxes stored in an Array.
Definition: AMReX_BoxArray.H:550
std::vector< std::pair< int, Box > > intersections(const Box &bx) const
Return intersections of Box and BoxArray.
Long size() const noexcept
Return the number of boxes in the BoxArray.
Definition: AMReX_BoxArray.H:597
Calculates the distribution of FABs to MPI processes.
Definition: AMReX_DistributionMapping.H:41
Rectangular problem domain geometry.
Definition: AMReX_Geometry.H:73
bool isAnyPeriodic() const noexcept
Is domain periodic in any direction?
Definition: AMReX_Geometry.H:333
Periodicity periodicity() const noexcept
Definition: AMReX_Geometry.H:355
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
Definition: AMReX_NeighborParticles.H:35
typename ParticleContainerType::ParticleType ParticleType
Definition: AMReX_NeighborParticles.H:38
Definition: AMReX_PODVector.H:246
size_type size() const noexcept
Definition: AMReX_PODVector.H:575
T * data() noexcept
Definition: AMReX_PODVector.H:593
iterator begin() noexcept
Definition: AMReX_PODVector.H:601
iterator end() noexcept
Definition: AMReX_PODVector.H:605
T * dataPtr() noexcept
Definition: AMReX_PODVector.H:597
void push_back(const T &a_value)
Definition: AMReX_PODVector.H:556
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition: AMReX_Periodicity.H:17
std::vector< IntVect > shiftIntVect(IntVect const &nghost=IntVect(0)) const
Definition: AMReX_Periodicity.cpp:8
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:233
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition: AMReX_Scan.H:1377
static constexpr HostToDevice hostToDevice
Definition: AMReX_GpuContainers.H:98
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:265
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
bool UseGpuAwareMpi()
Definition: AMReX_ParallelDescriptor.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > adjCellLo(const BoxND< dim > &b, int dir, int len=1) noexcept
Returns the cell centered BoxND of length len adjacent to b on the low end along the coordinate direc...
Definition: AMReX_Box.H:1591
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition: AMReX_ParticleCommunication.H:496
BoxND< AMREX_SPACEDIM > Box
Definition: AMReX_BaseFwd.H:27
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi) noexcept
Returns the cell index for a given particle using the provided lower bounds and cell sizes.
Definition: AMReX_ParticleUtil.H:374
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:596
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > adjCellHi(const BoxND< dim > &b, int dir, int len=1) noexcept
Similar to adjCellLo but builds an adjacent BoxND on the high end.
Definition: AMReX_Box.H:1612
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition: AMReX_ParticleCommunication.cpp:384
Arena * The_Comms_Arena()
Definition: AMReX_Arena.cpp:669
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box correspond to the input i...
Definition: AMReX_ParticleUtil.H:34
void RemoveDuplicates(Vector< T > &vec)
Definition: AMReX_Vector.H:190
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:431
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition: AMReX_ParticleCommunication.H:332
Definition: AMReX_FabArrayCommI.H:896
Vector< Box > getBoundaryBoxes(const Box &box, int ncells)
Definition: AMReX_NeighborParticlesGPUImpl.H:7
Definition: AMReX_NeighborParticles.H:16
IntVect periodic_shift
Definition: AMReX_NeighborParticles.H:18
int grid_id
Definition: AMReX_NeighborParticles.H:17
Definition: AMReX_NeighborParticles.H:431
Definition: AMReX_ParticleCommunication.H:19