Block-Structured AMR Software Framework
AMReX_ParticleCommunication.H
Go to the documentation of this file.
1 #ifndef AMREX_PARTICLECOMMUNICATION_H_
2 #define AMREX_PARTICLECOMMUNICATION_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_Gpu.H>
6 #include <AMReX_GpuContainers.H>
7 #include <AMReX_IntVect.H>
9 #include <AMReX_MFIter.H>
10 #include <AMReX_Scan.H>
11 #include <AMReX_TypeTraits.H>
12 #include <AMReX_MakeParticle.H>
13 
14 #include <map>
15 
16 namespace amrex {
17 
19 {
20  template <class PTile>
21  void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
22  {
23  for(int i = 0; i < static_cast<int>(sizes.size()); ++i)
24  {
25  int offset = tiles[i]->numTotalParticles();
26  int nn = tiles[i]->getNumNeighbors();
27  tiles[i]->setNumNeighbors(nn + sizes[i]);
28  offsets.push_back(offset);
29  }
30  }
31 };
32 
34 {
35  template <class PTile>
36  void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
37  {
38  int N = static_cast<int>(sizes.size());
39 
40  std::map<PTile*, int> tile_sizes;
41  for(int i = 0; i < N; ++i) {
42  tile_sizes[tiles[i]] = tiles[i]->numParticles();
43  }
44 
45  for(int i = 0; i < N; ++i)
46  {
47  offsets.push_back(tile_sizes[tiles[i]]);
48  tile_sizes[tiles[i]] += sizes[i];
49  }
50 
51  for (auto& kv : tile_sizes) {
52  kv.first->resize(kv.second);
53  }
54  }
55 };
56 
58 {
63 
64  void clear ();
65 
66  void setNumLevels (int num_levels);
67 
68  void resize (int gid, int lev, int size);
69 
70  [[nodiscard]] int numCopies (int gid, int lev) const
71  {
72  if (m_boxes.size() <= lev) { return 0; }
73  auto mit = m_boxes[lev].find(gid);
74  return mit == m_boxes[lev].end() ? 0 : int(mit->second.size());
75  }
76 
    //! Number of AMR levels this copy operation currently spans.
    [[nodiscard]] int numLevels () const { return int(m_boxes.size()); }
78 };
79 
81 {
83 
87 
93 
94  Long m_NumSnds = 0;
95  int m_nrcvs = 0;
98 
101 
104 
106 
112 
115 
118 
121 
124 
    //! Size in bytes that one particle occupies in the communication
    //! buffers: the base particle (or just idcpu for pure-SoA layouts)
    //! plus every real/int component selected for communication.
    Long superParticleSize() const { return m_superparticle_size; }
126 
    /**
     * Build the communication plan for a particle copy operation.
     *
     * Counts how many particles are headed for each destination bucket (a
     * (level, grid) pair flattened by the container's BufferMap), turns the
     * counts into per-bucket buffer offsets via an exclusive scan, uploads
     * the component masks to the device, computes the size in bytes of one
     * communicated particle (m_superparticle_size), and starts the MPI
     * hand-shake via buildMPIStart().
     *
     * NOTE(review): this listing was produced by a documentation extraction
     * that dropped several original lines (marked below); the gaps must be
     * reconciled against the original header before this text is compiled.
     *
     * \param pc             particle container supplying the BufferMap
     * \param op             where each particle goes (boxes/levels/indices)
     * \param int_comp_mask  1/0 flags selecting int components to send
     * \param real_comp_mask 1/0 flags selecting real components to send
     * \param local          true: only neighbor ranks take part;
     *                       false: all ranks in the sub-communicator
     */
    template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
    void build (const PC& pc,
                const ParticleCopyOp& op,
                const Vector<int>& int_comp_mask,
                const Vector<int>& real_comp_mask,
                bool local)
    {
        BL_PROFILE("ParticleCopyPlan::build");

        m_local = local;

        const int ngrow = 1; // note - fix

        const int num_levels = op.numLevels();
        const int num_buckets = pc.BufferMap().numBuckets();

        if (m_local)
        {
            // Local mode: only geometric neighbor ranks are partners.
            m_neighbor_procs = pc.NeighborProcs(ngrow);
        }
        else
        {
            // Global mode: every rank is a potential partner.
            // NOTE(review): a line is missing here in this extraction —
            // presumably m_neighbor_procs is resized to the number of ranks
            // before being filled; confirm against the original source.
            std::iota(m_neighbor_procs.begin(), m_neighbor_procs.end(), 0);
        }

        // Per-bucket particle counts (device) and space for their scan.
        m_box_counts_d.resize(0);
        m_box_counts_d.resize(num_buckets+1, 0);
        m_box_offsets.resize(num_buckets+1);
        auto* p_dst_box_counts = m_box_counts_d.dataPtr();
        // Stable redistribution counts on the host so slot assignment is
        // sequential and reproducible; otherwise counting happens on the
        // device using atomics.
        auto getBucket = pc.stableRedistribute() ? pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor();

        Gpu::HostVector<unsigned int> h_box_counts;
        if (pc.stableRedistribute() ) {
            h_box_counts.resize(m_box_counts_d.size(), 0);
        }

        // Assign each outgoing particle a slot index within its bucket.
        m_dst_indices.resize(num_levels);
        for (int lev = 0; lev < num_levels; ++lev)
        {
            for (const auto& kv : pc.GetParticles(lev))
            {
                int gid = kv.first.first;
                int num_copies = op.numCopies(gid, lev);
                if (num_copies == 0) { continue; }
                m_dst_indices[lev][gid].resize(num_copies);

                if (pc.stableRedistribute()) {
                    // Host path: bring destination boxes/levels to the host,
                    // hand out slot indices in order, then push the indices
                    // back to the device.
                    const Gpu::DeviceVector<int>& d_boxes = op.m_boxes[lev].at(gid);
                    Gpu::HostVector<int> h_boxes(d_boxes.size());
                    Gpu::copy(Gpu::deviceToHost,d_boxes.begin(),d_boxes.end(),h_boxes.begin());

                    const Gpu::DeviceVector<int>& d_levs = op.m_levels[lev].at(gid);
                    Gpu::HostVector<int> h_levs(d_levs.size());
                    Gpu::copy(Gpu::deviceToHost,d_levs.begin(),d_levs.end(),h_levs.begin());

                    Gpu::HostVector<int> h_dst_indices(num_copies);
                    for (int i = 0; i < num_copies; ++i) {
                        // Entries with a negative destination box are skipped.
                        int dst_box = h_boxes[i];
                        if (dst_box >= 0) {
                            int dst_lev = h_levs[i];
                            int index = static_cast<int>(h_box_counts[getBucket(dst_lev, dst_box)]++);
                            h_dst_indices[i] = index;
                        }
                    }
                    Gpu::copy(Gpu::hostToDevice,h_dst_indices.begin(),h_dst_indices.end(),m_dst_indices[lev][gid].begin());
                }
                else {
                    // Device path: slot indices come from an atomic
                    // fetch-and-add on the per-bucket counters.
                    const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
                    const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
                    auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();
                    AMREX_FOR_1D ( num_copies, i,
                    {
                        int dst_box = p_boxes[i];
                        if (dst_box >= 0)
                        {
                            int dst_lev = p_levs[i];
                            int index = static_cast<int>(Gpu::Atomic::Add(
                                &p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
                            p_dst_indices[i] = index;
                        }
                    });
                }
            }
        }

        if (pc.stableRedistribute()) {
            // Publish the host-side counts to the device copy.
            Gpu::copy(Gpu::hostToDevice,h_box_counts.begin(),h_box_counts.end(),m_box_counts_d.begin());
        }

        // Exclusive scan: per-bucket counts -> per-bucket buffer offsets.
        // NOTE(review): the first line of this call was lost in extraction
        // (presumably the scan over m_box_counts_d); only the trailing
        // argument survives below — restore from the original source.
            m_box_offsets.begin());

        // Mirror the device counts on the host for the unpack phase.
        // NOTE(review): the first line of this copy call was lost in
        // extraction; only the trailing argument survives below.
        m_box_counts_h.resize(m_box_counts_d.size());
            m_box_counts_h.begin());

        // Padding corrections for the aligned per-rank send buffers.
        // NOTE(review): several lines were lost in extraction here (the
        // host-side setup of m_snd_pad_correction_h and the start of the
        // host-to-device copy); only the trailing argument survives below.
        m_snd_pad_correction_h.resize(0);
            m_snd_pad_correction_d.begin());

        // Upload the component selection masks for use inside GPU kernels.
        d_int_comp_mask.resize(int_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, int_comp_mask.begin(), int_comp_mask.end(),
                       d_int_comp_mask.begin());
        d_real_comp_mask.resize(real_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, real_comp_mask.begin(), real_comp_mask.end(),
                       d_real_comp_mask.begin());

        // NOTE(review): one line was lost in extraction here — likely a
        // stream synchronization after the async copies; confirm against
        // the original source.

        int NStructReal = PC::ParticleContainerType::NStructReal;
        int NStructInt = PC::ParticleContainerType::NStructInt;

        // Count only the extra runtime components that are flagged for
        // communication; the first AMREX_SPACEDIM + NStructReal real and
        // 2 + NStructInt int components are covered by the base particle
        // size added below.
        int num_real_comm_comp = 0;
        for (int i = AMREX_SPACEDIM + NStructReal; i < real_comp_mask.size(); ++i) {
            if (real_comp_mask[i]) {++num_real_comm_comp;}
        }

        int num_int_comm_comp = 0;
        for (int i = 2 + NStructInt; i < int_comp_mask.size(); ++i) {
            if (int_comp_mask[i]) {++num_int_comm_comp;}
        }

        // Bytes per communicated particle: base particle (or just the
        // 64-bit idcpu for pure-SoA layouts) plus the selected components.
        if constexpr (PC::ParticleType::is_soa_particle) {
            m_superparticle_size = sizeof(uint64_t); // idcpu
        } else {
            m_superparticle_size = sizeof(typename PC::ParticleType);
        }
        m_superparticle_size += num_real_comm_comp * sizeof(typename PC::ParticleType::RealType)
                              + num_int_comm_comp * sizeof(int);

        // Kick off the MPI hand-shake (who sends how many bytes to whom).
        buildMPIStart(pc.BufferMap(), m_superparticle_size);
    }
263 
264  void clear ();
265 
266  void buildMPIFinish (const ParticleBufferMap& map);
267 
268 private:
269 
270  void buildMPIStart (const ParticleBufferMap& map, Long psize);
271 
272  //
273  // Snds - a Vector with the number of bytes that is process will send to each proc.
274  // Rcvs - a Vector that, after calling this method, will contain the
275  // number of bytes this process will receive from each proc.
276  //
277  void doHandShake (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
278 
279  //
280  // In the local version of this method, each proc knows which other
281  // procs it could possibly receive messages from, meaning we can do
282  // this purely with point-to-point communication.
283  //
284  void doHandShakeLocal (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
285 
286  //
287  // In the global version, we don't know who we'll receive from, so we
288  // need to do some collective communication first.
289  //
290  static void doHandShakeGlobal (const Vector<Long>& Snds, Vector<Long>& Rcvs);
291 
292  //
293  // Another version of the above that is implemented using MPI All-to-All
294  //
295  static void doHandShakeAllToAll (const Vector<Long>& Snds, Vector<Long>& Rcvs);
296 
297  bool m_local;
298 };
299 
301 {
302  const unsigned int* m_box_offsets;
303  const std::size_t* m_pad_correction;
304 
307 
309  : m_box_offsets(plan.m_box_offsets.dataPtr()),
310  m_pad_correction(plan.m_snd_pad_correction_d.dataPtr()),
311  m_get_pid(map.getPIDFunctor()),
312  m_get_bucket(map.getBucketFunctor())
313  {}
314 
316  Long operator() (int dst_box, int dst_lev, std::size_t psize, int i) const
317  {
318  int dst_pid = m_get_pid(dst_lev, dst_box);
319  Long dst_offset = Long(psize)*(m_box_offsets[m_get_bucket(dst_lev, dst_box)] + i);
320  dst_offset += Long(m_pad_correction[dst_pid]);
321  return dst_offset;
322  }
323 };
324 
/**
 * Pack the outgoing particles described by `op`/`plan` into `snd_buffer`.
 *
 * The buffer is sized from the plan (the scanned bucket offsets, or the
 * per-rank send offsets when present).  Each particle is serialized at the
 * slot computed by GetSendBufferOffset; particles crossing a periodic
 * boundary have their position components shifted by the domain length in
 * the buffer copy (the source tile is left untouched).
 *
 * \param pc         particle container (geometry, buffer map, source tiles)
 * \param op         destination boxes/levels/src-indices/periodic shifts
 * \param plan       offsets, masks, and particle size built by
 *                   ParticleCopyPlan::build
 * \param snd_buffer byte buffer to fill; falls back to pinned-memory
 *                   allocation if the device arena lacks space
 */
template <class PC, class Buffer,
          std::enable_if_t<IsParticleContainer<PC>::value &&
                           std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
                                             Buffer>, int> foo = 0>
void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& plan,
                 Buffer& snd_buffer)
{
    BL_PROFILE("amrex::packBuffer");

    Long psize = plan.superParticleSize();

    int num_levels = op.numLevels();
    int num_buckets = pc.BufferMap().numBuckets();

    // Total bytes to send: either the scanned total particle count (last
    // entry of m_box_offsets) times the particle size, or the precomputed
    // per-rank send offsets when buildMPIStart filled them.
    std::size_t total_buffer_size = 0;
    if (plan.m_snd_offsets.empty())
    {
        unsigned int np = 0;
        Gpu::copy(Gpu::deviceToHost, plan.m_box_offsets.begin() + num_buckets,
                  plan.m_box_offsets.begin() + num_buckets + 1, &np);
        total_buffer_size = np*psize;
    }
    else
    {
        total_buffer_size = plan.m_snd_offsets.back();
    }

    // If the current arena cannot hold the buffer, fall back to pinned
    // host memory rather than failing the device allocation.
    if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
        snd_buffer.clear();
        snd_buffer.setArena(The_Pinned_Arena());
    }
    snd_buffer.resize(total_buffer_size);

    // Device-side masks selecting which components are serialized.
    const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
    const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();

    const auto plo = pc.Geom(0).ProbLoArray();
    const auto phi = pc.Geom(0).ProbHiArray();
    const auto is_per = pc.Geom(0).isPeriodicArray();
    for (int lev = 0; lev < num_levels; ++lev)
    {
        auto& plev = pc.GetParticles(lev);
        for (auto& kv : plev)
        {
            int gid = kv.first.first;
            int tid = kv.first.second;
            auto index = std::make_pair(gid, tid);

            auto& src_tile = plev.at(index);
            const auto& ptd = src_tile.getConstParticleTileData();

            int num_copies = op.numCopies(gid, lev);
            if (num_copies == 0) { continue; }

            // Raw pointers for capture into the GPU lambda below.
            const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
            const auto* p_levels = op.m_levels[lev].at(gid).dataPtr();
            const auto* p_src_indices = op.m_src_indices[lev].at(gid).dataPtr();
            const auto* p_periodic_shift = op.m_periodic_shift[lev].at(gid).dataPtr();
            const auto* p_dst_indices = plan.m_dst_indices[lev].at(gid).dataPtr();
            auto* p_snd_buffer = snd_buffer.dataPtr();
            GetSendBufferOffset get_offset(plan, pc.BufferMap());

            AMREX_FOR_1D ( num_copies, i,
            {
                // A negative destination box marks an entry that is not sent.
                int dst_box = p_boxes[i];
                if (dst_box >= 0)
                {
                    int dst_lev = p_levels[i];
                    auto dst_offset = get_offset(dst_box, dst_lev, psize, p_dst_indices[i]);
                    int src_index = p_src_indices[i];
                    ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);

                    // If this copy crosses a periodic boundary, rewrite the
                    // position components inside the buffer (not the tile).
                    const IntVect& pshift = p_periodic_shift[i];
                    bool do_periodic_shift =
                        AMREX_D_TERM( (is_per[0] && pshift[0] != 0),
                                   || (is_per[1] && pshift[1] != 0),
                                   || (is_per[2] && pshift[2] != 0) );

                    if (do_periodic_shift)
                    {
                        // Positions are the first AMREX_SPACEDIM reals of the
                        // packed record; shift them by the domain length.
                        ParticleReal pos[AMREX_SPACEDIM];
                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[dst_offset],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                        {
                            if (! is_per[idim]) { continue; }
                            if (pshift[idim] > 0) {
                                pos[idim] += phi[idim] - plo[idim];
                            } else if (pshift[idim] < 0) {
                                pos[idim] -= phi[idim] - plo[idim];
                            }
                        }
                        amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &pos[0],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                    }
                }
            });
        }
    }
}
425 
/**
 * Unpack the locally-kept particles from `snd_buffer` into their
 * destination tiles.
 *
 * First pass: count the particles arriving at every tile (via the host copy
 * of the per-bucket counts) and let `policy` resize the tiles, returning
 * the write offset for each.  Second pass: read each particle's bytes back
 * out of the send buffer — reusing GetSendBufferOffset with this rank's own
 * (gid, lev) as the destination — and append it to the tile.
 *
 * \param pc         particle container receiving the particles
 * \param plan       the plan built by ParticleCopyPlan::build
 * \param snd_buffer the buffer filled by packBuffer
 * \param policy     tile-resizing policy (neighbor vs. redistribute)
 */
template <class PC, class Buffer, class UnpackPolicy,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void unpackBuffer (PC& pc, const ParticleCopyPlan& plan, const Buffer& snd_buffer, UnpackPolicy const& policy)
{
    BL_PROFILE("amrex::unpackBuffer");

    using PTile = typename PC::ParticleTileType;

    int num_levels = pc.BufferMap().numLevels();
    Long psize = plan.superParticleSize();

    // count how many particles we have to add to each tile
    std::vector<int> sizes;
    std::vector<PTile*> tiles;
    for (int lev = 0; lev < num_levels; ++lev)
    {
        for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int gid = mfi.index();
            int tid = mfi.LocalTileIndex();
            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            int num_copies = plan.m_box_counts_h[pc.BufferMap().gridAndLevToBucket(gid, lev)];
            sizes.push_back(num_copies);
            tiles.push_back(&tile);
        }
    }

    // resize the tiles and compute offsets
    std::vector<int> offsets;
    policy.resizeTiles(tiles, sizes, offsets);

    // Masks selecting which components were serialized.
    const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
    const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();

    // local unpack
    int uindex = 0;  // walks sizes/offsets in the same MFIter order as above
    for (int lev = 0; lev < num_levels; ++lev)
    {
        auto& plev = pc.GetParticles(lev);
        for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int gid = mfi.index();
            int tid = mfi.LocalTileIndex();
            auto index = std::make_pair(gid, tid);

            auto& tile = plev[index];

            GetSendBufferOffset get_offset(plan, pc.BufferMap());
            auto p_snd_buffer = snd_buffer.dataPtr();

            int offset = offsets[uindex];
            int size = sizes[uindex];
            ++uindex;

            auto ptd = tile.getParticleTileData();
            AMREX_FOR_1D ( size, i,
            {
                // Read slot i of this tile's bucket in the send buffer and
                // append it at the tile position the policy reserved.
                auto src_offset = get_offset(gid, lev, psize, i);
                int dst_index = offset + i;
                ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
            });
        }
    }
}
490 
/**
 * Start the non-blocking exchange of packed particle data.
 *
 * Computes aligned receive offsets (and the padding corrections needed to
 * undo that alignment later), sizes the receive buffer, posts one
 * non-blocking receive per sending rank, and then sends this rank's packed
 * data.  Completion is awaited in communicateParticlesFinish().
 *
 * NOTE(review): this listing was produced by a documentation extraction
 * that dropped a few original lines (marked below); reconcile against the
 * original header before compiling.
 *
 * \param pc         particle container (unused without MPI)
 * \param plan       plan holding per-rank send/receive bookkeeping
 * \param snd_buffer packed bytes to send (from packBuffer)
 * \param rcv_buffer buffer to receive into; resized here
 */
template <class PC, class SndBuffer, class RcvBuffer,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndBuffer& snd_buffer, RcvBuffer& rcv_buffer)
{
    BL_PROFILE("amrex::communicateParticlesStart");

#ifdef AMREX_USE_MPI
    Long psize = plan.superParticleSize();
    const int NProcs = ParallelContext::NProcsSub();
    const int MyProc = ParallelContext::MyProcSub();

    // Single-rank runs have nothing to communicate.
    if (NProcs == 1) { return; }

    Vector<int> RcvProc;   // ranks we will receive from
    Vector<Long> rOffset;  // aligned byte offset of each rank's data

    plan.m_rcv_pad_correction_h.resize(0);
    plan.m_rcv_pad_correction_h.push_back(0);

    // Lay out the receive buffer: one aligned region per sending rank.
    Long TotRcvBytes = 0;
    for (int i = 0; i < NProcs; ++i) {
        if (plan.m_rcv_num_particles[i] > 0) {
            RcvProc.push_back(i);
            Long nbytes = plan.m_rcv_num_particles[i]*psize;
            std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
            TotRcvBytes = Long(amrex::aligned_size(acd, TotRcvBytes));
            rOffset.push_back(TotRcvBytes);
            TotRcvBytes += Long(amrex::aligned_size(acd, nbytes));
            plan.m_rcv_pad_correction_h.push_back(plan.m_rcv_pad_correction_h.back() + nbytes);
        }
    }

    // Convert cumulative byte counts into the padding each region carries.
    for (int i = 0; i < plan.m_nrcvs; ++i)
    {
        plan.m_rcv_pad_correction_h[i] = rOffset[i] - plan.m_rcv_pad_correction_h[i];
    }

    // NOTE(review): two lines were lost in extraction here — presumably the
    // resize of m_rcv_pad_correction_d and the start of the host-to-device
    // copy whose trailing argument survives below.
        plan.m_rcv_pad_correction_d.begin());

    rcv_buffer.resize(TotRcvBytes);

    plan.m_nrcvs = int(RcvProc.size());

    plan.m_particle_stats.resize(0);
    plan.m_particle_stats.resize(plan.m_nrcvs);

    plan.m_particle_rreqs.resize(0);
    plan.m_particle_rreqs.resize(plan.m_nrcvs);

    const int SeqNum = ParallelDescriptor::SeqNum();

    // Post receives.
    for (int i = 0; i < plan.m_nrcvs; ++i) {
        const auto Who = RcvProc[i];
        const auto offset = rOffset[i];
        Long nbytes = plan.m_rcv_num_particles[Who]*psize;
        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        const auto Cnt = amrex::aligned_size(acd, nbytes);

        AMREX_ASSERT(Cnt > 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);
        AMREX_ASSERT(amrex::aligned_size(acd, nbytes) % acd == 0);

        plan.m_particle_rreqs[i] =
            ParallelDescriptor::Arecv((char*) (rcv_buffer.dataPtr() + offset), Cnt, Who, SeqNum, ParallelContext::CommunicatorSub()).req();
    }

    // Nothing outbound from this rank: receives are posted, we are done.
    if (plan.m_NumSnds == 0) { return; }

    // Send.
    for (int i = 0; i < NProcs; ++i)
    {
        if (i == MyProc) { continue; }
        const auto Who = i;
        const auto Cnt = plan.m_snd_counts[i];
        if (Cnt == 0) { continue; }

        auto snd_offset = plan.m_snd_offsets[i];
        AMREX_ASSERT(plan.m_snd_counts[i] % ParallelDescriptor::sizeof_selected_comm_data_type(plan.m_snd_num_particles[i]*psize) == 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);

        ParallelDescriptor::Send((char const*)(snd_buffer.dataPtr()+snd_offset), Cnt, Who, SeqNum,
        // NOTE(review): the closing line of this Send call was lost in
        // extraction — presumably the communicator argument and the
        // terminating ");". Restore from the original source.
    }

    // NOTE(review): one more line was lost in extraction before the #else.
#else
    amrex::ignore_unused(pc,plan,snd_buffer,rcv_buffer);
#endif // MPI
}
583 
584 void communicateParticlesFinish (const ParticleCopyPlan& plan);
585 
586 template <class PC, class Buffer, class UnpackPolicy,
587  std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
588 void unpackRemotes (PC& pc, const ParticleCopyPlan& plan, Buffer& rcv_buffer, UnpackPolicy const& policy)
589 {
590  BL_PROFILE("amrex::unpackRemotes");
591 
592 #ifdef AMREX_USE_MPI
593  const int NProcs = ParallelContext::NProcsSub();
594  if (NProcs == 1) { return; }
595 
596  const int MyProc = ParallelContext::MyProcSub();
598  using PTile = typename PC::ParticleTileType;
599 
600  if (plan.m_nrcvs > 0)
601  {
602  const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
603  const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();
604  auto* p_rcv_buffer = rcv_buffer.dataPtr();
605 
606  std::vector<int> sizes;
607  std::vector<PTile*> tiles;
608  for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
609  {
610  int copy_size = plan.m_rcv_box_counts[i];
611  int lev = plan.m_rcv_box_levs[i];
612  int gid = plan.m_rcv_box_ids[i];
613  int tid = 0;
614  auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
615  sizes.push_back(copy_size);
616  tiles.push_back(&tile);
617  }
618 
619  Vector<int> offsets;
620  policy.resizeTiles(tiles, sizes, offsets);
622  int uindex = 0;
623  int procindex = 0, rproc = plan.m_rcv_box_pids[0];
624  for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
625  {
626  int lev = plan.m_rcv_box_levs[i];
627  int gid = plan.m_rcv_box_ids[i];
628  int tid = 0;
629  auto offset = plan.m_rcv_box_offsets[i];
630  procindex = (rproc == plan.m_rcv_box_pids[i]) ? procindex : procindex+1;
631  rproc = plan.m_rcv_box_pids[i];
632 
633  auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
634  auto ptd = tile.getParticleTileData();
635 
637  ParallelContext::global_to_local_rank(pc.ParticleDistributionMap(lev)[gid]));
638 
639  int dst_offset = offsets[uindex];
640  int size = sizes[uindex];
641  ++uindex;
642 
643  Long psize = plan.superParticleSize();
644  const auto* p_pad_adjust = plan.m_rcv_pad_correction_d.dataPtr();
645 
646  AMREX_FOR_1D ( size, ip, {
647  Long src_offset = psize*(offset + ip) + p_pad_adjust[procindex];
648  int dst_index = dst_offset + ip;
649  ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
650  p_comm_real, p_comm_int);
651  });
652 
654  }
655  }
656 #else
657  amrex::ignore_unused(pc,plan,rcv_buffer,policy);
658 #endif // MPI
659 }
660 
661 } // namespace amrex
662 
663 #endif // AMREX_PARTICLECOMMUNICATION_H_
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_FOR_1D(...)
Definition: AMReX_GpuLaunch.nolint.H:41
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_TERM(a, b, c)
Definition: AMReX_SPACE.H:129
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
Definition: AMReX_PODVector.H:246
size_type size() const noexcept
Definition: AMReX_PODVector.H:575
iterator begin() noexcept
Definition: AMReX_PODVector.H:601
iterator end() noexcept
Definition: AMReX_PODVector.H:605
void resize(size_type a_new_size)
Definition: AMReX_PODVector.H:625
MPI_Request req() const
Definition: AMReX_ParallelDescriptor.H:74
Definition: AMReX_ParticleBufferMap.H:53
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition: AMReX_GpuAtomic.H:198
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition: AMReX_GpuRange.H:26
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:121
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:233
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition: AMReX_Scan.H:1377
static constexpr DeviceToHost deviceToHost
Definition: AMReX_GpuContainers.H:99
static constexpr HostToDevice hostToDevice
Definition: AMReX_GpuContainers.H:98
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition: AMReX_MPMD.cpp:122
int MyProc()
Definition: AMReX_MPMD.cpp:117
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition: AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1109
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition: AMReX_ParallelDescriptor.H:613
Message Arecv(T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1130
Definition: AMReX_Amr.cpp:49
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition: AMReX_ParticleCommunication.H:493
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:588
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition: AMReX_ParticleCommunication.cpp:371
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
Arena * The_Pinned_Arena()
Definition: AMReX_Arena.cpp:649
const int[]
Definition: AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size of size bytes, this returns the next largest arena size that will align...
Definition: AMReX_Arena.H:30
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:428
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition: AMReX_ParticleCommunication.H:329
Definition: AMReX_ParticleBufferMap.H:35
Definition: AMReX_ParticleBufferMap.H:14
Definition: AMReX_ParticleCommunication.H:301
const unsigned int * m_box_offsets
Definition: AMReX_ParticleCommunication.H:302
GetPID m_get_pid
Definition: AMReX_ParticleCommunication.H:305
AMREX_FORCE_INLINE AMREX_GPU_DEVICE Long operator()(int dst_box, int dst_lev, std::size_t psize, int i) const
Definition: AMReX_ParticleCommunication.H:316
GetBucket m_get_bucket
Definition: AMReX_ParticleCommunication.H:306
const std::size_t * m_pad_correction
Definition: AMReX_ParticleCommunication.H:303
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition: AMReX_ParticleCommunication.H:308
Definition: AMReX_ParticleCommunication.H:19
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition: AMReX_ParticleCommunication.H:21
Definition: AMReX_ParticleCommunication.H:58
int numCopies(int gid, int lev) const
Definition: AMReX_ParticleCommunication.H:70
void setNumLevels(int num_levels)
Definition: AMReX_ParticleCommunication.cpp:14
void clear()
Definition: AMReX_ParticleCommunication.cpp:6
int numLevels() const
Definition: AMReX_ParticleCommunication.H:77
Vector< std::map< int, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition: AMReX_ParticleCommunication.H:62
Vector< std::map< int, Gpu::DeviceVector< int > > > m_boxes
Definition: AMReX_ParticleCommunication.H:59
Vector< std::map< int, Gpu::DeviceVector< int > > > m_levels
Definition: AMReX_ParticleCommunication.H:60
void resize(int gid, int lev, int size)
Definition: AMReX_ParticleCommunication.cpp:22
Vector< std::map< int, Gpu::DeviceVector< int > > > m_src_indices
Definition: AMReX_ParticleCommunication.H:61
Definition: AMReX_ParticleCommunication.H:81
Vector< int > m_rcv_box_ids
Definition: AMReX_ParticleCommunication.H:90
Vector< std::size_t > m_snd_offsets
Definition: AMReX_ParticleCommunication.H:113
Vector< int > m_rcv_box_counts
Definition: AMReX_ParticleCommunication.H:88
Vector< std::size_t > m_snd_counts
Definition: AMReX_ParticleCommunication.H:114
Long m_NumSnds
Definition: AMReX_ParticleCommunication.H:94
bool m_local
Definition: AMReX_ParticleCommunication.H:297
void doHandShake(const Vector< Long > &Snds, Vector< Long > &Rcvs) const
Definition: AMReX_ParticleCommunication.cpp:256
Vector< int > m_neighbor_procs
Definition: AMReX_ParticleCommunication.H:105
Vector< int > m_rcv_box_pids
Definition: AMReX_ParticleCommunication.H:91
Vector< int > m_rcv_box_levs
Definition: AMReX_ParticleCommunication.H:92
Gpu::DeviceVector< int > d_real_comp_mask
Definition: AMReX_ParticleCommunication.H:122
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition: AMReX_ParticleCommunication.H:117
static void doHandShakeAllToAll(const Vector< Long > &Snds, Vector< Long > &Rcvs)
Definition: AMReX_ParticleCommunication.cpp:304
Long m_superparticle_size
Definition: AMReX_ParticleCommunication.H:123
Vector< Long > m_Snds
Definition: AMReX_ParticleCommunication.H:107
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition: AMReX_ParticleCommunication.H:84
Vector< Long > m_Rcvs
Definition: AMReX_ParticleCommunication.H:108
Vector< int > m_rcv_box_offsets
Definition: AMReX_ParticleCommunication.H:89
Vector< std::size_t > m_snd_pad_correction_h
Definition: AMReX_ParticleCommunication.H:116
Vector< std::map< int, Gpu::DeviceVector< int > > > m_dst_indices
Definition: AMReX_ParticleCommunication.H:82
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition: AMReX_ParticleCommunication.H:86
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition: AMReX_ParticleCommunication.H:120
Vector< std::size_t > m_rOffset
Definition: AMReX_ParticleCommunication.H:110
void buildMPIStart(const ParticleBufferMap &map, Long psize)
Definition: AMReX_ParticleCommunication.cpp:48
Vector< Long > m_snd_num_particles
Definition: AMReX_ParticleCommunication.H:102
Vector< MPI_Request > m_particle_rreqs
Definition: AMReX_ParticleCommunication.H:100
Long superParticleSize() const
Definition: AMReX_ParticleCommunication.H:125
Vector< MPI_Status > m_particle_stats
Definition: AMReX_ParticleCommunication.H:99
Gpu::HostVector< int > m_rcv_data
Definition: AMReX_ParticleCommunication.H:111
Vector< MPI_Status > m_build_stats
Definition: AMReX_ParticleCommunication.H:96
void buildMPIFinish(const ParticleBufferMap &map)
Definition: AMReX_ParticleCommunication.cpp:207
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, bool local)
Definition: AMReX_ParticleCommunication.H:128
Vector< int > m_RcvProc
Definition: AMReX_ParticleCommunication.H:109
Vector< std::size_t > m_rcv_pad_correction_h
Definition: AMReX_ParticleCommunication.H:119
int m_nrcvs
Definition: AMReX_ParticleCommunication.H:95
Gpu::HostVector< unsigned int > m_box_counts_h
Definition: AMReX_ParticleCommunication.H:85
Vector< MPI_Request > m_build_rreqs
Definition: AMReX_ParticleCommunication.H:97
Vector< Long > m_rcv_num_particles
Definition: AMReX_ParticleCommunication.H:103
Gpu::DeviceVector< int > d_int_comp_mask
Definition: AMReX_ParticleCommunication.H:122
static void doHandShakeGlobal(const Vector< Long > &Snds, Vector< Long > &Rcvs)
Definition: AMReX_ParticleCommunication.cpp:327
void doHandShakeLocal(const Vector< Long > &Snds, Vector< Long > &Rcvs) const
Definition: AMReX_ParticleCommunication.cpp:263
void clear()
Definition: AMReX_ParticleCommunication.cpp:34
Definition: AMReX_ParticleCommunication.H:34
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition: AMReX_ParticleCommunication.H:36