Block-Structured AMR Software Framework
AMReX_FabArrayCommI.H
Go to the documentation of this file.
1 
2 #include <AMReX_FBI.H>
3 #include <AMReX_PCI.H>
4 
// Nonblocking first half of FillBoundary (the "EP" variant also serves
// EnforcePeriodicity and OverrideSync).  Looks up the cached comm meta-data
// (FB), does the purely-local ghost-cell copies, and -- when compiled with
// MPI -- posts the receives, packs and posts the sends, stashing all
// in-flight state in `fbd` for the matching FillBoundary_finish().
//
// scomp/ncomp: first component and number of components to fill.
// nghost:      ghost cells to fill in each direction.
// period:      periodicity used when wrapping boundary intersections.
// cross:       if true, corner ghost cells are excluded from the FB.
// enforce_periodicity_only / override_sync: special modes; they may not both
//     be set (asserted below).
5 template <class FAB>
6 template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
7 void
8 FabArray<FAB>::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost,
9  const Periodicity& period, bool cross,
10  bool enforce_periodicity_only,
11  bool override_sync)
{
13  BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: FB");
14  BL_PROFILE("FillBoundary_nowait()");
15
// A previous nowait must have been completed (fbd consumed) before a new
// one may start.
16  AMREX_ASSERT_WITH_MESSAGE(!fbd, "FillBoundary_nowait() called when comm operation already in progress.");
17  AMREX_ASSERT(!enforce_periodicity_only || !override_sync);
18
// Decide, per mode, whether this call can be a no-op.
19  bool work_to_do;
20  if (enforce_periodicity_only) {
21  work_to_do = period.isAnyPeriodic();
22  } else if (override_sync) {
23  work_to_do = (nghost.max() > 0) || !is_cell_centered();
24  } else {
25  work_to_do = nghost.max() > 0;
26  }
27  if (!work_to_do) { return; }
28
// Cached comm meta-data: local tags plus per-rank send/receive tag maps.
29  const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync);
30
// Single-rank path: only local copies exist; pick the GPU/graph/CPU kernel.
31  if (ParallelContext::NProcsSub() == 1)
32  {
33  //
34  // There can only be local work to do.
35  //
36  int N_locs = (*TheFB.m_LocTags).size();
37  if (N_locs == 0) { return; }
38 #ifdef AMREX_USE_GPU
39  if (Gpu::inLaunchRegion())
40  {
41 #if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
42  if (Gpu::inGraphRegion())
43  {
44  FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
45  }
46  else
47 #endif
48  {
49  FB_local_copy_gpu(TheFB, scomp, ncomp);
50  }
51  }
52  else
53 #endif
54  {
55  FB_local_copy_cpu(TheFB, scomp, ncomp);
56  }
57
58  return;
59  }
60
61 #ifdef BL_USE_MPI
62
63  //
64  // Do this before prematurely exiting if running in parallel.
65  // Otherwise sequence numbers will not match across MPI processes.
66  //
// NOTE(review): this listing omits original line 67 -- presumably
// "int SeqNum = ParallelDescriptor::SeqNum();", since SeqNum is used as the
// message tag below.  Confirm against the upstream source.
68
69  const int N_locs = TheFB.m_LocTags->size();
70  const int N_rcvs = TheFB.m_RcvTags->size();
71  const int N_snds = TheFB.m_SndTags->size();
72
73  if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
74  // No work to do.
75  return;
76  }
77
// Record everything FillBoundary_finish() will need.
78  fbd = std::make_unique<FBData<FAB>>();
79  fbd->fb = &TheFB;
80  fbd->scomp = scomp;
81  fbd->ncomp = ncomp;
82  fbd->tag = SeqNum;
83
84  //
85  // Post rcvs. Allocate one chunk of space to hold'm all.
86  //
87
88  if (N_rcvs > 0) {
89  PostRcvs<BUF>(*TheFB.m_RcvTags, fbd->the_recv_data,
90  fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
91  ncomp, SeqNum);
92  fbd->recv_stat.resize(N_rcvs);
93  }
94
95  //
96  // Post send's
97  //
98  char*& the_send_data = fbd->the_send_data;
99  Vector<char*> & send_data = fbd->send_data;
100  Vector<std::size_t> send_size;
101  Vector<int> send_rank;
102  Vector<MPI_Request>& send_reqs = fbd->send_reqs;
// NOTE(review): this listing omits original line 103 -- presumably the
// declaration of `send_cctc` (Vector<const CopyComTagsContainer*>) used by
// the pack calls below.  Confirm against the upstream source.
104
105  if (N_snds > 0)
106  {
107  PrepareSendBuffers<BUF>(*TheFB.m_SndTags, the_send_data, send_data, send_size, send_rank,
108  send_reqs, send_cctc, ncomp);
109
110 #ifdef AMREX_USE_GPU
111  if (Gpu::inLaunchRegion())
112  {
113 #if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
114  if (Gpu::inGraphRegion()) {
115  FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
116  }
117  else
118 #endif
119  {
120  pack_send_buffer_gpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
121  }
122  }
123  else
124 #endif
125  {
126  pack_send_buffer_cpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
127  }
128
129  AMREX_ASSERT(send_reqs.size() == N_snds);
130  PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
131  }
132
// Give MPI a chance to make progress on the just-posted receives.
133  FillBoundary_test();
134
135  //
136  // Do the local work. Hope for a bit of communication/computation overlap.
137  //
138  if (N_locs > 0)
139  {
140 #ifdef AMREX_USE_GPU
141  if (Gpu::inLaunchRegion())
142  {
143 #if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
144  if (Gpu::inGraphRegion()) {
145  FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
146  }
147  else
148 #endif
149  {
150  FB_local_copy_gpu(TheFB, scomp, ncomp);
151  }
152  }
153  else
154 #endif
155  {
156  FB_local_copy_cpu(TheFB, scomp, ncomp);
157  }
158
159  FillBoundary_test();
160  }
161
162 #endif /*BL_USE_MPI*/
163 }
164 
// Second half of FillBoundary: waits for the receives posted by
// FBEP_nowait(), unpacks them into this FabArray's ghost cells, waits for the
// sends, releases the comm buffers, and clears the in-flight state `fbd`.
// A no-MPI build compiles this to an empty body.
165 template <class FAB>
166 template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
167 void
// NOTE(review): this listing omits original line 168 -- presumably the
// function header "FabArray<FAB>::FillBoundary_finish ()".  Confirm against
// the upstream source.
169 {
170 #ifdef AMREX_USE_MPI
171
172  BL_PROFILE("FillBoundary_finish()");
// (original line 173 is not shown in this listing)
174
// No comm in flight (e.g. nowait returned early): record zero filled ghosts.
175  if (!fbd) { n_filled = IntVect::TheZeroVector(); return; }
176
177  const FB* TheFB = fbd->fb;
178  const auto N_rcvs = static_cast<int>(TheFB->m_RcvTags->size());
179  if (N_rcvs > 0)
180  {
// Map each posted receive back to its copy-tag container; entries whose
// message size is zero stay nullptr and are skipped by the unpackers.
181  Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs,nullptr);
182  for (int k = 0; k < N_rcvs; k++)
183  {
184  if (fbd->recv_size[k] > 0)
185  {
186  auto const& cctc = TheFB->m_RcvTags->at(fbd->recv_from[k]);
187  recv_cctc[k] = &cctc;
188  }
189  }
190
// Receives with a null buffer were never really posted; don't wait on them.
191  int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(), nullptr);
192
193  if (actual_n_rcvs > 0) {
194  ParallelDescriptor::Waitall(fbd->recv_reqs, fbd->recv_stat);
195 #ifdef AMREX_DEBUG
// Verify each message matched the byte count we expected for its tag.
196  if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
197  {
198  amrex::Abort("FillBoundary_finish failed with wrong message size");
199  }
200 #endif
201  }
202
203  bool is_thread_safe = TheFB->m_threadsafe_rcv;
204
// Unpack on GPU (CUDA-graph variant when capturing), else on CPU.
205 #ifdef AMREX_USE_GPU
206  if (Gpu::inLaunchRegion())
207  {
208 #if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
209  if (Gpu::inGraphRegion())
210  {
211  FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
212  fbd->recv_data, fbd->recv_size,
213  recv_cctc, is_thread_safe);
214  }
215  else
216 #endif
217  {
218  unpack_recv_buffer_gpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
219  recv_cctc, FabArrayBase::COPY, is_thread_safe);
220  }
221  }
222  else
223 #endif
224  {
225  unpack_recv_buffer_cpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
226  recv_cctc, FabArrayBase::COPY, is_thread_safe);
227  }
228
229  if (fbd->the_recv_data)
230  {
231  amrex::The_Comms_Arena()->free(fbd->the_recv_data);
232  fbd->the_recv_data = nullptr;
233  }
234  }
235
// Sends must complete before their (single chunk) buffer can be freed.
236  const auto N_snds = static_cast<int>(TheFB->m_SndTags->size());
237  if (N_snds > 0) {
238  Vector<MPI_Status> stats(fbd->send_reqs.size());
239  ParallelDescriptor::Waitall(fbd->send_reqs, stats);
240  amrex::The_Comms_Arena()->free(fbd->the_send_data);
241  fbd->the_send_data = nullptr;
242  }
243
// Comm operation fully retired; allow the next nowait to start.
244  fbd.reset();
245
246 #endif
247 }
248 
249 // \cond CODEGEN
// Blocking ParallelCopy: copies/adds `ncomp` components from `src` (starting
// at scomp, including snghost ghosts) into this FabArray (starting at dcomp,
// filling up to dnghost ghosts), implemented as nowait + finish.
250 template <class FAB>
251 void
// NOTE(review): this listing omits original line 252 -- presumably the
// function header "FabArray<FAB>::ParallelCopy (const FabArray<FAB>& src,".
// Confirm against the upstream source.
253  int scomp,
254  int dcomp,
255  int ncomp,
256  const IntVect& snghost,
257  const IntVect& dnghost,
258  const Periodicity& period,
259  CpOp op,
260  const FabArrayBase::CPC * a_cpc)
261 {
262  BL_PROFILE("FabArray::ParallelCopy()");
263
264  ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period, op, a_cpc);
// NOTE(review): this listing omits original line 265 -- presumably
// "ParallelCopy_finish();", which makes this overload blocking.  Confirm
// against the upstream source.
266 }
267 
// Blocking variant of ParallelCopy that writes ONLY into ghost cells of the
// destination (to_ghost_cells_only = true), leaving valid cells untouched.
268 template <class FAB>
269 void
270 FabArray<FAB>::ParallelCopyToGhost (const FabArray<FAB>& src,
271  int scomp,
272  int dcomp,
273  int ncomp,
274  const IntVect& snghost,
275  const IntVect& dnghost,
276  const Periodicity& period)
277 {
278  BL_PROFILE("FabArray::ParallelCopyToGhost()");
279
280  ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
281  FabArrayBase::COPY, nullptr, true);
// NOTE(review): this listing omits original line 282 -- presumably
// "ParallelCopyToGhost_finish();", which makes this overload blocking.
// Confirm against the upstream source.
283 }
284 
// Nonblocking variant of ParallelCopyToGhost: same arguments, but returns
// after posting communication; pair with ParallelCopyToGhost_finish().
285 template <class FAB>
286 void
287 FabArray<FAB>::ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
288  int scomp,
289  int dcomp,
290  int ncomp,
291  const IntVect& snghost,
292  const IntVect& dnghost,
293  const Periodicity& period)
294 {
// Delegate with COPY op, no precomputed CPC, and to_ghost_cells_only = true.
295  ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
296  FabArrayBase::COPY, nullptr, true);
297 }
298 
// Completes a ParallelCopyToGhost_nowait().
299 template <class FAB>
300 void
301 FabArray<FAB>::ParallelCopyToGhost_finish ()
302 {
// NOTE(review): this listing omits original line 303 -- presumably
// "ParallelCopy_finish();".  Confirm against the upstream source.
304 }
305 
306 
// Nonblocking first half of ParallelCopy.  Handles three cases in order:
// (1) identical layout and no ghosts/periodicity -> plain local Copy/Add;
// (2) single rank -> local intersection copies only;
// (3) MPI -> posts receives, packs/posts sends, does local work, batching at
//     most FabArrayBase::MaxComp components per pass to bound buffer memory.
// In-flight state is kept in `pcd` for ParallelCopy_finish().
//
// src:            source FabArray (may be *this; self copy/plus is skipped).
// scomp/dcomp:    first source / destination component; ncomp: count.
// snghost/dnghost: ghost cells read from src / written in *this.
// period:         periodicity for wrapping intersections.
// op:             FabArrayBase::COPY or ADD (asserted below).
// a_cpc:          optional precomputed comm meta-data; else getCPC() is used.
// to_ghost_cells_only: restrict writes to destination ghost cells.
307 template <class FAB>
308 void
309 FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
310  int scomp,
311  int dcomp,
312  int ncomp,
313  const IntVect& snghost,
314  const IntVect& dnghost,
315  const Periodicity& period,
316  CpOp op,
317  const FabArrayBase::CPC * a_cpc,
318  bool to_ghost_cells_only)
319 {
320  BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC");
321  BL_PROFILE("FabArray::ParallelCopy_nowait()");
322
323  AMREX_ASSERT_WITH_MESSAGE(!pcd, "ParallelCopy_nowait() called when comm operation already in progress.");
324
325  if (empty() || src.empty()) {
326  return;
327  }
328
329  BL_ASSERT(op == FabArrayBase::COPY || op == FabArrayBase::ADD);
330  BL_ASSERT(boxArray().ixType() == src.boxArray().ixType());
331  BL_ASSERT(src.nGrowVect().allGE(snghost));
332  BL_ASSERT( nGrowVect().allGE(dnghost));
333
334  n_filled = dnghost;
335
// Case (1): same boxes and distribution, no ghosts, no periodicity, not
// ghost-only -> a direct fab-by-fab Copy/Add suffices, no comm meta-data.
336  if ((src.boxArray().ixType().cellCentered() || op == FabArrayBase::COPY) &&
337  (boxarray == src.boxarray && distributionMap == src.distributionMap) &&
338  snghost == IntVect::TheZeroVector() &&
339  dnghost == IntVect::TheZeroVector() &&
340  !period.isAnyPeriodic() && !to_ghost_cells_only)
341  {
342  //
343  // Short-circuit full intersection code if we're doing copy()s or if
344  // we're doing plus()s on cell-centered data. Don't do plus()s on
345  // non-cell-centered data this simplistic way.
346  //
347  if (this != &src) { // avoid self copy or plus
348  if (op == FabArrayBase::COPY) {
349  Copy(*this, src, scomp, dcomp, ncomp, IntVect(0));
350  } else {
351  Add(*this, src, scomp, dcomp, ncomp, IntVect(0));
352  }
353  }
354  return;
355  }
356
357  const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period, to_ghost_cells_only);
358
// Case (2): single rank -> only local intersection copies exist.
359  if (ParallelContext::NProcsSub() == 1)
360  {
361  //
362  // There can only be local work to do.
363  //
364
365  int N_locs = (*thecpc.m_LocTags).size();
366  if (N_locs == 0) { return; }
367 #ifdef AMREX_USE_GPU
368  if (Gpu::inLaunchRegion())
369  {
370  PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op);
371  }
372  else
373 #endif
374  {
375  PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
376  }
377
378  return;
379  }
380
381 #ifdef BL_USE_MPI
382
383  //
384  // Do this before prematurely exiting if running in parallel.
385  // Otherwise sequence numbers will not match across MPI processes.
386  //
387  int tag = ParallelDescriptor::SeqNum();
388
389  const int N_snds = thecpc.m_SndTags->size();
390  const int N_rcvs = thecpc.m_RcvTags->size();
391  const int N_locs = thecpc.m_LocTags->size();
392
393  if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
394  //
395  // No work to do.
396  //
397
398  return;
399  }
400
401  //
402  // Send/Recv at most MaxComp components at a time to cut down memory usage.
403  //
404  int NCompLeft = ncomp;
405  int SC = scomp, DC = dcomp, NC;
406
// Each pass moves NC = min(NCompLeft, MaxComp) components; intermediate
// passes are completed inside the loop, the last pass is left in flight
// for ParallelCopy_finish().
407  for (int ipass = 0; ipass < ncomp; )
408  {
409  pcd = std::make_unique<PCData<FAB>>();
410  pcd->cpc = &thecpc;
411  pcd->src = &src;
412  pcd->op = op;
413  pcd->tag = tag;
414
415  NC = std::min(NCompLeft,FabArrayBase::MaxComp);
416  const bool last_iter = (NCompLeft == NC);
417
418  pcd->SC = SC;
419  pcd->DC = DC;
420  pcd->NC = NC;
421
422  //
423  // Post rcvs. Allocate one chunk of space to hold'm all.
424  //
425  pcd->the_recv_data = nullptr;
426
427  pcd->actual_n_rcvs = 0;
428  if (N_rcvs > 0) {
429  PostRcvs(*thecpc.m_RcvTags, pcd->the_recv_data,
430  pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
// Zero-size entries have no real message behind them.
431  pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
432  }
433
434  //
435  // Post send's
436  //
437  Vector<char*> send_data;
438  Vector<std::size_t> send_size;
439  Vector<int> send_rank;
440  Vector<const CopyComTagsContainer*> send_cctc;
441
442  if (N_snds > 0)
443  {
444  src.PrepareSendBuffers(*thecpc.m_SndTags, pcd->the_send_data, send_data, send_size,
445  send_rank, pcd->send_reqs, send_cctc, NC);
446
447 #ifdef AMREX_USE_GPU
448  if (Gpu::inLaunchRegion())
449  {
450  pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc);
451  }
452  else
453 #endif
454  {
455  pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
456  }
457
458  AMREX_ASSERT(pcd->send_reqs.size() == N_snds);
459  FabArray<FAB>::PostSnds(send_data, send_size, send_rank, pcd->send_reqs, pcd->tag);
460  }
461
462  //
463  // Do the local work. Hope for a bit of communication/computation overlap.
464  //
465  if (N_locs > 0)
466  {
467 #ifdef AMREX_USE_GPU
468  if (Gpu::inLaunchRegion())
469  {
470  PC_local_gpu(thecpc, src, SC, DC, NC, op);
471  }
472  else
473 #endif
474  {
475  PC_local_cpu(thecpc, src, SC, DC, NC, op);
476  }
477  }
478
479  if (!last_iter)
480  {
// NOTE(review): this listing omits original line 481 -- presumably
// "ParallelCopy_finish();", which completes this intermediate pass before
// the next one reuses pcd.  Confirm against the upstream source.
482
483  SC += NC;
484  DC += NC;
485  }
486
487  ipass += NC;
488  NCompLeft -= NC;
489  }
490
491 #endif /*BL_USE_MPI*/
492 }
493 
// Second half of ParallelCopy: waits for the receives posted by
// ParallelCopy_nowait(), unpacks them with the stored op (COPY/ADD), waits
// for the sends, frees the comm buffers, and clears `pcd`.
494 template <class FAB>
495 void
// NOTE(review): this listing omits original line 496 -- presumably the
// function header "FabArray<FAB>::ParallelCopy_finish ()".  Confirm against
// the upstream source.
497 {
498
499 #ifdef BL_USE_MPI
500
501  BL_PROFILE("FabArray::ParallelCopy_finish()");
// (original line 502 is not shown in this listing)
503
// Nothing in flight (nowait returned early or already finished).
504  if (!pcd) { return; }
505
506  const CPC* thecpc = pcd->cpc;
507
508  const auto N_snds = static_cast<int>(thecpc->m_SndTags->size());
509  const auto N_rcvs = static_cast<int>(thecpc->m_RcvTags->size());
510
511  if (N_rcvs > 0)
512  {
// Map each receive slot back to its copy-tag container; zero-size slots
// stay nullptr and are skipped by the unpackers.
513  Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs,nullptr);
514  for (int k = 0; k < N_rcvs; ++k)
515  {
516  if (pcd->recv_size[k] > 0)
517  {
518  auto const& cctc = thecpc->m_RcvTags->at(pcd->recv_from[k]);
519  recv_cctc[k] = &cctc;
520  }
521  }
522
523  if (pcd->actual_n_rcvs > 0) {
524  Vector<MPI_Status> stats(N_rcvs);
525  ParallelDescriptor::Waitall(pcd->recv_reqs, stats);
526 #ifdef AMREX_DEBUG
527  if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
528  {
529  amrex::Abort("ParallelCopy failed with wrong message size");
530  }
531 #endif
532  }
533
534  bool is_thread_safe = thecpc->m_threadsafe_rcv;
535
// Unpack into components [pcd->DC, pcd->DC+pcd->NC) with the stored op.
536 #ifdef AMREX_USE_GPU
537  if (Gpu::inLaunchRegion())
538  {
539  unpack_recv_buffer_gpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
540  recv_cctc, pcd->op, is_thread_safe);
541  }
542  else
543 #endif
544  {
545  unpack_recv_buffer_cpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
546  recv_cctc, pcd->op, is_thread_safe);
547  }
548
549  if (pcd->the_recv_data)
550  {
551  amrex::The_Comms_Arena()->free(pcd->the_recv_data);
552  pcd->the_recv_data = nullptr;
553  }
554  }
555
556  if (N_snds > 0) {
557  if (! thecpc->m_SndTags->empty()) {
558  Vector<MPI_Status> stats(pcd->send_reqs.size());
559  ParallelDescriptor::Waitall(pcd->send_reqs, stats);
560  }
561  amrex::The_Comms_Arena()->free(pcd->the_send_data);
562  pcd->the_send_data = nullptr;
563  }
564
// Comm operation fully retired.
565  pcd.reset();
566
567 #endif /*BL_USE_MPI*/
568 }
569 
// Gathers data from this (distributed) FabArray into a single FAB `dest`
// owned by every caller: a one-box FabArray aliasing `dest` is created on the
// root rank, filled via ParallelCopy, and the result is then distributed to
// the other ranks (see review note below).
//
// dest:   destination FAB covering the region to gather (dest.box()).
// scomp/dcomp/ncomp: component ranges in this FabArray / dest.
// nghost: how many ghost cells of this FabArray participate.
570 template <class FAB>
571 void
572 FabArray<FAB>::copyTo (FAB& dest, int scomp, int dcomp, int ncomp, int nghost) const
573 {
574  BL_PROFILE("FabArray::copy(fab)");
575
576  BL_ASSERT(dcomp + ncomp <= dest.nComp());
577  BL_ASSERT(IntVect(nghost).allLE(nGrowVect()));
578
// The rank owning grid 0 acts as the gather root.
579  int root_proc = this->DistributionMap()[0];
580
// One-box BoxArray on the root; allocation suppressed so the root can alias
// dest's memory instead of allocating its own.
581  BoxArray ba(dest.box());
582  DistributionMapping dm(Vector<int>{root_proc});
583  FabArray<FAB> destmf(ba, dm, ncomp, 0, MFInfo().SetAlloc(false));
584  if (ParallelDescriptor::MyProc() == root_proc) {
585  destmf.setFab(0, FAB(dest, amrex::make_alias, dcomp, ncomp));
586  }
587
588  destmf.ParallelCopy(*this, scomp, 0, ncomp, nghost, 0);
589
590 #ifdef BL_USE_MPI
591  using T = typename FAB::value_type;
592  if (ParallelContext::NProcsSub() > 1) {
593  Long count = dest.numPts()*ncomp;
594  T* const p0 = dest.dataPtr(dcomp);
595  T* pb = p0;
596 #ifdef AMREX_USE_GPU
// Device-resident dest: stage through a pinned host buffer for MPI.
597  if (dest.arena()->isDevice()) {
598  pb = (T*)The_Pinned_Arena()->alloc(sizeof(T)*count);
599  Gpu::dtoh_memcpy_async(pb, p0, sizeof(T)*count);
// NOTE(review): this listing omits original line 600 -- likely a
// "Gpu::streamSynchronize();" so the d2h copy completes before MPI uses pb.
// Confirm against the upstream source.
601  }
602 #endif
// NOTE(review): this listing omits original lines 603-604 -- presumably the
// broadcast of pb from root_proc to all ranks (ParallelDescriptor::Bcast or
// equivalent), which is what distributes the gathered data; without it the
// non-root dest is never filled.  Confirm against the upstream source.
605 #ifdef AMREX_USE_GPU
606  if (pb != p0) {
607  Gpu::htod_memcpy_async(p0, pb, sizeof(T)*count);
// NOTE(review): this listing omits original line 608 -- likely a stream
// synchronize and/or the free of the pinned staging buffer pb.  Confirm
// against the upstream source.
609  }
610 #endif
611  }
612 #endif
613 }
614 // \endcond
615 #ifdef BL_USE_MPI
// Convenience overload of PrepareSendBuffers: allocates the single send
// buffer via the worker overload and returns it as an owning
// TheFaArenaPointer instead of exposing the raw char*.
616 template <class FAB>
617 template <typename BUF>
// NOTE(review): this listing omits original line 618 -- the return type line
// "TheFaArenaPointer" (matching the return statement below).  Confirm against
// the upstream source.
619 FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
620  Vector<char*>& send_data,
621  Vector<std::size_t>& send_size,
622  Vector<int>& send_rank,
623  Vector<MPI_Request>& send_reqs,
624  Vector<const CopyComTagsContainer*>& send_cctc,
625  int ncomp)
626 {
627  char* pointer = nullptr;
// BUF is forwarded explicitly: it cannot be deduced from the arguments.
628  PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
629  return TheFaArenaPointer(pointer);
630 }
631 
632 template <class FAB>
633 template <typename BUF>
634 void
635 FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
636  char*& the_send_data,
637  Vector<char*>& send_data,
638  Vector<std::size_t>& send_size,
639  Vector<int>& send_rank,
640  Vector<MPI_Request>& send_reqs,
641  Vector<const CopyComTagsContainer*>& send_cctc,
642  int ncomp)
643 {
644  send_data.clear();
645  send_size.clear();
646  send_rank.clear();
647  send_reqs.clear();
648  send_cctc.clear();
649  const auto N_snds = SndTags.size();
650  if (N_snds == 0) { return; }
651  send_data.reserve(N_snds);
652  send_size.reserve(N_snds);
653  send_rank.reserve(N_snds);
654  send_reqs.reserve(N_snds);
655  send_cctc.reserve(N_snds);
656 
657  Vector<std::size_t> offset; offset.reserve(N_snds);
658  std::size_t total_volume = 0;
659  for (auto const& kv : SndTags)
660  {
661  auto const& cctc = kv.second;
662 
663  std::size_t nbytes = 0;
664  for (auto const& cct : kv.second)
665  {
666  nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);
667  }
668 
669  std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
670  nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned
671 
672  // Also need to align the offset properly
673  total_volume = amrex::aligned_size(std::max(alignof(BUF), acd),
674  total_volume);
675 
676  offset.push_back(total_volume);
677  total_volume += nbytes;
678 
679  send_data.push_back(nullptr);
680  send_size.push_back(nbytes);
681  send_rank.push_back(kv.first);
682  send_reqs.push_back(MPI_REQUEST_NULL);
683  send_cctc.push_back(&cctc);
684  }
685 
686  if (total_volume > 0)
687  {
688  the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(total_volume));
689  for (int i = 0, N = static_cast<int>(send_size.size()); i < N; ++i) {
690  send_data[i] = the_send_data + offset[i];
691  }
692  } else {
693  the_send_data = nullptr;
694  }
695 }
696 
// Posts one nonblocking MPI send (Asend) per destination rank, skipping
// zero-size chunks; each resulting request is stored in send_reqs[j].
697 template <class FAB>
698 void
699 FabArray<FAB>::PostSnds (Vector<char*> const& send_data,
700  Vector<std::size_t> const& send_size,
701  Vector<int> const& send_rank,
702  Vector<MPI_Request>& send_reqs,
703  int SeqNum)
704 {
// NOTE(review): this listing omits original line 705 -- presumably
// "MPI_Comm comm = ParallelContext::CommunicatorSub();", since `comm` is
// used in the Asend call below.  Confirm against the upstream source.
706
707  const auto N_snds = static_cast<int>(send_reqs.size());
708  for (int j = 0; j < N_snds; ++j)
709  {
710  if (send_size[j] > 0) {
// send_rank holds global ranks; Asend needs the sub-communicator-local rank.
711  const int rank = ParallelContext::global_to_local_rank(send_rank[j]);
712  send_reqs[j] = ParallelDescriptor::Asend
713  (send_data[j], send_size[j], rank, SeqNum, comm).req();
714  }
715  }
716 }
717 
718 template <class FAB>
719 template <typename BUF>
720 TheFaArenaPointer FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
721  Vector<char*>& recv_data,
722  Vector<std::size_t>& recv_size,
723  Vector<int>& recv_from,
724  Vector<MPI_Request>& recv_reqs,
725  int ncomp,
726  int SeqNum)
727 {
728  char* pointer = nullptr;
729  PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
730  return TheFaArenaPointer(pointer);
731 }
732 
// Worker overload of PostRcvs: sizes and allocates one contiguous receive
// buffer covering all senders in RcvTags, fills the per-sender output
// vectors, and posts one nonblocking Arecv per sender with a positive size.
// the_recv_data receives the buffer base (nullptr when nothing to receive).
// Chunk sizes/offsets are padded exactly like PrepareSendBuffers so the two
// sides agree on message byte counts.
733 template <class FAB>
734 template <typename BUF>
735 void
736 FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
737  char*& the_recv_data,
738  Vector<char*>& recv_data,
739  Vector<std::size_t>& recv_size,
740  Vector<int>& recv_from,
741  Vector<MPI_Request>& recv_reqs,
742  int ncomp,
743  int SeqNum)
744 {
745  recv_data.clear();
746  recv_size.clear();
747  recv_from.clear();
748  recv_reqs.clear();
749
// First pass: per-sender byte counts and offsets into the single buffer.
750  Vector<std::size_t> offset;
751  std::size_t TotalRcvsVolume = 0;
752  for (const auto& kv : RcvTags) // loop over senders
753  {
754  std::size_t nbytes = 0;
755  for (auto const& cct : kv.second)
756  {
757  nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);
758  }
759
// Pad to a multiple of the selected MPI data type size.
760  std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
761  nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned
762
763  // Also need to align the offset properly
764  TotalRcvsVolume = amrex::aligned_size(std::max(alignof(BUF),acd),
765  TotalRcvsVolume);
766
767  offset.push_back(TotalRcvsVolume);
768  TotalRcvsVolume += nbytes;
769
770  recv_data.push_back(nullptr);
771  recv_size.push_back(nbytes);
772  recv_from.push_back(kv.first);
773  recv_reqs.push_back(MPI_REQUEST_NULL);
774  }
775
776  const auto nrecv = static_cast<int>(recv_from.size());
777
// NOTE(review): this listing omits original line 778 -- presumably
// "MPI_Comm comm = ParallelContext::CommunicatorSub();", since `comm` is
// used in the Arecv call below.  Confirm against the upstream source.
779
780  if (TotalRcvsVolume == 0)
781  {
782  the_recv_data = nullptr;
783  }
784  else
785  {
// Second pass: allocate once, point each chunk into the buffer, and post
// the nonblocking receives (zero-size senders get no message).
786  the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));
787
788  for (int i = 0; i < nrecv; ++i)
789  {
790  recv_data[i] = the_recv_data + offset[i];
791  if (recv_size[i] > 0)
792  {
793  const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
794  recv_reqs[i] = ParallelDescriptor::Arecv
795  (recv_data[i], recv_size[i], rank, SeqNum, comm).req();
796  }
797  }
798  }
799 }
800 #endif
801 
// Copies data from `src` -- which must have the SAME BoxArray but may have a
// different DistributionMapping -- into this FabArray, i.e. moves grids
// between ranks without changing the box layout.
802 template <class FAB>
803 void
// NOTE(review): this listing omits original line 804 -- presumably the
// function header "FabArray<FAB>::Redistribute (const FabArray<FAB>& src,".
// Confirm against the upstream source.
805  int scomp,
806  int dcomp,
807  int ncomp,
808  const IntVect& nghost)
809 {
// NOTE(review): this listing omits original line 810 -- presumably the first
// line of an AMREX_ALWAYS_ASSERT_WITH_MESSAGE checking
// boxArray() == src.boxArray(), to match the message text below.  Confirm
// against the upstream source.
811  "FabArray::Redistribute: must have the same BoxArray");
812
813  if (ParallelContext::NProcsSub() == 1)
814  {
815  Copy(*this, src, scomp, dcomp, ncomp, nghost);
816  return;
817  }
818
819 #ifdef BL_USE_MPI
820
// NOTE(review): this listing omits original line 821 -- presumably the
// construction of the CPC `cpc` (comm meta-data pairing identical boxes
// across the two distribution maps) passed by address below.  Confirm
// against the upstream source.
822
823  ParallelCopy(src, scomp, dcomp, ncomp, nghost, nghost, Periodicity::NonPeriodic(),
824  FabArrayBase::COPY, &cpc);
825
826 #endif
827 }
828 
// Nudges MPI progress on the outstanding FillBoundary receives by calling
// MPI Test on them (non-debug builds only).
829 template <class FAB>
830 void
// NOTE(review): this listing omits original line 831 -- presumably the
// function header "FabArray<FAB>::FillBoundary_test ()".  Confirm against
// the upstream source.
832 {
833 #if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
834  // We only test if no DEBUG because in DEBUG we check the status later.
835  // If Test is done here, the status check will fail.
836  int flag;
837  ParallelDescriptor::Test(fbd->recv_reqs, flag, fbd->recv_stat);
838 #endif
839 }
840 
// Executes a batch of box-to-box copies described by Array4 copy tags:
// for every tag, dfab(i,j,k,n) = sfab(i+offset, j+offset, k+offset, n) over
// tag.dbox and all components.  Runs as one fused GPU kernel when in a launch
// region, otherwise as an (optionally OpenMP-parallel) CPU loop over tags.
841 namespace detail {
842 template <class TagT>
843 void fbv_copy (Vector<TagT> const& tags)
844 {
845  const int N = tags.size();
846  if (N == 0) { return; }
847 #ifdef AMREX_USE_GPU
848  if (Gpu::inLaunchRegion()) {
// One ParallelFor over all tags; each thread copies every component of its
// cell (the trailing int parameter of the lambda is the unused box index).
849  ParallelFor(tags, 1,
850  [=] AMREX_GPU_DEVICE (int i, int j, int k, int, TagT const& tag) noexcept
851  {
852  const int ncomp = tag.dfab.nComp();
853  for (int n = 0; n < ncomp; ++n) {
854  tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
855  }
856  });
857  } else
858 #endif
859  {
// CPU path: parallelize over tags (distinct destination fabs per tag).
860 #ifdef AMREX_USE_OMP
861 #pragma omp parallel for
862 #endif
863  for (int itag = 0; itag < N; ++itag) {
864  auto const& tag = tags[itag];
865  const int ncomp = tag.dfab.nComp();
866  AMREX_LOOP_4D(tag.dbox, ncomp, i, j, k, n,
867  {
868  tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
869  });
870  }
871  }
872 }
873 }
874 
// Fills ghost cells of several FabArrays at once, with per-FabArray
// component range, ghost width, periodicity, and cross flag.  The active
// "#if 1" implementation simply issues all the FillBoundary_nowait calls and
// then all the finishes, overlapping the communication of the different
// FabArrays.  The "#else" branch is a disabled experimental implementation
// that aggregates all messages into one batch.
875 template <class MF>
876 std::enable_if_t<IsFabArray<MF>::value>
877 FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp,
878  Vector<int> const& ncomp, Vector<IntVect> const& nghost,
879  Vector<Periodicity> const& period, Vector<int> const& cross = {})
880 {
881  BL_PROFILE("FillBoundary(Vector)");
882 #if 1
883  const int N = mf.size();
884  for (int i = 0; i < N; ++i) {
885  mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
886  cross.empty() ? 0 : cross[i]);
887  }
888  for (int i = 0; i < N; ++i) {
889  mf[i]->FillBoundary_finish();
890  }
891
892 #else
// ---- Disabled experimental single-batch implementation below. ----
893  using FAB = typename MF::FABType::value_type;
894  using T = typename FAB::value_type;
895
// Collect the cached comm meta-data (or nullptr when no ghosts) per
// FabArray and total up the local/receive/send tag counts.
896  const int nmfs = mf.size();
897  Vector<FabArrayBase::CommMetaData const*> cmds;
898  int N_locs = 0;
899  int N_rcvs = 0;
900  int N_snds = 0;
901  for (int imf = 0; imf < nmfs; ++imf) {
902  if (nghost[imf].max() > 0) {
903  auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
904  cross.empty() ? 0 : cross[imf]);
905  // The FB is cached. Therefore it's safe take its address for later use.
906  cmds.push_back(static_cast<FabArrayBase::CommMetaData const*>(&TheFB));
907  N_locs += TheFB.m_LocTags->size();
908  N_rcvs += TheFB.m_RcvTags->size();
909  N_snds += TheFB.m_SndTags->size();
910  } else {
911  cmds.push_back(nullptr);
912  }
913  }
914
// Flatten all local copies across FabArrays into one tag vector.
915  using TagT = Array4CopyTag<T>;
916  Vector<TagT> local_tags;
917  local_tags.reserve(N_locs);
918  static_assert(amrex::IsStoreAtomic<T>::value, "FillBoundary(Vector): storing T is not atomic");
919  for (int imf = 0; imf < nmfs; ++imf) {
920  if (cmds[imf]) {
921  auto const& tags = *(cmds[imf]->m_LocTags);
922  for (auto const& tag : tags) {
923  local_tags.push_back({(*mf[imf])[tag.dstIndex].array (scomp[imf],ncomp[imf]),
924  (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
925  tag.dbox,
926  (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
927  }
928  }
929  }
930
931  if (ParallelContext::NProcsSub() == 1) {
932  detail::fbv_copy(local_tags);
933  return;
934  }
935
936 #ifdef AMREX_USE_MPI
937  //
938  // Do this before prematurely exiting if running in parallel.
939  // Otherwise sequence numbers will not match across MPI processes.
940  //
// NOTE(review): this listing omits original lines 941-942 -- presumably the
// declarations of `SeqNum` (ParallelDescriptor::SeqNum()) and `comm`
// (ParallelContext::CommunicatorSub()) used throughout below.  Confirm
// against the upstream source.
943
944  if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) { return; } // No work to do
945
946  char* the_recv_data = nullptr;
947  Vector<int> recv_from;
948  Vector<std::size_t> recv_size;
949  Vector<MPI_Request> recv_reqs;
950  Vector<MPI_Status> recv_stat;
951  Vector<TagT> recv_tags;
952
953  if (N_rcvs > 0) {
954
// One message per distinct sender rank, aggregated over all FabArrays.
955  for (int imf = 0; imf < nmfs; ++imf) {
956  if (cmds[imf]) {
957  auto const& tags = *(cmds[imf]->m_RcvTags);
958  for (const auto& kv : tags) {
959  recv_from.push_back(kv.first);
960  }
961  }
962  }
963  amrex::RemoveDuplicates(recv_from);
964  const int nrecv = recv_from.size();
965
966  recv_reqs.resize(nrecv, MPI_REQUEST_NULL);
967  recv_stat.resize(nrecv);
968
969  recv_tags.reserve(N_rcvs);
970
971  Vector<Vector<std::size_t> > recv_offset(nrecv);
// NOTE(review): this listing omits original line 972 -- presumably the
// declaration "Vector<std::size_t> offset;" used by offset.reserve/push_back
// below.  Confirm against the upstream source.
973  recv_size.reserve(nrecv);
974  offset.reserve(nrecv);
975  std::size_t TotalRcvsVolume = 0;
976  for (int i = 0; i < nrecv; ++i) {
// Per sender: walk every FabArray's tags for that rank, recording each
// destination box's sub-offset and building an unpack tag whose source
// Array4 pointer is patched in after allocation.
977  std::size_t nbytes = 0;
978  for (int imf = 0; imf < nmfs; ++imf) {
979  if (cmds[imf]) {
980  auto const& tags = *(cmds[imf]->m_RcvTags);
981  auto it = tags.find(recv_from[i]);
982  if (it != tags.end()) {
983  for (auto const& cct : it->second) {
984  auto& dfab = (*mf[imf])[cct.dstIndex];
985  recv_offset[i].push_back(nbytes);
986  recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
987  makeArray4<T const>(nullptr,cct.dbox,ncomp[imf]),
988  cct.dbox, Dim3{0,0,0}});
989  nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
990  }
991  }
992  }
993  }
994
995  std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
996  nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned
997
998  // Also need to align the offset properly
999  TotalRcvsVolume = amrex::aligned_size(std::max(alignof(T),acd), TotalRcvsVolume);
1000
1001  offset.push_back(TotalRcvsVolume);
1002  TotalRcvsVolume += nbytes;
1003
1004  recv_size.push_back(nbytes);
1005  }
1006
1007  the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));
1008
// Post receives and patch each unpack tag's source pointer into the buffer.
1009  int k = 0;
1010  for (int i = 0; i < nrecv; ++i) {
1011  char* p = the_recv_data + offset[i];
1012  const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
1013  recv_reqs[i] = ParallelDescriptor::Arecv
1014  (p, recv_size[i], rank, SeqNum, comm).req();
1015  for (int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
1016  recv_tags[k++].sfab.p = (T const*)(p + recv_offset[i][j]);
1017  }
1018  }
1019  }
1020
1021  char* the_send_data = nullptr;
1022  Vector<int> send_rank;
1023  Vector<char*> send_data;
1024  Vector<std::size_t> send_size;
1025  Vector<MPI_Request> send_reqs;
1026  if (N_snds > 0) {
// Mirror of the receive setup: one message per distinct destination rank.
1027  for (int imf = 0; imf < nmfs; ++imf) {
1028  if (cmds[imf]) {
1029  auto const& tags = *(cmds[imf]->m_SndTags);
1030  for (auto const& kv : tags) {
1031  send_rank.push_back(kv.first);
1032  }
1033  }
1034  }
1035  amrex::RemoveDuplicates(send_rank);
1036  const int nsend = send_rank.size();
1037
1038  send_data.resize(nsend, nullptr);
1039  send_reqs.resize(nsend, MPI_REQUEST_NULL);
1040
1041  Vector<TagT> send_tags;
1042  send_tags.reserve(N_snds);
1043
1044  Vector<Vector<std::size_t> > send_offset(nsend);
// NOTE(review): this listing omits original line 1045 -- presumably the
// declaration "Vector<std::size_t> offset;" used below, as in the receive
// setup above.  Confirm against the upstream source.
1046  send_size.reserve(nsend);
1047  offset.reserve(nsend);
1048  std::size_t TotalSndsVolume = 0;
1049  for (int i = 0; i < nsend; ++i) {
1050  std::size_t nbytes = 0;
1051  for (int imf = 0; imf < nmfs; ++imf) {
1052  if (cmds[imf]) {
1053  auto const& tags = *(cmds[imf]->m_SndTags);
1054  auto it = tags.find(send_rank[i]);
1055  if (it != tags.end()) {
1056  for (auto const& cct : it->second) {
1057  auto const& sfab = (*mf[imf])[cct.srcIndex];
1058  send_offset[i].push_back(nbytes);
1059  send_tags.push_back({amrex::makeArray4<T>(nullptr,cct.sbox,ncomp[imf]),
1060  sfab.const_array(scomp[imf],ncomp[imf]),
1061  cct.sbox, Dim3{0,0,0}});
1062  nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
1063  }
1064  }
1065  }
1066  }
1067
1068  std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1069  nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned
1070
1071  // Also need to align the offset properly
1072  TotalSndsVolume = amrex::aligned_size(std::max(alignof(T),acd), TotalSndsVolume);
1073
1074  offset.push_back(TotalSndsVolume);
1075  TotalSndsVolume += nbytes;
1076
1077  send_size.push_back(nbytes);
1078  }
1079
// Allocate, patch pack-tag destination pointers, pack, and post the sends.
1080  the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalSndsVolume));
1081  int k = 0;
1082  for (int i = 0; i < nsend; ++i) {
1083  send_data[i] = the_send_data + offset[i];
1084  for (int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
1085  send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
1086  }
1087  }
1088
1089  detail::fbv_copy(send_tags);
1090
1091  FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
1092  }
1093
// Nudge MPI progress; skipped in DEBUG so the later status check is valid.
1094 #if !defined(AMREX_DEBUG)
1095  int recv_flag;
1096  ParallelDescriptor::Test(recv_reqs, recv_flag, recv_stat);
1097 #endif
1098
1099  if (N_locs > 0) {
1100  detail::fbv_copy(local_tags);
1101 #if !defined(AMREX_DEBUG)
1102  ParallelDescriptor::Test(recv_reqs, recv_flag, recv_stat);
1103 #endif
1104  }
1105
1106  if (N_rcvs > 0) {
1107  ParallelDescriptor::Waitall(recv_reqs, recv_stat);
1108 #ifdef AMREX_DEBUG
1109  if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
1110  amrex::Abort("FillBoundary(vector) failed with wrong message size");
1111  }
1112 #endif
1113
1114  detail::fbv_copy(recv_tags);
1115
1116  amrex::The_Comms_Arena()->free(the_recv_data);
1117  }
1118
1119  if (N_snds > 0) {
1120  Vector<MPI_Status> stats(send_reqs.size());
1121  ParallelDescriptor::Waitall(send_reqs, stats);
1122  amrex::The_Comms_Arena()->free(the_send_data);
1123  }
1124
1125 #endif // #ifdef AMREX_USE_MPI
1126 #endif // #if 1 #else
1127 }
1128 
1129 template <class MF>
1130 std::enable_if_t<IsFabArray<MF>::value>
1131 FillBoundary (Vector<MF*> const& mf, const Periodicity& a_period = Periodicity::NonPeriodic())
1132 {
1133  Vector<int> scomp(mf.size(), 0);
1134  Vector<int> ncomp;
1135  Vector<IntVect> nghost;
1136  Vector<Periodicity> period(mf.size(), a_period);
1137  ncomp.reserve(mf.size());
1138  nghost.reserve(mf.size());
1139  for (auto const& x : mf) {
1140  ncomp.push_back(x->nComp());
1141  nghost.push_back(x->nGrowVect());
1142  }
1143  FillBoundary(mf, scomp, ncomp, nghost, period);
1144 }
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define BL_PROFILE_SYNC_STOP()
Definition: AMReX_BLProfiler.H:645
#define BL_PROFILE_SYNC_START_TIMED(fname)
Definition: AMReX_BLProfiler.H:644
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:49
#define BL_ASSERT(EX)
Definition: AMReX_BLassert.H:39
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_NODISCARD
Definition: AMReX_Extension.H:251
std::enable_if_t< IsFabArray< MF >::value > FillBoundary(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Definition: AMReX_FabArrayCommI.H:877
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_LOOP_4D(bx, ncomp, i, j, k, n, block)
Definition: AMReX_Loop.nolint.H:16
int MPI_Comm
Definition: AMReX_ccse-mpi.H:47
static constexpr int MPI_REQUEST_NULL
Definition: AMReX_ccse-mpi.H:53
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
IndexType ixType() const noexcept
Return index type of this BoxArray.
Definition: AMReX_BoxArray.H:836
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition: AMReX_FabArrayBase.H:94
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition: AMReX_FabArrayBase.H:130
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:344
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int max() const noexcept
maximum (no absolute values) value
Definition: AMReX_IntVect.H:214
MPI_Request req() const
Definition: AMReX_ParallelDescriptor.H:74
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition: AMReX_Periodicity.H:17
bool isAnyPeriodic() const noexcept
Definition: AMReX_Periodicity.H:22
Long size() const noexcept
Definition: AMReX_Vector.H:50
@ FAB
Definition: AMReX_AmrvisConstants.H:86
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition: AMReX_GpuRange.H:26
bool inGraphRegion()
Definition: AMReX_GpuControl.H:115
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:265
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
int MyProc()
Definition: AMReX_MPMD.cpp:117
std::enable_if_t< IsBaseFab< FAB >::value &&IsCallableR< Dim3, DTOS, Dim3 >::value &&IsFabProjection< Proj, FAB >::value > unpack_recv_buffer_cpu(FabArray< FAB > &mf, int dcomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{}) noexcept
std::enable_if_t< IsBaseFab< FAB >::value &&IsDataPacking< DataPacking, FAB >::value > ParallelCopy_finish(FabArray< FAB > &dest, CommHandler handler, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition: AMReX_NonLocalBC.H:793
std::enable_if_t< IsBaseFab< FAB >::value > PrepareSendBuffers(const PackComponents &components, FabArray< FAB > &dest, const FabArray< FAB > &src, CommData &comm, const FabArrayBase::MapOfCopyComTagContainers &cctc)
Calls PrepareComBuffers.
Definition: AMReX_NonLocalBC.H:555
AMREX_NODISCARD CommHandler ParallelCopy_nowait(NoLocalCopy, FabArray< FAB > &dest, const FabArray< FAB > &src, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition: AMReX_NonLocalBC.H:701
std::enable_if_t< IsBaseFab< FAB >::value &&IsCallableR< Dim3, DTOS, Dim3 >::value &&IsFabProjection< Proj, FAB >::value > unpack_recv_buffer_gpu(FabArray< FAB > &mf, int scomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{})
std::enable_if_t< IsBaseFab< FAB >::value &&IsCallableR< Dim3, DTOS, Dim3 >::value &&IsFabProjection< Proj, FAB >::value > FillBoundary_finish(CommHandler handler, FabArray< FAB > &mf, const FabArrayBase::CommMetaData &cmd, int scomp, int ncomp, DTOS const &dtos, Proj const &proj=Proj{})
Finish communication started by FillBoundary_nowait.
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition: AMReX_ParallelContext.H:70
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
void Test(MPI_Request &, int &, MPI_Status &)
Definition: AMReX_ParallelDescriptor.cpp:1207
Message Asend(const T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1088
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition: AMReX_ParallelDescriptor.cpp:1295
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition: AMReX_ParallelDescriptor.cpp:1282
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition: AMReX_ParallelDescriptor.H:613
Message Arecv(T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1130
@ min
Definition: AMReX_ParallelReduce.H:18
@ max
Definition: AMReX_ParallelReduce.H:17
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
@ make_alias
Definition: AMReX_MakeType.H:7
std::unique_ptr< char, TheFaArenaDeleter > TheFaArenaPointer
Definition: AMReX_FabArray.H:104
IntVect nGrowVect(FabArrayBase const &fa)
void Copy(FabArray< DFAB > &dst, FabArray< SFAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArray.H:179
BoxArray const & boxArray(FabArrayBase const &fa)
Arena * The_Comms_Arena()
Definition: AMReX_Arena.cpp:669
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
void Add(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArray.H:240
Arena * The_Pinned_Arena()
Definition: AMReX_Arena.cpp:649
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size of size bytes, this returns the next largest arena size that will align...
Definition: AMReX_Arena.H:30
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition: AMReX_FabArrayUtility.H:1672
void RemoveDuplicates(Vector< T > &vec)
Definition: AMReX_Vector.H:190
Definition: AMReX_FabArrayCommI.H:841
void fbv_copy(Vector< TagT > const &tags)
Definition: AMReX_FabArrayCommI.H:843
Definition: AMReX_Dim3.H:12
parallel copy or add
Definition: AMReX_FabArrayBase.H:536
bool m_threadsafe_rcv
Definition: AMReX_FabArrayBase.H:474
std::unique_ptr< MapOfCopyComTagContainers > m_RcvTags
Definition: AMReX_FabArrayBase.H:477
std::unique_ptr< MapOfCopyComTagContainers > m_SndTags
Definition: AMReX_FabArrayBase.H:476
std::unique_ptr< CopyComTagsContainer > m_LocTags
Definition: AMReX_FabArrayBase.H:475
FillBoundary.
Definition: AMReX_FabArrayBase.H:487
Definition: AMReX_TypeTraits.H:266