Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_FabArrayCommI.H
Go to the documentation of this file.
1
2#include <AMReX_FBI.H>
3#include <AMReX_PCI.H>
4
5template <class FAB>
6template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
7void
8FabArray<FAB>::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost,
9 const Periodicity& period, bool cross,
10 bool enforce_periodicity_only,
11 bool override_sync)
12{
13 BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: FB");
14 BL_PROFILE("FillBoundary_nowait()");
15
16 AMREX_ASSERT_WITH_MESSAGE(!fbd, "FillBoundary_nowait() called when comm operation already in progress.");
17 AMREX_ASSERT(!enforce_periodicity_only || !override_sync);
18
19 bool work_to_do;
20 if (enforce_periodicity_only) {
21 work_to_do = period.isAnyPeriodic();
22 } else if (override_sync) {
23 work_to_do = (nghost.max() > 0) || !is_cell_centered();
24 } else {
25 work_to_do = nghost.max() > 0;
26 }
27 if (!work_to_do) { return; }
28
29 const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync);
30
31 if (ParallelContext::NProcsSub() == 1)
32 {
33 //
34 // There can only be local work to do.
35 //
36 int N_locs = (*TheFB.m_LocTags).size();
37 if (N_locs == 0) { return; }
38#ifdef AMREX_USE_GPU
39 if (Gpu::inLaunchRegion())
40 {
41#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
42 if (Gpu::inGraphRegion())
43 {
44 FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
45 }
46 else
47#endif
48 {
49 FB_local_copy_gpu(TheFB, scomp, ncomp);
50 }
51 }
52 else
53#endif
54 {
55 FB_local_copy_cpu(TheFB, scomp, ncomp);
56 }
57
58 return;
59 }
60
61#ifdef BL_USE_MPI
62
63 //
64 // Do this before prematurely exiting if running in parallel.
65 // Otherwise sequence numbers will not match across MPI processes.
66 //
67 int SeqNum = ParallelDescriptor::SeqNum();
68
69 const int N_locs = TheFB.m_LocTags->size();
70 const int N_rcvs = TheFB.m_RcvTags->size();
71 const int N_snds = TheFB.m_SndTags->size();
72
73 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
74 // No work to do.
75 return;
76 }
77
78 fbd = std::make_unique<FBData<FAB>>();
79 fbd->fb = &TheFB;
80 fbd->scomp = scomp;
81 fbd->ncomp = ncomp;
82 fbd->tag = SeqNum;
83
84 //
85 // Post rcvs. Allocate one chunk of space to hold'm all.
86 //
87
88 if (N_rcvs > 0) {
89 PostRcvs<BUF>(*TheFB.m_RcvTags, fbd->the_recv_data,
90 fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
91 ncomp, SeqNum);
92 fbd->recv_stat.resize(N_rcvs);
93 }
94
95 //
96 // Post send's
97 //
98 char*& the_send_data = fbd->the_send_data;
99 Vector<char*> & send_data = fbd->send_data;
100 Vector<std::size_t> send_size;
101 Vector<int> send_rank;
102 Vector<MPI_Request>& send_reqs = fbd->send_reqs;
104
105 if (N_snds > 0)
106 {
107 PrepareSendBuffers<BUF>(*TheFB.m_SndTags, the_send_data, send_data, send_size, send_rank,
108 send_reqs, send_cctc, ncomp);
109
110#ifdef AMREX_USE_GPU
111 if (Gpu::inLaunchRegion())
112 {
113#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
114 if (Gpu::inGraphRegion()) {
115 FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
116 }
117 else
118#endif
119 {
120 pack_send_buffer_gpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
121 }
122 }
123 else
124#endif
125 {
126 pack_send_buffer_cpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
127 }
128
129 AMREX_ASSERT(send_reqs.size() == N_snds);
130 PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
131 }
132
133 FillBoundary_test();
134
135 //
136 // Do the local work. Hope for a bit of communication/computation overlap.
137 //
138 if (N_locs > 0)
139 {
140#ifdef AMREX_USE_GPU
141 if (Gpu::inLaunchRegion())
142 {
143#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
144 if (Gpu::inGraphRegion()) {
145 FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
146 }
147 else
148#endif
149 {
150 FB_local_copy_gpu(TheFB, scomp, ncomp);
151 }
152 }
153 else
154#endif
155 {
156 FB_local_copy_cpu(TheFB, scomp, ncomp);
157 }
158
159 FillBoundary_test();
160 }
161
162#endif /*BL_USE_MPI*/
163}
164
165template <class FAB>
166template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
167void
169{
170#ifdef AMREX_USE_MPI
171
172 BL_PROFILE("FillBoundary_finish()");
174
175 if (!fbd) { n_filled = IntVect::TheZeroVector(); return; }
176
177 const FB* TheFB = fbd->fb;
178 const auto N_rcvs = static_cast<int>(TheFB->m_RcvTags->size());
179 if (N_rcvs > 0)
180 {
181 Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs,nullptr);
182 for (int k = 0; k < N_rcvs; k++)
183 {
184 if (fbd->recv_size[k] > 0)
185 {
186 auto const& cctc = TheFB->m_RcvTags->at(fbd->recv_from[k]);
187 recv_cctc[k] = &cctc;
188 }
189 }
190
191 int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(), nullptr);
192
193 if (actual_n_rcvs > 0) {
194 ParallelDescriptor::Waitall(fbd->recv_reqs, fbd->recv_stat);
195#ifdef AMREX_DEBUG
196 if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
197 {
198 amrex::Abort("FillBoundary_finish failed with wrong message size");
199 }
200#endif
201 }
202
203 bool is_thread_safe = TheFB->m_threadsafe_rcv;
204
205#ifdef AMREX_USE_GPU
206 if (Gpu::inLaunchRegion())
207 {
208#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
209 if (Gpu::inGraphRegion())
210 {
211 FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
212 fbd->recv_data, fbd->recv_size,
213 recv_cctc, is_thread_safe);
214 }
215 else
216#endif
217 {
218 unpack_recv_buffer_gpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
219 recv_cctc, FabArrayBase::COPY, is_thread_safe);
220 }
221 }
222 else
223#endif
224 {
225 unpack_recv_buffer_cpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
226 recv_cctc, FabArrayBase::COPY, is_thread_safe);
227 }
228
229 if (fbd->the_recv_data)
230 {
231 amrex::The_Comms_Arena()->free(fbd->the_recv_data);
232 fbd->the_recv_data = nullptr;
233 }
234 }
235
236 const auto N_snds = static_cast<int>(TheFB->m_SndTags->size());
237 if (N_snds > 0) {
238 Vector<MPI_Status> stats(fbd->send_reqs.size());
239 ParallelDescriptor::Waitall(fbd->send_reqs, stats);
240 amrex::The_Comms_Arena()->free(fbd->the_send_data);
241 fbd->the_send_data = nullptr;
242 }
243
244 fbd.reset();
245
246#endif
247}
248
249// \cond CODEGEN
250template <class FAB>
251void
253 int scomp,
254 int dcomp,
255 int ncomp,
256 const IntVect& snghost,
257 const IntVect& dnghost,
258 const Periodicity& period,
259 CpOp op,
260 const FabArrayBase::CPC * a_cpc)
261{
262 BL_PROFILE("FabArray::ParallelCopy()");
263
264 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period, op, a_cpc);
265 ParallelCopy_finish();
266}
267
268template <class FAB>
269void
270FabArray<FAB>::ParallelCopyToGhost (const FabArray<FAB>& src,
271 int scomp,
272 int dcomp,
273 int ncomp,
274 const IntVect& snghost,
275 const IntVect& dnghost,
276 const Periodicity& period)
277{
278 BL_PROFILE("FabArray::ParallelCopyToGhost()");
279
280 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
281 FabArrayBase::COPY, nullptr, true);
282 ParallelCopy_finish();
283}
284
285template <class FAB>
286void
287FabArray<FAB>::ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
288 int scomp,
289 int dcomp,
290 int ncomp,
291 const IntVect& snghost,
292 const IntVect& dnghost,
293 const Periodicity& period)
294{
295 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
296 FabArrayBase::COPY, nullptr, true);
297}
298
299template <class FAB>
300void
301FabArray<FAB>::ParallelCopyToGhost_finish ()
302{
304}
305
306
307template <class FAB>
308void
309FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
310 int scomp,
311 int dcomp,
312 int ncomp,
313 const IntVect& snghost,
314 const IntVect& dnghost,
315 const Periodicity& period,
316 CpOp op,
317 const FabArrayBase::CPC * a_cpc,
318 bool to_ghost_cells_only)
319{
320 BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC");
321 BL_PROFILE("FabArray::ParallelCopy_nowait()");
322
323 AMREX_ASSERT_WITH_MESSAGE(!pcd, "ParallelCopy_nowait() called when comm operation already in progress.");
324
325 if (empty() || src.empty()) {
326 return;
327 }
328
329 BL_ASSERT(op == FabArrayBase::COPY || op == FabArrayBase::ADD);
330 BL_ASSERT(boxArray().ixType() == src.boxArray().ixType());
331 BL_ASSERT(src.nGrowVect().allGE(snghost));
332 BL_ASSERT( nGrowVect().allGE(dnghost));
333
334 n_filled = dnghost;
335
336 if ((ParallelDescriptor::NProcs() == 1) &&
337 (this->size() == 1) && (src.size() == 1) &&
338 !period.isAnyPeriodic() && !to_ghost_cells_only)
339 {
340 if (this != &src) { // avoid self copy or plus
341 auto const& da = this->array(0, dcomp);
342 auto const& sa = src.const_array(0, scomp);
343 Box box = amrex::grow(src.box(0),snghost)
344 & amrex::grow(this->box(0),dnghost);
345 if (op == FabArrayBase::COPY) {
346#ifdef AMREX_USE_GPU
347 ParallelFor(box, ncomp,
348 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
349 da(i,j,k,n) = sa(i,j,k,n);
350 });
351#else
352 auto const& lo = amrex::lbound(box);
353 auto const& hi = amrex::ubound(box);
354#ifdef AMREX_USE_OMP
355#pragma omp parallel for collapse(3)
356#endif
357 for (int n = 0; n < ncomp; ++n) {
358 for (int k = lo.z; k <= hi.z; ++k) {
359 for (int j = lo.y; j <= hi.y; ++j) {
361 for (int i = lo.x; i <= hi.x; ++i) {
362 da(i,j,k,n) = sa(i,j,k,n);
363 }}}}
364#endif
365 } else {
366#ifdef AMREX_USE_GPU
367 ParallelFor(box, ncomp,
368 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
369 da(i,j,k,n) += sa(i,j,k,n);
370 });
371#else
372 auto const& lo = amrex::lbound(box);
373 auto const& hi = amrex::ubound(box);
374#ifdef AMREX_USE_OMP
375#pragma omp parallel for collapse(3)
376#endif
377 for (int n = 0; n < ncomp; ++n) {
378 for (int k = lo.z; k <= hi.z; ++k) {
379 for (int j = lo.y; j <= hi.y; ++j) {
381 for (int i = lo.x; i <= hi.x; ++i) {
382 da(i,j,k,n) += sa(i,j,k,n);
383 }}}}
384#endif
385 }
386 Gpu::streamSynchronize();
387 }
388 return;
389 }
390
391 if ((src.boxArray().ixType().cellCentered() || op == FabArrayBase::COPY) &&
392 (boxarray == src.boxarray && distributionMap == src.distributionMap) &&
393 snghost == IntVect::TheZeroVector() &&
394 dnghost == IntVect::TheZeroVector() &&
395 !period.isAnyPeriodic() && !to_ghost_cells_only)
396 {
397 //
398 // Short-circuit full intersection code if we're doing copy()s or if
399 // we're doing plus()s on cell-centered data. Don't do plus()s on
400 // non-cell-centered data this simplistic way.
401 //
402 if (this != &src) { // avoid self copy or plus
403 if (op == FabArrayBase::COPY) {
404 Copy(*this, src, scomp, dcomp, ncomp, IntVect(0));
405 } else {
406 Add(*this, src, scomp, dcomp, ncomp, IntVect(0));
407 }
408 }
409 return;
410 }
411
412 const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period, to_ghost_cells_only);
413
414 if (ParallelContext::NProcsSub() == 1)
415 {
416 //
417 // There can only be local work to do.
418 //
419
420 int N_locs = (*thecpc.m_LocTags).size();
421 if (N_locs == 0) { return; }
422#ifdef AMREX_USE_GPU
423 if (Gpu::inLaunchRegion())
424 {
425 PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op);
426 }
427 else
428#endif
429 {
430 PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
431 }
432
433 return;
434 }
435
436#ifdef BL_USE_MPI
437
438 //
439 // Do this before prematurely exiting if running in parallel.
440 // Otherwise sequence numbers will not match across MPI processes.
441 //
442 int tag = ParallelDescriptor::SeqNum();
443
444 const int N_snds = thecpc.m_SndTags->size();
445 const int N_rcvs = thecpc.m_RcvTags->size();
446 const int N_locs = thecpc.m_LocTags->size();
447
448 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
449 //
450 // No work to do.
451 //
452
453 return;
454 }
455
456 //
457 // Send/Recv at most MaxComp components at a time to cut down memory usage.
458 //
459 int NCompLeft = ncomp;
460 int SC = scomp, DC = dcomp, NC;
461
462 for (int ipass = 0; ipass < ncomp; )
463 {
464 pcd = std::make_unique<PCData<FAB>>();
465 pcd->cpc = &thecpc;
466 pcd->src = &src;
467 pcd->op = op;
468 pcd->tag = tag;
469
470 NC = std::min(NCompLeft,FabArrayBase::MaxComp);
471 const bool last_iter = (NCompLeft == NC);
472
473 pcd->SC = SC;
474 pcd->DC = DC;
475 pcd->NC = NC;
476
477 //
478 // Post rcvs. Allocate one chunk of space to hold'm all.
479 //
480 pcd->the_recv_data = nullptr;
481
482 pcd->actual_n_rcvs = 0;
483 if (N_rcvs > 0) {
484 PostRcvs(*thecpc.m_RcvTags, pcd->the_recv_data,
485 pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
486 pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
487 }
488
489 //
490 // Post send's
491 //
492 Vector<char*> send_data;
493 Vector<std::size_t> send_size;
494 Vector<int> send_rank;
495 Vector<const CopyComTagsContainer*> send_cctc;
496
497 if (N_snds > 0)
498 {
499 src.PrepareSendBuffers(*thecpc.m_SndTags, pcd->the_send_data, send_data, send_size,
500 send_rank, pcd->send_reqs, send_cctc, NC);
501
502#ifdef AMREX_USE_GPU
503 if (Gpu::inLaunchRegion())
504 {
505 pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc);
506 }
507 else
508#endif
509 {
510 pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
511 }
512
513 AMREX_ASSERT(pcd->send_reqs.size() == N_snds);
514 FabArray<FAB>::PostSnds(send_data, send_size, send_rank, pcd->send_reqs, pcd->tag);
515 }
516
517 //
518 // Do the local work. Hope for a bit of communication/computation overlap.
519 //
520 if (N_locs > 0)
521 {
522#ifdef AMREX_USE_GPU
523 if (Gpu::inLaunchRegion())
524 {
525 PC_local_gpu(thecpc, src, SC, DC, NC, op);
526 }
527 else
528#endif
529 {
530 PC_local_cpu(thecpc, src, SC, DC, NC, op);
531 }
532 }
533
534 if (!last_iter)
535 {
537
538 SC += NC;
539 DC += NC;
540 }
541
542 ipass += NC;
543 NCompLeft -= NC;
544 }
545
546#endif /*BL_USE_MPI*/
547}
548
549template <class FAB>
550void
551FabArray<FAB>::ParallelCopy_finish ()
552{
553
554#ifdef BL_USE_MPI
555
556 BL_PROFILE("FabArray::ParallelCopy_finish()");
558
559 if (!pcd) { return; }
560
561 const CPC* thecpc = pcd->cpc;
562
563 const auto N_snds = static_cast<int>(thecpc->m_SndTags->size());
564 const auto N_rcvs = static_cast<int>(thecpc->m_RcvTags->size());
565
566 if (N_rcvs > 0)
567 {
568 Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs,nullptr);
569 for (int k = 0; k < N_rcvs; ++k)
570 {
571 if (pcd->recv_size[k] > 0)
572 {
573 auto const& cctc = thecpc->m_RcvTags->at(pcd->recv_from[k]);
574 recv_cctc[k] = &cctc;
575 }
576 }
577
578 if (pcd->actual_n_rcvs > 0) {
579 Vector<MPI_Status> stats(N_rcvs);
580 ParallelDescriptor::Waitall(pcd->recv_reqs, stats);
581#ifdef AMREX_DEBUG
582 if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
583 {
584 amrex::Abort("ParallelCopy failed with wrong message size");
585 }
586#endif
587 }
588
589 bool is_thread_safe = thecpc->m_threadsafe_rcv;
590
591#ifdef AMREX_USE_GPU
592 if (Gpu::inLaunchRegion())
593 {
594 unpack_recv_buffer_gpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
595 recv_cctc, pcd->op, is_thread_safe);
596 }
597 else
598#endif
599 {
600 unpack_recv_buffer_cpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
601 recv_cctc, pcd->op, is_thread_safe);
602 }
603
604 if (pcd->the_recv_data)
605 {
606 amrex::The_Comms_Arena()->free(pcd->the_recv_data);
607 pcd->the_recv_data = nullptr;
608 }
609 }
610
611 if (N_snds > 0) {
612 if (! thecpc->m_SndTags->empty()) {
613 Vector<MPI_Status> stats(pcd->send_reqs.size());
614 ParallelDescriptor::Waitall(pcd->send_reqs, stats);
615 }
616 amrex::The_Comms_Arena()->free(pcd->the_send_data);
617 pcd->the_send_data = nullptr;
618 }
619
620 pcd.reset();
621
622#endif /*BL_USE_MPI*/
623}
624
625template <class FAB>
626void
627FabArray<FAB>::copyTo (FAB& dest, int scomp, int dcomp, int ncomp, int nghost) const
628{
629 BL_PROFILE("FabArray::copy(fab)");
630
631 BL_ASSERT(dcomp + ncomp <= dest.nComp());
632 BL_ASSERT(IntVect(nghost).allLE(nGrowVect()));
633
634 int root_proc = this->DistributionMap()[0];
635
636 BoxArray ba(dest.box());
637 DistributionMapping dm(Vector<int>{root_proc});
638 FabArray<FAB> destmf(ba, dm, ncomp, 0, MFInfo().SetAlloc(false));
639 if (ParallelDescriptor::MyProc() == root_proc) {
640 destmf.setFab(0, FAB(dest, amrex::make_alias, dcomp, ncomp));
641 }
642
643 destmf.ParallelCopy(*this, scomp, 0, ncomp, nghost, 0);
644
645#ifdef BL_USE_MPI
646 using T = typename FAB::value_type;
647 if (ParallelContext::NProcsSub() > 1) {
648 Long count = dest.numPts()*ncomp;
649 T* const p0 = dest.dataPtr(dcomp);
650 T* pb = p0;
651#ifdef AMREX_USE_GPU
652 if (dest.arena()->isDevice()) {
653 pb = (T*)The_Pinned_Arena()->alloc(sizeof(T)*count);
654 Gpu::dtoh_memcpy_async(pb, p0, sizeof(T)*count);
655 Gpu::streamSynchronize();
656 }
657#endif
658 ParallelDescriptor::Bcast(pb, count, ParallelContext::global_to_local_rank(root_proc),
659 ParallelContext::CommunicatorSub());
660#ifdef AMREX_USE_GPU
661 if (pb != p0) {
662 Gpu::htod_memcpy_async(p0, pb, sizeof(T)*count);
663 Gpu::streamSynchronize();
664 }
665#endif
666 }
667#endif
668}
669// \endcond
670#ifdef BL_USE_MPI
671template <class FAB>
672template <typename BUF>
674FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
675 Vector<char*>& send_data,
676 Vector<std::size_t>& send_size,
677 Vector<int>& send_rank,
678 Vector<MPI_Request>& send_reqs,
679 Vector<const CopyComTagsContainer*>& send_cctc,
680 int ncomp)
681{
682 char* pointer = nullptr;
683 PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
684 return TheFaArenaPointer(pointer);
685}
686
687template <class FAB>
688template <typename BUF>
689void
690FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
691 char*& the_send_data,
692 Vector<char*>& send_data,
693 Vector<std::size_t>& send_size,
694 Vector<int>& send_rank,
695 Vector<MPI_Request>& send_reqs,
696 Vector<const CopyComTagsContainer*>& send_cctc,
697 int ncomp)
698{
699 send_data.clear();
700 send_size.clear();
701 send_rank.clear();
702 send_reqs.clear();
703 send_cctc.clear();
704 const auto N_snds = SndTags.size();
705 if (N_snds == 0) { return; }
706 send_data.reserve(N_snds);
707 send_size.reserve(N_snds);
708 send_rank.reserve(N_snds);
709 send_reqs.reserve(N_snds);
710 send_cctc.reserve(N_snds);
711
712 Vector<std::size_t> offset; offset.reserve(N_snds);
713 std::size_t total_volume = 0;
714 for (auto const& kv : SndTags)
715 {
716 auto const& cctc = kv.second;
717
718 std::size_t nbytes = 0;
719 for (auto const& cct : kv.second)
720 {
721 nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);
722 }
723
724 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
725 nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned
726
727 // Also need to align the offset properly
728 total_volume = amrex::aligned_size(std::max(alignof(BUF), acd),
729 total_volume);
730
731 offset.push_back(total_volume);
732 total_volume += nbytes;
733
734 send_data.push_back(nullptr);
735 send_size.push_back(nbytes);
736 send_rank.push_back(kv.first);
737 send_reqs.push_back(MPI_REQUEST_NULL);
738 send_cctc.push_back(&cctc);
739 }
740
741 if (total_volume > 0)
742 {
743 the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(total_volume));
744 for (int i = 0, N = static_cast<int>(send_size.size()); i < N; ++i) {
745 send_data[i] = the_send_data + offset[i];
746 }
747 } else {
748 the_send_data = nullptr;
749 }
750}
751
template <class FAB>
void
FabArray<FAB>::PostSnds (Vector<char*> const& send_data,
                         Vector<std::size_t> const& send_size,
                         Vector<int> const& send_rank,
                         Vector<MPI_Request>& send_reqs,
                         int SeqNum)
{
    // Start a non-blocking send for every non-empty message.  Entries with
    // zero size keep whatever request value they already hold (the buffer
    // preparation step initializes them to MPI_REQUEST_NULL).
    MPI_Comm comm = ParallelContext::CommunicatorSub();

    const auto nsends = static_cast<int>(send_reqs.size());
    for (int i = 0; i < nsends; ++i)
    {
        if (send_size[i] > 0) {
            const int dst = ParallelContext::global_to_local_rank(send_rank[i]);
            send_reqs[i] = ParallelDescriptor::Asend
                (send_data[i], send_size[i], dst, SeqNum, comm).req();
        }
    }
}
772
773template <class FAB>
774template <typename BUF>
775TheFaArenaPointer FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
776 Vector<char*>& recv_data,
777 Vector<std::size_t>& recv_size,
778 Vector<int>& recv_from,
779 Vector<MPI_Request>& recv_reqs,
780 int ncomp,
781 int SeqNum)
782{
783 char* pointer = nullptr;
784 PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
785 return TheFaArenaPointer(pointer);
786}
787
788template <class FAB>
789template <typename BUF>
790void
791FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
792 char*& the_recv_data,
793 Vector<char*>& recv_data,
794 Vector<std::size_t>& recv_size,
795 Vector<int>& recv_from,
796 Vector<MPI_Request>& recv_reqs,
797 int ncomp,
798 int SeqNum)
799{
800 recv_data.clear();
801 recv_size.clear();
802 recv_from.clear();
803 recv_reqs.clear();
804
805 Vector<std::size_t> offset;
806 std::size_t TotalRcvsVolume = 0;
807 for (const auto& kv : RcvTags) // loop over senders
808 {
809 std::size_t nbytes = 0;
810 for (auto const& cct : kv.second)
811 {
812 nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);
813 }
814
815 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
816 nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned
817
818 // Also need to align the offset properly
819 TotalRcvsVolume = amrex::aligned_size(std::max(alignof(BUF),acd),
820 TotalRcvsVolume);
821
822 offset.push_back(TotalRcvsVolume);
823 TotalRcvsVolume += nbytes;
824
825 recv_data.push_back(nullptr);
826 recv_size.push_back(nbytes);
827 recv_from.push_back(kv.first);
828 recv_reqs.push_back(MPI_REQUEST_NULL);
829 }
830
831 const auto nrecv = static_cast<int>(recv_from.size());
832
833 MPI_Comm comm = ParallelContext::CommunicatorSub();
834
835 if (TotalRcvsVolume == 0)
836 {
837 the_recv_data = nullptr;
838 }
839 else
840 {
841 the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));
842
843 for (int i = 0; i < nrecv; ++i)
844 {
845 recv_data[i] = the_recv_data + offset[i];
846 if (recv_size[i] > 0)
847 {
848 const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
849 recv_reqs[i] = ParallelDescriptor::Arecv
850 (recv_data[i], recv_size[i], rank, SeqNum, comm).req();
851 }
852 }
853 }
854}
855#endif
856
857template <class FAB>
858void
860 int scomp,
861 int dcomp,
862 int ncomp,
863 const IntVect& nghost)
864{
866 "FabArray::Redistribute: must have the same BoxArray");
867
868 if (ParallelContext::NProcsSub() == 1)
869 {
870 Copy(*this, src, scomp, dcomp, ncomp, nghost);
871 return;
872 }
873
874#ifdef BL_USE_MPI
875
877
878 ParallelCopy(src, scomp, dcomp, ncomp, nghost, nghost, Periodicity::NonPeriodic(),
879 FabArrayBase::COPY, &cpc);
880
881#endif
882}
883
884template <class FAB>
885void
887{
888#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
889 // We only test if no DEBUG because in DEBUG we check the status later.
890 // If Test is done here, the status check will fail.
891 int flag;
892 ParallelDescriptor::Test(fbd->recv_reqs, flag, fbd->recv_stat);
893#endif
894}
895
896namespace detail {
897template <class TagT>
898void fbv_copy (Vector<TagT> const& tags)
899{
900 const int N = tags.size();
901 if (N == 0) { return; }
902#ifdef AMREX_USE_GPU
903 if (Gpu::inLaunchRegion()) {
904 ParallelFor(tags, 1,
905 [=] AMREX_GPU_DEVICE (int i, int j, int k, int, TagT const& tag) noexcept
906 {
907 const int ncomp = tag.dfab.nComp();
908 for (int n = 0; n < ncomp; ++n) {
909 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
910 }
911 });
912 } else
913#endif
914 {
915#ifdef AMREX_USE_OMP
916#pragma omp parallel for
917#endif
918 for (int itag = 0; itag < N; ++itag) {
919 auto const& tag = tags[itag];
920 const int ncomp = tag.dfab.nComp();
921 AMREX_LOOP_4D(tag.dbox, ncomp, i, j, k, n,
922 {
923 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
924 });
925 }
926 }
927}
928}
929
930template <class MF>
931std::enable_if_t<IsFabArray<MF>::value>
932FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp,
933 Vector<int> const& ncomp, Vector<IntVect> const& nghost,
934 Vector<Periodicity> const& period, Vector<int> const& cross = {})
935{
936 BL_PROFILE("FillBoundary(Vector)");
937#if 1
938 const int N = mf.size();
939 for (int i = 0; i < N; ++i) {
940 mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
941 cross.empty() ? 0 : cross[i]);
942 }
943 for (int i = 0; i < N; ++i) {
944 mf[i]->FillBoundary_finish();
945 }
946
947#else
948 using FAB = typename MF::FABType::value_type;
949 using T = typename FAB::value_type;
950
951 const int nmfs = mf.size();
952 Vector<FabArrayBase::CommMetaData const*> cmds;
953 int N_locs = 0;
954 int N_rcvs = 0;
955 int N_snds = 0;
956 for (int imf = 0; imf < nmfs; ++imf) {
957 if (nghost[imf].max() > 0) {
958 auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
959 cross.empty() ? 0 : cross[imf]);
960 // The FB is cached. Therefore it's safe take its address for later use.
961 cmds.push_back(static_cast<FabArrayBase::CommMetaData const*>(&TheFB));
962 N_locs += TheFB.m_LocTags->size();
963 N_rcvs += TheFB.m_RcvTags->size();
964 N_snds += TheFB.m_SndTags->size();
965 } else {
966 cmds.push_back(nullptr);
967 }
968 }
969
970 using TagT = Array4CopyTag<T>;
971 Vector<TagT> local_tags;
972 local_tags.reserve(N_locs);
973 static_assert(amrex::IsStoreAtomic<T>::value, "FillBoundary(Vector): storing T is not atomic");
974 for (int imf = 0; imf < nmfs; ++imf) {
975 if (cmds[imf]) {
976 auto const& tags = *(cmds[imf]->m_LocTags);
977 for (auto const& tag : tags) {
978 local_tags.push_back({(*mf[imf])[tag.dstIndex].array (scomp[imf],ncomp[imf]),
979 (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
980 tag.dbox,
981 (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
982 }
983 }
984 }
985
986 if (ParallelContext::NProcsSub() == 1) {
987 detail::fbv_copy(local_tags);
988 return;
989 }
990
991#ifdef AMREX_USE_MPI
992 //
993 // Do this before prematurely exiting if running in parallel.
994 // Otherwise sequence numbers will not match across MPI processes.
995 //
996 int SeqNum = ParallelDescriptor::SeqNum();
997 MPI_Comm comm = ParallelContext::CommunicatorSub();
998
999 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) { return; } // No work to do
1000
1001 char* the_recv_data = nullptr;
1002 Vector<int> recv_from;
1003 Vector<std::size_t> recv_size;
1004 Vector<MPI_Request> recv_reqs;
1005 Vector<MPI_Status> recv_stat;
1006 Vector<TagT> recv_tags;
1007
1008 if (N_rcvs > 0) {
1009
1010 for (int imf = 0; imf < nmfs; ++imf) {
1011 if (cmds[imf]) {
1012 auto const& tags = *(cmds[imf]->m_RcvTags);
1013 for (const auto& kv : tags) {
1014 recv_from.push_back(kv.first);
1015 }
1016 }
1017 }
1018 amrex::RemoveDuplicates(recv_from);
1019 const int nrecv = recv_from.size();
1020
1021 recv_reqs.resize(nrecv, MPI_REQUEST_NULL);
1022 recv_stat.resize(nrecv);
1023
1024 recv_tags.reserve(N_rcvs);
1025
1026 Vector<Vector<std::size_t> > recv_offset(nrecv);
1028 recv_size.reserve(nrecv);
1029 offset.reserve(nrecv);
1030 std::size_t TotalRcvsVolume = 0;
1031 for (int i = 0; i < nrecv; ++i) {
1032 std::size_t nbytes = 0;
1033 for (int imf = 0; imf < nmfs; ++imf) {
1034 if (cmds[imf]) {
1035 auto const& tags = *(cmds[imf]->m_RcvTags);
1036 auto it = tags.find(recv_from[i]);
1037 if (it != tags.end()) {
1038 for (auto const& cct : it->second) {
1039 auto& dfab = (*mf[imf])[cct.dstIndex];
1040 recv_offset[i].push_back(nbytes);
1041 recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
1042 makeArray4<T const>(nullptr,cct.dbox,ncomp[imf]),
1043 cct.dbox, Dim3{0,0,0}});
1044 nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
1045 }
1046 }
1047 }
1048 }
1049
1050 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1051 nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned
1052
1053 // Also need to align the offset properly
1054 TotalRcvsVolume = amrex::aligned_size(std::max(alignof(T),acd), TotalRcvsVolume);
1055
1056 offset.push_back(TotalRcvsVolume);
1057 TotalRcvsVolume += nbytes;
1058
1059 recv_size.push_back(nbytes);
1060 }
1061
1062 the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));
1063
1064 int k = 0;
1065 for (int i = 0; i < nrecv; ++i) {
1066 char* p = the_recv_data + offset[i];
1067 const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
1068 recv_reqs[i] = ParallelDescriptor::Arecv
1069 (p, recv_size[i], rank, SeqNum, comm).req();
1070 for (int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
1071 recv_tags[k++].sfab.p = (T const*)(p + recv_offset[i][j]);
1072 }
1073 }
1074 }
1075
1076 char* the_send_data = nullptr;
1077 Vector<int> send_rank;
1078 Vector<char*> send_data;
1079 Vector<std::size_t> send_size;
1080 Vector<MPI_Request> send_reqs;
1081 if (N_snds > 0) {
1082 for (int imf = 0; imf < nmfs; ++imf) {
1083 if (cmds[imf]) {
1084 auto const& tags = *(cmds[imf]->m_SndTags);
1085 for (auto const& kv : tags) {
1086 send_rank.push_back(kv.first);
1087 }
1088 }
1089 }
1090 amrex::RemoveDuplicates(send_rank);
1091 const int nsend = send_rank.size();
1092
1093 send_data.resize(nsend, nullptr);
1094 send_reqs.resize(nsend, MPI_REQUEST_NULL);
1095
1096 Vector<TagT> send_tags;
1097 send_tags.reserve(N_snds);
1098
1099 Vector<Vector<std::size_t> > send_offset(nsend);
1100 Vector<std::size_t> offset;
1101 send_size.reserve(nsend);
1102 offset.reserve(nsend);
1103 std::size_t TotalSndsVolume = 0;
1104 for (int i = 0; i < nsend; ++i) {
1105 std::size_t nbytes = 0;
1106 for (int imf = 0; imf < nmfs; ++imf) {
1107 if (cmds[imf]) {
1108 auto const& tags = *(cmds[imf]->m_SndTags);
1109 auto it = tags.find(send_rank[i]);
1110 if (it != tags.end()) {
1111 for (auto const& cct : it->second) {
1112 auto const& sfab = (*mf[imf])[cct.srcIndex];
1113 send_offset[i].push_back(nbytes);
1114 send_tags.push_back({amrex::makeArray4<T>(nullptr,cct.sbox,ncomp[imf]),
1115 sfab.const_array(scomp[imf],ncomp[imf]),
1116 cct.sbox, Dim3{0,0,0}});
1117 nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
1118 }
1119 }
1120 }
1121 }
1122
1123 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1124 nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned
1125
1126 // Also need to align the offset properly
1127 TotalSndsVolume = amrex::aligned_size(std::max(alignof(T),acd), TotalSndsVolume);
1128
1129 offset.push_back(TotalSndsVolume);
1130 TotalSndsVolume += nbytes;
1131
1132 send_size.push_back(nbytes);
1133 }
1134
1135 the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalSndsVolume));
1136 int k = 0;
1137 for (int i = 0; i < nsend; ++i) {
1138 send_data[i] = the_send_data + offset[i];
1139 for (int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
1140 send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
1141 }
1142 }
1143
1144 detail::fbv_copy(send_tags);
1145
1146 FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
1147 }
1148
1149#if !defined(AMREX_DEBUG)
1150 int recv_flag;
1151 ParallelDescriptor::Test(recv_reqs, recv_flag, recv_stat);
1152#endif
1153
1154 if (N_locs > 0) {
1155 detail::fbv_copy(local_tags);
1156#if !defined(AMREX_DEBUG)
1157 ParallelDescriptor::Test(recv_reqs, recv_flag, recv_stat);
1158#endif
1159 }
1160
1161 if (N_rcvs > 0) {
1162 ParallelDescriptor::Waitall(recv_reqs, recv_stat);
1163#ifdef AMREX_DEBUG
1164 if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
1165 amrex::Abort("FillBoundary(vector) failed with wrong message size");
1166 }
1167#endif
1168
1169 detail::fbv_copy(recv_tags);
1170
1171 amrex::The_Comms_Arena()->free(the_recv_data);
1172 }
1173
1174 if (N_snds > 0) {
1175 Vector<MPI_Status> stats(send_reqs.size());
1176 ParallelDescriptor::Waitall(send_reqs, stats);
1177 amrex::The_Comms_Arena()->free(the_send_data);
1178 }
1179
1180#endif // #ifdef AMREX_USE_MPI
1181#endif // #if 1 #else
1182}
1183
1184template <class MF>
1185std::enable_if_t<IsFabArray<MF>::value>
1186FillBoundary (Vector<MF*> const& mf, const Periodicity& a_period = Periodicity::NonPeriodic())
1187{
1188 Vector<int> scomp(mf.size(), 0);
1189 Vector<int> ncomp;
1190 Vector<IntVect> nghost;
1191 Vector<Periodicity> period(mf.size(), a_period);
1192 ncomp.reserve(mf.size());
1193 nghost.reserve(mf.size());
1194 for (auto const& x : mf) {
1195 ncomp.push_back(x->nComp());
1196 nghost.push_back(x->nGrowVect());
1197 }
1198 FillBoundary(mf, scomp, ncomp, nghost, period);
1199}
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_SYNC_STOP()
Definition AMReX_BLProfiler.H:645
#define BL_PROFILE_SYNC_START_TIMED(fname)
Definition AMReX_BLProfiler.H:644
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_NODISCARD
Definition AMReX_Extension.H:251
std::enable_if_t< IsFabArray< MF >::value > FillBoundary(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Definition AMReX_FabArrayCommI.H:932
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
#define AMREX_LOOP_4D(bx, ncomp, i, j, k, n, block)
Definition AMReX_Loop.nolint.H:16
int MPI_Comm
Definition AMReX_ccse-mpi.H:47
static constexpr int MPI_REQUEST_NULL
Definition AMReX_ccse-mpi.H:53
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
IndexType ixType() const noexcept
Return index type of this BoxArray.
Definition AMReX_BoxArray.H:837
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:130
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:94
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:344
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int max() const noexcept
maximum (no absolute values) value
Definition AMReX_IntVect.H:212
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
bool isAnyPeriodic() const noexcept
Definition AMReX_Periodicity.H:22
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
@ FAB
Definition AMReX_AmrvisConstants.H:86
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition AMReX_GpuRange.H:26
std::enable_if_t< IsBaseFab< FAB >() &&IsCallableR< Dim3, DTOS, Dim3 >() &&IsFabProjection< Proj, FAB >()> unpack_recv_buffer_cpu(FabArray< FAB > &mf, int dcomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{}) noexcept
AMREX_NODISCARD CommHandler ParallelCopy_nowait(NoLocalCopy, FabArray< FAB > &dest, const FabArray< FAB > &src, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition AMReX_NonLocalBC.H:705
std::enable_if_t< IsBaseFab< FAB >() &&IsCallableR< Dim3, DTOS, Dim3 >() &&IsFabProjection< Proj, FAB >()> unpack_recv_buffer_gpu(FabArray< FAB > &mf, int scomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{})
std::enable_if_t< IsBaseFab< FAB >() &&IsDataPacking< DataPacking, FAB >()> ParallelCopy_finish(FabArray< FAB > &dest, CommHandler handler, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition AMReX_NonLocalBC.H:797
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:613
double second() noexcept
Returns wall-clock seconds since start of execution.
Definition AMReX_ParallelDescriptor.cpp:1288
@ max
Definition AMReX_ParallelReduce.H:17
@ make_alias
Definition AMReX_MakeType.H:7
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
std::unique_ptr< char, TheFaArenaDeleter > TheFaArenaPointer
Definition AMReX_FabArray.H:104
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
BoxND< AMREX_SPACEDIM > Box
Definition AMReX_BaseFwd.H:27
IntVect nGrowVect(FabArrayBase const &fa)
void Copy(FabArray< DFAB > &dst, FabArray< SFAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:179
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:308
Arena * The_Comms_Arena()
Definition AMReX_Arena.cpp:676
IntVectND< AMREX_SPACEDIM > IntVect
Definition AMReX_BaseFwd.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1211
void Add(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:240
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:656
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size of size bytes, this returns the next largest arena size that will align...
Definition AMReX_Arena.H:30
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:1873
void RemoveDuplicates(Vector< T > &vec)
Definition AMReX_Vector.H:208
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayCommI.H:896
void fbv_copy(Vector< TagT > const &tags)
Definition AMReX_FabArrayCommI.H:898
Definition AMReX_Dim3.H:12
parallel copy or add
Definition AMReX_FabArrayBase.H:536
bool m_threadsafe_rcv
Definition AMReX_FabArrayBase.H:474
std::unique_ptr< MapOfCopyComTagContainers > m_RcvTags
Definition AMReX_FabArrayBase.H:477
std::unique_ptr< MapOfCopyComTagContainers > m_SndTags
Definition AMReX_FabArrayBase.H:476
std::unique_ptr< CopyComTagsContainer > m_LocTags
Definition AMReX_FabArrayBase.H:475
FillBoundary.
Definition AMReX_FabArrayBase.H:487
Definition AMReX_TypeTraits.H:266