template <class FAB>
template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int> Z>
void
FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, const IntVect& nghost,
                                    const Periodicity& period, bool cross,
                                    bool enforce_periodicity_only,
                                    bool override_sync)
{
    AMREX_ASSERT(!enforce_periodicity_only || !override_sync);

    // Decide whether there is any ghost-cell work to do at all.
    bool work_to_do;
    if (enforce_periodicity_only) {
        work_to_do = period.isAnyPeriodic();
    } else if (override_sync) {
        work_to_do = (nghost.max() > 0) || !is_cell_centered();
    } else {
        work_to_do = nghost.max() > 0;
    }
    if (!work_to_do) { return; }

    const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync);

    if (ParallelContext::NProcsSub() == 1)
    {
        // Single-rank run: only local copies are needed.
        const int N_locs = TheFB.m_LocTags->size();
        if (N_locs == 0) { return; }

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
            if (Gpu::inGraphRegion()) { FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp); }
            else
#endif
            { FB_local_copy_gpu(TheFB, scomp, ncomp); }
        } else
#endif
        { FB_local_copy_cpu(TheFB, scomp, ncomp); }
        return;
    }

#ifdef AMREX_USE_MPI
    // Obtain the message tag up front so that sequence numbers stay in sync
    // across ranks, even if this rank turns out to have nothing to communicate.
    int SeqNum = ParallelDescriptor::SeqNum();

    const int N_locs = TheFB.m_LocTags->size();
    const int N_rcvs = TheFB.m_RcvTags->size();
    const int N_snds = TheFB.m_SndTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
        return; // nothing to communicate
    }
    fbd = std::make_unique<FBData<FAB>>();
    fbd->fb    = &TheFB;
    fbd->scomp = scomp;
    fbd->ncomp = ncomp;
    fbd->tag   = SeqNum;

    // Post the receives first; a single buffer holds all incoming messages.
    if (N_rcvs > 0) {
        PostRcvs<BUF>(*TheFB.m_RcvTags, fbd->the_recv_data,
                      fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
                      ncomp, SeqNum);
        fbd->recv_stat.resize(N_rcvs);
    }

    // Pack and post the sends.
    char*& the_send_data = fbd->the_send_data;
    Vector<char*> send_data;
    Vector<std::size_t> send_size;
    Vector<int> send_rank;
    Vector<MPI_Request>& send_reqs = fbd->send_reqs;
    Vector<const CopyComTagsContainer*> send_cctc;

    if (N_snds > 0)
    {
        PrepareSendBuffers<BUF>(*TheFB.m_SndTags, the_send_data, send_data, send_size, send_rank,
                                send_reqs, send_cctc, ncomp);

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
            if (Gpu::inGraphRegion()) { FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc); }
            else
#endif
            { pack_send_buffer_gpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc); }
        } else
#endif
        { pack_send_buffer_cpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc); }

        PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
    }

    // Overlap: do the purely local copies while the messages are in flight.
    if (N_locs > 0)
    {
#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
            if (Gpu::inGraphRegion()) { FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp); }
            else
#endif
            { FB_local_copy_gpu(TheFB, scomp, ncomp); }
        } else
#endif
        { FB_local_copy_cpu(TheFB, scomp, ncomp); }
    }
#endif /* AMREX_USE_MPI */
}
template <class FAB>
template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int> Z>
void
FabArray<FAB>::FillBoundary_finish ()
{
#ifdef AMREX_USE_MPI
    if (!fbd) { n_filled = IntVect::TheZeroVector(); return; }

    const FB* TheFB = fbd->fb;
    const auto N_rcvs = static_cast<int>(TheFB->m_RcvTags->size());
    if (N_rcvs > 0)
    {
        Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs, nullptr);
        for (int k = 0; k < N_rcvs; k++)
        {
            if (fbd->recv_size[k] > 0)
            {
                auto const& cctc = TheFB->m_RcvTags->at(fbd->recv_from[k]);
                recv_cctc[k] = &cctc;
            }
        }

        // Only wait on receives that were actually posted (nonzero size).
        int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(), nullptr);

        if (actual_n_rcvs > 0) {
            ParallelDescriptor::Waitall(fbd->recv_reqs, fbd->recv_stat);
#ifdef AMREX_DEBUG
            if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
            {
                amrex::Abort("FillBoundary_finish failed with wrong message size");
            }
#endif
        }

        bool is_thread_safe = TheFB->m_threadsafe_rcv;

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
            if (Gpu::inGraphRegion())
            {
                FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
                                                 fbd->recv_data, fbd->recv_size,
                                                 recv_cctc, is_thread_safe);
            }
            else
#endif
            {
                unpack_recv_buffer_gpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                            recv_cctc, FabArrayBase::COPY, is_thread_safe);
            }
        } else
#endif
        {
            unpack_recv_buffer_cpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                        recv_cctc, FabArrayBase::COPY, is_thread_safe);
        }

        if (fbd->the_recv_data)
        {
            amrex::The_Comms_Arena()->free(fbd->the_recv_data);
            fbd->the_recv_data = nullptr;
        }
    }

    const auto N_snds = static_cast<int>(TheFB->m_SndTags->size());
    if (N_snds > 0) {
        // Wait for the sends to finish before releasing the send buffer.
        Vector<MPI_Status> stats(fbd->send_reqs.size());
        ParallelDescriptor::Waitall(fbd->send_reqs, stats);
        amrex::The_Comms_Arena()->free(fbd->the_send_data);
        fbd->the_send_data = nullptr;
    }

    fbd.reset();
#endif /* AMREX_USE_MPI */
}
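
// Usage sketch (illustrative, not part of this header).  Assuming a MultiFab
// `mf` and a Geometry `geom` set up elsewhere, the split-phase interface above
// lets a caller overlap the ghost-cell exchange with interior work:
//
//     mf.FillBoundary_nowait(geom.periodicity()); // post sends/recvs, do local copies
//     do_interior_work(mf);                       // hypothetical work away from ghost cells
//     mf.FillBoundary_finish();                   // wait for and unpack the messages
//
// A plain mf.FillBoundary(geom.periodicity()) is equivalent to issuing the
// nowait/finish pair back to back.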
template <class FAB>
void
FabArray<FAB>::ParallelCopyToGhost (const FabArray<FAB>& src,
                                    int scomp, int dcomp, int ncomp,
                                    const IntVect& snghost, const IntVect& dnghost,
                                    const Periodicity& period)
{
    BL_PROFILE("FabArray::ParallelCopyToGhost()");

    ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
                        FabArrayBase::COPY, nullptr, true);
    ParallelCopy_finish();
}

template <class FAB>
void
FabArray<FAB>::ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
                                           int scomp, int dcomp, int ncomp,
                                           const IntVect& snghost, const IntVect& dnghost,
                                           const Periodicity& period)
{
    ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
                        FabArrayBase::COPY, nullptr, true);
}

template <class FAB>
void
FabArray<FAB>::ParallelCopyToGhost_finish ()
{
    ParallelCopy_finish();
}
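
// Usage sketch (illustrative, not part of this header).  ParallelCopyToGhost
// fills only the ghost cells of the destination from the source's data.  A
// minimal call, assuming MultiFabs `dst` and `src` with compatible index types
// and a Geometry `geom`:
//
//     dst.ParallelCopyToGhost(src, 0, 0, dst.nComp(),
//                             IntVect(0),        // source ghost cells to read
//                             dst.nGrowVect(),   // destination ghost cells to fill
//                             geom.periodicity());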
template <class FAB>
void
FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
                                    int scomp, int dcomp, int ncomp,
                                    const IntVect& snghost, const IntVect& dnghost,
                                    const Periodicity& period, CpOp op,
                                    const FabArrayBase::CPC* a_cpc,
                                    bool to_ghost_cells_only)
{
    BL_PROFILE("FabArray::ParallelCopy_nowait()");

    if (empty() || src.empty()) { return; }

    BL_ASSERT(op == FabArrayBase::COPY || op == FabArrayBase::ADD);
    BL_ASSERT(src.nGrowVect().allGE(snghost));

    // If source and destination share the same layout and no ghost cells,
    // periodicity, or ghost-cells-only restriction are involved, a direct
    // local copy or add is all that is needed.
    if ((src.boxArray().ixType().cellCentered() || op == FabArrayBase::COPY) &&
        (boxarray == src.boxarray && distributionMap == src.distributionMap) &&
        snghost == IntVect::TheZeroVector() &&
        dnghost == IntVect::TheZeroVector() &&
        !period.isAnyPeriodic() && !to_ghost_cells_only)
    {
        if (op == FabArrayBase::COPY) {
            Copy(*this, src, scomp, dcomp, ncomp, IntVect(0));
        } else {
            Add(*this, src, scomp, dcomp, ncomp, IntVect(0));
        }
        return;
    }

    const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period, to_ghost_cells_only);

    if (ParallelContext::NProcsSub() == 1)
    {
        // Single-rank run: only local work.
        int N_locs = (*thecpc.m_LocTags).size();
        if (N_locs == 0) { return; }
#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
            PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op);
        } else
#endif
        {
            PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
        }
        return;
    }
#ifdef AMREX_USE_MPI
    const int N_snds = thecpc.m_SndTags->size();
    const int N_rcvs = thecpc.m_RcvTags->size();
    const int N_locs = thecpc.m_LocTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
        return; // nothing to communicate
    }

    // Communicate at most FabArrayBase::MaxComp components per pass to limit
    // the size of the communication buffers.
    int NCompLeft = ncomp;
    int SC = scomp, DC = dcomp, NC;

    for (int ipass = 0; ipass < ncomp; )
    {
        pcd = std::make_unique<PCData<FAB>>();
        pcd->cpc = &thecpc;
        pcd->op  = op;
        pcd->tag = ParallelDescriptor::SeqNum();

        NC = std::min(NCompLeft, FabArrayBase::MaxComp);
        const bool last_iter = (NCompLeft == NC);
        pcd->SC = SC; pcd->DC = DC; pcd->NC = NC;

        // Post the receives; one contiguous buffer holds all incoming messages.
        pcd->the_recv_data = nullptr;
        pcd->actual_n_rcvs = 0;
        if (N_rcvs > 0) {
            PostRcvs(*thecpc.m_RcvTags, pcd->the_recv_data,
                     pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
            pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
        }

        // Pack and post the sends.
        Vector<char*>                       send_data;
        Vector<std::size_t>                 send_size;
        Vector<int>                         send_rank;
        Vector<const CopyComTagsContainer*> send_cctc;

        if (N_snds > 0)
        {
            src.PrepareSendBuffers(*thecpc.m_SndTags, pcd->the_send_data, send_data, send_size,
                                   send_rank, pcd->send_reqs, send_cctc, NC);
#ifdef AMREX_USE_GPU
            if (Gpu::inLaunchRegion()) {
                pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc);
            } else
#endif
            {
                pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
            }
            FabArray<FAB>::PostSnds(send_data, send_size, send_rank, pcd->send_reqs, pcd->tag);
        }

        // Overlap the purely local work with the communication.
        if (N_locs > 0)
        {
#ifdef AMREX_USE_GPU
            if (Gpu::inLaunchRegion()) {
                PC_local_gpu(thecpc, src, SC, DC, NC, op);
            } else
#endif
            {
                PC_local_cpu(thecpc, src, SC, DC, NC, op);
            }
        }

        if (!last_iter) {
            // Intermediate passes are completed immediately so that pcd can be
            // reused for the next chunk of components.
            ParallelCopy_finish();
        }
        SC += NC; DC += NC; NCompLeft -= NC;
        ipass += NC;
    }
#endif /* AMREX_USE_MPI */
}
template <class FAB>
void
FabArray<FAB>::ParallelCopy_finish ()
{
#ifdef AMREX_USE_MPI
    BL_PROFILE("FabArray::ParallelCopy_finish()");

    if (!pcd) { return; }

    const CPC* thecpc = pcd->cpc;

    const auto N_snds = static_cast<int>(thecpc->m_SndTags->size());
    const auto N_rcvs = static_cast<int>(thecpc->m_RcvTags->size());
    amrex::ignore_unused(N_snds);

    if (N_rcvs > 0)
    {
        Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs, nullptr);
        for (int k = 0; k < N_rcvs; ++k)
        {
            if (pcd->recv_size[k] > 0)
            {
                auto const& cctc = thecpc->m_RcvTags->at(pcd->recv_from[k]);
                recv_cctc[k] = &cctc;
            }
        }

        if (pcd->actual_n_rcvs > 0) {
            Vector<MPI_Status> stats(N_rcvs);
            ParallelDescriptor::Waitall(pcd->recv_reqs, stats);
#ifdef AMREX_DEBUG
            if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
            {
                amrex::Abort("ParallelCopy failed with wrong message size");
            }
#endif
        }

        bool is_thread_safe = thecpc->m_threadsafe_rcv;

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion()) {
            unpack_recv_buffer_gpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
                                   recv_cctc, pcd->op, is_thread_safe);
        } else
#endif
        {
            unpack_recv_buffer_cpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
                                   recv_cctc, pcd->op, is_thread_safe);
        }

        if (pcd->the_recv_data)
        {
            amrex::The_Comms_Arena()->free(pcd->the_recv_data);
            pcd->the_recv_data = nullptr;
        }
    }

    if (! thecpc->m_SndTags->empty()) {
        // Wait for the sends to finish before releasing the send buffer.
        Vector<MPI_Status> stats(pcd->send_reqs.size());
        ParallelDescriptor::Waitall(pcd->send_reqs, stats);
        amrex::The_Comms_Arena()->free(pcd->the_send_data);
        pcd->the_send_data = nullptr;
    }

    pcd.reset();
#endif /* AMREX_USE_MPI */
}
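
// Usage sketch (illustrative, not part of this header).  ParallelCopy is the
// general redistribution primitive: it copies (or adds, with FabArrayBase::ADD)
// data from a source FabArray into the overlapping parts of a destination that
// may have a different BoxArray and DistributionMapping.  Assuming MultiFabs
// `dst` and `src` and a Geometry `geom`:
//
//     dst.ParallelCopy(src);                         // valid region, all components
//     dst.ParallelCopy(src, 0, 0, 1,                 // one component,
//                      0, dst.nGrow(),               // 0 src ghosts, fill dst ghosts
//                      geom.periodicity());
//
// The nowait/finish pair above exposes the same operation in split-phase form.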
template <class FAB>
void
FabArray<FAB>::copyTo (FAB& dest, int scomp, int dcomp, int ncomp, int nghost) const
{
    BL_ASSERT(dcomp + ncomp <= dest.nComp());

    // Gather the data overlapping dest.box() onto root_proc via a temporary
    // one-box FabArray that aliases dest there, ...
    // ...
    BoxArray ba(dest.box());
    DistributionMapping dm(Vector<int>{root_proc});
    FabArray<FAB> destmf(ba, dm, ncomp, 0, MFInfo().SetAlloc(false));
    // ...
    destmf.ParallelCopy(*this, scomp, 0, ncomp, nghost, 0);

    // ... then broadcast the result from root_proc to the other ranks.
    using T = typename FAB::value_type;
    Long count = dest.numPts()*ncomp;
    T* const p0 = dest.dataPtr(dcomp);
    if (dest.arena()->isDevice()) {
        // Device memory: stage through pinned host memory for the broadcast.
        // ...
    }
    // ...
}
template <class FAB>
template <typename BUF>
TheFaArenaPointer
FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
                                   Vector<char*>& send_data,
                                   Vector<std::size_t>& send_size,
                                   Vector<int>& send_rank,
                                   Vector<MPI_Request>& send_reqs,
                                   Vector<const CopyComTagsContainer*>& send_cctc,
                                   int ncomp)
{
    char* pointer = nullptr;
    PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
    return TheFaArenaPointer(pointer);
}
template <class FAB>
template <typename BUF>
void
FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
                                   char*& the_send_data,
                                   Vector<char*>& send_data,
                                   Vector<std::size_t>& send_size,
                                   Vector<int>& send_rank,
                                   Vector<MPI_Request>& send_reqs,
                                   Vector<const CopyComTagsContainer*>& send_cctc,
                                   int ncomp)
{
    send_data.clear();
    send_size.clear();
    send_rank.clear();
    send_reqs.clear();
    send_cctc.clear();
    const auto N_snds = SndTags.size();
    if (N_snds == 0) { return; }
    send_data.reserve(N_snds);
    send_size.reserve(N_snds);
    send_rank.reserve(N_snds);
    send_reqs.reserve(N_snds);
    send_cctc.reserve(N_snds);

    // Compute, per destination rank, the number of bytes to send and its
    // offset inside one contiguous send buffer.
    Vector<std::size_t> offset;
    offset.reserve(N_snds);
    std::size_t total_volume = 0;
    for (auto const& kv : SndTags)
    {
        auto const& cctc = kv.second;

        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
        {
            nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);
        }

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        nbytes = amrex::aligned_size(acd, nbytes); // round up to the MPI data type size

        offset.push_back(total_volume);
        total_volume += nbytes;

        send_data.push_back(nullptr);
        send_size.push_back(nbytes);
        send_rank.push_back(kv.first);
        send_reqs.push_back(MPI_REQUEST_NULL);
        send_cctc.push_back(&cctc);
    }

    if (total_volume > 0)
    {
        the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(total_volume));
        for (int i = 0, N = static_cast<int>(send_size.size()); i < N; ++i) {
            send_data[i] = the_send_data + offset[i];
        }
    } else {
        the_send_data = nullptr;
    }
}
template <class FAB>
void
FabArray<FAB>::PostSnds (Vector<char*> const& send_data,
                         Vector<std::size_t> const& send_size,
                         Vector<int> const& send_rank,
                         Vector<MPI_Request>& send_reqs,
                         int SeqNum)
{
    MPI_Comm comm = ParallelContext::CommunicatorSub();

    const auto N_snds = static_cast<int>(send_reqs.size());
    for (int j = 0; j < N_snds; ++j)
    {
        if (send_size[j] > 0) {
            const int rank = ParallelContext::global_to_local_rank(send_rank[j]);
            send_reqs[j] = ParallelDescriptor::Asend
                (send_data[j], send_size[j], rank, SeqNum, comm).req();
        }
    }
}
template <class FAB>
template <typename BUF>
TheFaArenaPointer
FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                         Vector<char*>& recv_data,
                         Vector<std::size_t>& recv_size,
                         Vector<int>& recv_from,
                         Vector<MPI_Request>& recv_reqs,
                         int ncomp, int SeqNum)
{
    char* pointer = nullptr;
    PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
    return TheFaArenaPointer(pointer);
}
template <class FAB>
template <typename BUF>
void
FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                         char*& the_recv_data,
                         Vector<char*>& recv_data,
                         Vector<std::size_t>& recv_size,
                         Vector<int>& recv_from,
                         Vector<MPI_Request>& recv_reqs,
                         int ncomp, int SeqNum)
{
    recv_data.clear();
    recv_size.clear();
    recv_from.clear();
    recv_reqs.clear();

    // Compute, per source rank, the number of bytes to receive and its offset
    // inside one contiguous receive buffer.
    Vector<std::size_t> offset;
    std::size_t TotalRcvsVolume = 0;
    for (const auto& kv : RcvTags)
    {
        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
        {
            nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);
        }

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        nbytes = amrex::aligned_size(acd, nbytes); // round up to the MPI data type size

        offset.push_back(TotalRcvsVolume);
        TotalRcvsVolume += nbytes;

        recv_data.push_back(nullptr);
        recv_size.push_back(nbytes);
        recv_from.push_back(kv.first);
        recv_reqs.push_back(MPI_REQUEST_NULL);
    }

    const auto nrecv = static_cast<int>(recv_from.size());

    MPI_Comm comm = ParallelContext::CommunicatorSub();

    if (TotalRcvsVolume == 0)
    {
        the_recv_data = nullptr;
    }
    else
    {
        the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));

        for (int i = 0; i < nrecv; ++i)
        {
            recv_data[i] = the_recv_data + offset[i];
            if (recv_size[i] > 0)
            {
                const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
                recv_reqs[i] = ParallelDescriptor::Arecv
                    (recv_data[i], recv_size[i], rank, SeqNum, comm).req();
            }
        }
    }
}
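
// How the helpers above fit together (a sketch of the pattern used by
// FillBoundary_nowait/finish and ParallelCopy_nowait/finish; `cmd`, `fa`, and
// the scalar arguments stand for a CommMetaData object and a FabArray set up
// as in those functions):
//
//     int tag = ParallelDescriptor::SeqNum();
//     PostRcvs<BUF>(*cmd.m_RcvTags, the_recv_data, recv_data, recv_size,
//                   recv_from, recv_reqs, ncomp, tag);            // MPI_Irecv per neighbor
//     PrepareSendBuffers<BUF>(*cmd.m_SndTags, the_send_data, send_data,
//                             send_size, send_rank, send_reqs, send_cctc, ncomp);
//     pack_send_buffer_cpu<BUF>(fa, scomp, ncomp, send_data, send_size, send_cctc);
//     PostSnds(send_data, send_size, send_rank, send_reqs, tag);  // MPI_Isend per neighbor
//     /* ... overlap local copies here ... */
//     ParallelDescriptor::Waitall(recv_reqs, recv_stat);
//     unpack_recv_buffer_cpu<BUF>(fa, dcomp, ncomp, recv_data, recv_size,
//                                 recv_cctc, FabArrayBase::COPY, is_thread_safe);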
811 "FabArray::Redistribute: must have the same BoxArray");
815 Copy(*
this, src, scomp, dcomp, ncomp, nghost);
823 ParallelCopy(src, scomp, dcomp, ncomp, nghost, nghost, Periodicity::NonPeriodic(),
824 FabArrayBase::COPY, &cpc);
#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
// ...
#endif

// Copy kernel used by the fused FillBoundary below: for each tag, copy all
// components from the source Array4 into the destination Array4, applying the
// stored index offset.
template <class TagT>
void fbv_copy (Vector<TagT> const& tags)
{
    const int N = tags.size();
    if (N == 0) { return; }
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        ParallelFor(tags, 1,
        [=] AMREX_GPU_DEVICE (int i, int j, int k, int, TagT const& tag) noexcept
        {
            const int ncomp = tag.dfab.nComp();
            for (int n = 0; n < ncomp; ++n) {
                tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
            }
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel for
#endif
        for (int itag = 0; itag < N; ++itag) {
            auto const& tag = tags[itag];
            const int ncomp = tag.dfab.nComp();
            AMREX_LOOP_4D(tag.dbox, ncomp, i, j, k, n,
            {
                tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
            });
        }
    }
}
template <class MF>
std::enable_if_t<IsFabArray<MF>::value>
FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp, Vector<int> const& ncomp,
              Vector<IntVect> const& nghost, Vector<Periodicity> const& period,
              Vector<int> const& cross = {})
{
#ifdef AMREX_USE_MPI
    if (ParallelContext::NProcsSub() == 1)
#endif
    {
        // Trivial case: fill each FabArray one after another.
        const int N = mf.size();
        for (int i = 0; i < N; ++i) {
            mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
                                       cross.empty() ? 0 : cross[i]);
        }
        for (int i = 0; i < N; ++i) {
            mf[i]->FillBoundary_finish();
        }
        return;
    }
#ifdef AMREX_USE_MPI
    // Multi-rank path: fuse the FillBoundary of all the FabArrays into a
    // single batch of messages per neighbor rank.
    using FAB = typename MF::FABType::value_type;
    using T   = typename FAB::value_type;

    const int nmfs = mf.size();
    Vector<FabArrayBase::CommMetaData const*> cmds;
    cmds.reserve(nmfs);
    std::size_t N_locs = 0;
    std::size_t N_rcvs = 0;
    std::size_t N_snds = 0;
    for (int imf = 0; imf < nmfs; ++imf) {
        if (nghost[imf].max() > 0) {
            auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
                                               cross.empty() ? 0 : cross[imf]);
            cmds.push_back(static_cast<FabArrayBase::CommMetaData const*>(&TheFB));
            N_locs += TheFB.m_LocTags->size();
            N_rcvs += TheFB.m_RcvTags->size();
            N_snds += TheFB.m_SndTags->size();
        } else {
            cmds.push_back(nullptr);
        }
    }
    // Collect the local (same-rank) copies of all the FabArrays into one tag vector.
    using TagT = Array4CopyTag<T>;
    Vector<TagT> local_tags;
    local_tags.reserve(N_locs);
    for (int imf = 0; imf < nmfs; ++imf) {
        if (cmds[imf]) {
            auto const& tags = *(cmds[imf]->m_LocTags);
            for (auto const& tag : tags) {
                local_tags.push_back({(*mf[imf])[tag.dstIndex].array(scomp[imf],ncomp[imf]),
                                      (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
                                      tag.dbox,
                                      (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
            }
        }
    }

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) { return; }
    int SeqNum = ParallelDescriptor::SeqNum();
    MPI_Comm comm = ParallelContext::CommunicatorSub();

    // Post the receives: one contiguous buffer per neighbor rank that covers
    // the ghost regions of all the FabArrays.
    char* the_recv_data = nullptr;
    Vector<int> recv_from;
    Vector<std::size_t> recv_size;
    Vector<MPI_Request> recv_reqs;
    Vector<MPI_Status> recv_stat;
    Vector<TagT> recv_tags;

    if (N_rcvs > 0)
    {
        for (int imf = 0; imf < nmfs; ++imf) {
            if (cmds[imf]) {
                auto const& tags = *(cmds[imf]->m_RcvTags);
                for (const auto& kv : tags) {
                    recv_from.push_back(kv.first);
                }
            }
        }
        amrex::RemoveDuplicates(recv_from);
        const int nrecv = recv_from.size();

        recv_reqs.resize(nrecv, MPI_REQUEST_NULL);
        recv_stat.resize(nrecv);

        recv_tags.reserve(N_rcvs);
        Vector<Vector<std::size_t>> recv_offset(nrecv);
        Vector<std::size_t> offset;
        offset.reserve(nrecv);
        recv_size.reserve(nrecv);

        std::size_t TotalRcvsVolume = 0;
        for (int i = 0; i < nrecv; ++i) {
            std::size_t nbytes = 0;
            for (int imf = 0; imf < nmfs; ++imf) {
                if (cmds[imf]) {
                    auto const& tags = *(cmds[imf]->m_RcvTags);
                    auto it = tags.find(recv_from[i]);
                    if (it != tags.end()) {
                        for (auto const& cct : it->second) {
                            auto& dfab = (*mf[imf])[cct.dstIndex];
                            recv_offset[i].push_back(nbytes);
                            // The source Array4 will be pointed into the receive
                            // buffer once it has been allocated below.
                            recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
                                                 makeArray4<T const>(nullptr,cct.dbox,ncomp[imf]),
                                                 cct.dbox, Dim3{0,0,0}});
                            nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
                        }
                    }
                }
            }

            std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
            nbytes = amrex::aligned_size(acd, nbytes); // round up to the MPI data type size

            offset.push_back(TotalRcvsVolume);
            TotalRcvsVolume += nbytes;
            recv_size.push_back(nbytes);
        }

        the_recv_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalRcvsVolume));

        int k = 0;
        for (int i = 0; i < nrecv; ++i) {
            char* p = the_recv_data + offset[i];
            const int rank = ParallelContext::global_to_local_rank(recv_from[i]);
            recv_reqs[i] = ParallelDescriptor::Arecv
                (p, recv_size[i], rank, SeqNum, comm).req();
            for (int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
                recv_tags[k++].sfab.p = (T const*)(p + recv_offset[i][j]);
            }
        }
    }
    // Pack and post the sends, again one contiguous buffer per neighbor rank.
    char* the_send_data = nullptr;
    Vector<int> send_rank;
    Vector<char*> send_data;
    Vector<std::size_t> send_size;
    Vector<MPI_Request> send_reqs;

    if (N_snds > 0)
    {
        for (int imf = 0; imf < nmfs; ++imf) {
            if (cmds[imf]) {
                auto const& tags = *(cmds[imf]->m_SndTags);
                for (auto const& kv : tags) {
                    send_rank.push_back(kv.first);
                }
            }
        }
        amrex::RemoveDuplicates(send_rank);
        const int nsend = send_rank.size();

        send_data.resize(nsend, nullptr);
        send_reqs.resize(nsend, MPI_REQUEST_NULL);

        Vector<TagT> send_tags;
        send_tags.reserve(N_snds);
        Vector<Vector<std::size_t>> send_offset(nsend);
        Vector<std::size_t> offset;
        offset.reserve(nsend);
        send_size.reserve(nsend);

        std::size_t TotalSndsVolume = 0;
        for (int i = 0; i < nsend; ++i) {
            std::size_t nbytes = 0;
            for (int imf = 0; imf < nmfs; ++imf) {
                if (cmds[imf]) {
                    auto const& tags = *(cmds[imf]->m_SndTags);
                    auto it = tags.find(send_rank[i]);
                    if (it != tags.end()) {
                        for (auto const& cct : it->second) {
                            auto const& sfab = (*mf[imf])[cct.srcIndex];
                            send_offset[i].push_back(nbytes);
                            // The destination Array4 will be pointed into the send
                            // buffer once it has been allocated below.
                            send_tags.push_back({amrex::makeArray4<T>(nullptr,cct.sbox,ncomp[imf]),
                                                 sfab.const_array(scomp[imf],ncomp[imf]),
                                                 cct.sbox, Dim3{0,0,0}});
                            nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
                        }
                    }
                }
            }

            std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
            nbytes = amrex::aligned_size(acd, nbytes); // round up to the MPI data type size

            offset.push_back(TotalSndsVolume);
            TotalSndsVolume += nbytes;
            send_size.push_back(nbytes);
        }

        the_send_data = static_cast<char*>(amrex::The_Comms_Arena()->alloc(TotalSndsVolume));

        int k = 0;
        for (int i = 0; i < nsend; ++i) {
            send_data[i] = the_send_data + offset[i];
            for (int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
                send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
            }
        }

        fbv_copy(send_tags); // pack the send buffers
#ifdef AMREX_USE_GPU
        Gpu::streamSynchronize(); // make sure packing kernels finish before MPI uses the buffers
#endif

        FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
    }
    // Do the purely local copies while the messages are in flight.
    fbv_copy(local_tags);

    // Wait for the receives and unpack them into the destination FABs.
    if (N_rcvs > 0) {
        ParallelDescriptor::Waitall(recv_reqs, recv_stat);
#if !defined(AMREX_DEBUG)
        // Message-size verification is skipped in optimized builds.
#else
        if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
            amrex::Abort("FillBoundary(vector) failed with wrong message size");
        }
#endif
        fbv_copy(recv_tags);
        amrex::The_Comms_Arena()->free(the_recv_data);
    }

    // Wait for the sends to complete before releasing the send buffer.
    if (N_snds > 0) {
        Vector<MPI_Status> stats(send_reqs.size());
        ParallelDescriptor::Waitall(send_reqs, stats);
        amrex::The_Comms_Arena()->free(the_send_data);
    }
#endif /* AMREX_USE_MPI */
}
template <class MF>
std::enable_if_t<IsFabArray<MF>::value>
FillBoundary (Vector<MF*> const& mf, const Periodicity& a_period = Periodicity::NonPeriodic())
{
    Vector<int> scomp(mf.size(), 0);
    Vector<int> ncomp;
    Vector<IntVect> nghost;
    Vector<Periodicity> period(mf.size(), a_period);
    ncomp.reserve(mf.size());
    nghost.reserve(mf.size());
    for (auto const& x : mf) {
        ncomp.push_back(x->nComp());
        nghost.push_back(x->nGrowVect());
    }
    FillBoundary(mf, scomp, ncomp, nghost, period);
}
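
// Usage sketch (illustrative, not part of this header).  The Vector overloads
// above fuse the ghost-cell exchange of several FabArrays into one batch of
// messages per neighbor rank, which reduces the number of MPI messages when
// many MultiFabs are filled together:
//
//     Vector<MultiFab*> mfs{&rho, &vel, &temp};      // hypothetical MultiFabs
//     amrex::FillBoundary(mfs, geom.periodicity());  // fill all of them at once
//
// This has the same effect as calling FillBoundary on each MultiFab separately,
// but with the per-rank messages combined.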