template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value, int> Z>
// ... FillBoundary_nowait (fragment) ...
                                    bool enforce_periodicity_only,
// ...
    AMREX_ASSERT(!enforce_periodicity_only || !override_sync);
    // ...
    if (enforce_periodicity_only) {
        // ...
    } else if (override_sync) {
        work_to_do = (nghost.max() > 0) || !is_cell_centered();
    } else {
        work_to_do = nghost.max() > 0;
    }
    if (!work_to_do) { return; }

    const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync);

    // Single-rank case: only local copies are needed.
    // ...
        if (N_locs == 0) { return; }
        // ...
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
            FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
            // ...
#endif
            FB_local_copy_gpu(TheFB, scomp, ncomp);
            // ...
            FB_local_copy_cpu(TheFB, scomp, ncomp);
    // Parallel (MPI) path: count local copies, receives, and sends.
    // ...
    const int N_locs = TheFB.m_LocTags->size();
    const int N_rcvs = TheFB.m_RcvTags->size();
    const int N_snds = TheFB.m_SndTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
        // ...
    }
    // ...
    fbd = std::make_unique<FBData<FAB>>();
    // ...
    // Post the receives first, all into one contiguous buffer.
    PostRcvs<BUF>(*TheFB.m_RcvTags, fbd->the_recv_data,
                  fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
                  /* ... */);
    fbd->recv_stat.resize(N_rcvs);
    // ...
    // Pack and post the sends.
    char*& the_send_data = fbd->the_send_data;
    // ...
    PrepareSendBuffers<BUF>(*TheFB.m_SndTags, the_send_data, send_data, send_size, send_rank,
                            send_reqs, send_cctc, ncomp);
    // ...
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
        FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
        // ...
#endif
        pack_send_buffer_gpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
        // ...
        pack_send_buffer_cpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);
    // ...
    PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);

    // Overlap the purely local copies with the communication in flight.
    // ...
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
        FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
        // ...
#endif
        FB_local_copy_gpu(TheFB, scomp, ncomp);
        // ...
        FB_local_copy_cpu(TheFB, scomp, ncomp);
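
// --- Usage sketch (illustrative, not part of this header) ----------------------
// How the split-phase FillBoundary above is typically driven from user code.
// A minimal, hedged example; `exchange_ghosts` and its setup are hypothetical.
#include <AMReX_MultiFab.H>
#include <AMReX_Geometry.H>

void exchange_ghosts (amrex::MultiFab& mf, amrex::Geometry const& geom)
{
    // Posts receives, packs and posts sends, and performs the local copies.
    mf.FillBoundary_nowait(geom.periodicity());

    // ... independent local work can overlap the communication here ...

    // Waits for the messages and unpacks them into the ghost cells.
    mf.FillBoundary_finish();
}
// --------------------------------------------------------------------------------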
template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value, int> Z>
// ... FillBoundary_finish (fragment) ...
    if (!fbd) { n_filled = IntVect::TheZeroVector(); return; }
    // ...
    const FB* TheFB = fbd->fb;

    const auto N_rcvs = static_cast<int>(TheFB->m_RcvTags->size());
    // ...
    for (int k = 0; k < N_rcvs; k++)
    {
        if (fbd->recv_size[k] > 0)
        {
            auto const& cctc = TheFB->m_RcvTags->at(fbd->recv_from[k]);
            recv_cctc[k] = &cctc;
        }
    }
    // ...
    int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(), nullptr);
    // ...
    if (actual_n_rcvs > 0) {
        // ...
        if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
        {
            amrex::Abort("FillBoundary_finish failed with wrong message size");
        }
        // ...
    }
    // ...
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
        FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
                                         fbd->recv_data, fbd->recv_size,
                                         recv_cctc, is_thread_safe);
        // ...
#endif
        unpack_recv_buffer_gpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                    recv_cctc, FabArrayBase::COPY, is_thread_safe);
        // ...
        unpack_recv_buffer_cpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                    recv_cctc, FabArrayBase::COPY, is_thread_safe);
    // ...
    if (fbd->the_recv_data)
    {
        // ...
        fbd->the_recv_data = nullptr;
    }
    // ...
    const auto N_snds = static_cast<int>(TheFB->m_SndTags->size());
    // ...
    fbd->the_send_data = nullptr;
FabArray<FAB>::ParallelCopyToGhost (const FabArray<FAB>& src,
                                    /* ... */
                                    const Periodicity& period)
{
    BL_PROFILE("FabArray::ParallelCopyToGhost()");
    // ... forwards to the general parallel-copy machinery; trailing arguments:
                  FabArrayBase::COPY, nullptr, true);   // to_ghost_cells_only = true
}

FabArray<FAB>::ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
                                           /* ... */
                                           const Periodicity& period)
{
    // ... same forwarding as above; trailing arguments:
                  FabArrayBase::COPY, nullptr, true);   // to_ghost_cells_only = true
}

FabArray<FAB>::ParallelCopyToGhost_finish ()
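
// --- Usage sketch (illustrative, not part of this header) ----------------------
// ParallelCopyToGhost copies valid data from a source FabArray into the ghost
// cells of the destination only (to_ghost_cells_only == true above). Hedged
// example, same headers as the earlier sketch; the argument order is assumed to
// mirror ParallelCopy (scomp, dcomp, ncomp, snghost, dnghost, period).
void fill_ghosts_from (amrex::MultiFab& dst, amrex::MultiFab const& src,
                       amrex::Geometry const& geom)
{
    // Read from src's valid cells (no source ghosts) and write into all of
    // dst's ghost cells, honoring periodic boundaries.
    dst.ParallelCopyToGhost(src, 0, 0, dst.nComp(),
                            amrex::IntVect(0), dst.nGrowVect(),
                            geom.periodicity());
}
// --------------------------------------------------------------------------------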
// ParallelCopy_nowait (fragment): trailing parameters of the signature.
// ...
                                 const Periodicity& period,
                                 // ...
                                 const FabArrayBase::CPC* a_cpc,
                                 bool to_ghost_cells_only)
{
    BL_PROFILE("FabArray::ParallelCopy_nowait()");
    // ...
    if (empty() || src.empty()) {
        // ...
    }
    // ...
    BL_ASSERT(op == FabArrayBase::COPY || op == FabArrayBase::ADD);
    // ...
    BL_ASSERT(src.nGrowVect().allGE(snghost));

    // Fast path: both FabArrays consist of a single box (remaining conditions elided).
    // ...
        (this->size() == 1) && (src.size() == 1) &&
        !period.isAnyPeriodic() && !to_ghost_cells_only)
    {
        // ...
        auto const& da = this->array(0, dcomp);
        auto const& sa = src.const_array(0, scomp);
        // ...
        if (op == FabArrayBase::COPY) {
            // ...
                da(i,j,k,n) = sa(i,j,k,n);
            // ...
#pragma omp parallel for collapse(3)
            // ...
            for (int n = 0; n < ncomp; ++n) {
            for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
                // ...
                for (int i = lo.x; i <= hi.x; ++i) {
                    da(i,j,k,n) = sa(i,j,k,n);
                }
            }}}
            // ...
        } else {   // op == FabArrayBase::ADD
            // ...
                da(i,j,k,n) += sa(i,j,k,n);
            // ...
#pragma omp parallel for collapse(3)
            // ...
            for (int n = 0; n < ncomp; ++n) {
            for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
                // ...
                for (int i = lo.x; i <= hi.x; ++i) {
                    da(i,j,k,n) += sa(i,j,k,n);
                }
            }}}
        }
        // ...
    }

    // Same BoxArray and DistributionMapping, no ghost cells, no periodicity:
    // reduce to a purely local Copy/Add.
    if ((src.boxArray().ixType().cellCentered() || op == FabArrayBase::COPY) &&
        (boxarray == src.boxarray && distributionMap == src.distributionMap) &&
        snghost == IntVect::TheZeroVector() &&
        dnghost == IntVect::TheZeroVector() &&
        !period.isAnyPeriodic() && !to_ghost_cells_only)
    {
        // ...
        if (op == FabArrayBase::COPY) {
            // ...
        } else {
            Add(*this, src, scomp, dcomp, ncomp, IntVect(0));
        }
        // ...
    }

    const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period, to_ghost_cells_only);

    // Single-rank case: only local copies.
    // ...
        int N_locs = (*thecpc.m_LocTags).size();
        if (N_locs == 0) { return; }
        // ...
            PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op);
        // ...
            PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
    // Parallel (MPI) path.
    // ...
    const int N_snds = thecpc.m_SndTags->size();
    const int N_rcvs = thecpc.m_RcvTags->size();
    const int N_locs = thecpc.m_LocTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
        // ...
    }
    // ...
    // The copy proceeds in passes of at most FabArrayBase::MaxComp components.
    int NCompLeft = ncomp;
    int SC = scomp, DC = dcomp, NC;
    // ...
    for (int ipass = 0; ipass < ncomp; )
    {
        // ...
        pcd = std::make_unique<PCData<FAB>>();
        // ...
        NC = std::min(NCompLeft, FabArrayBase::MaxComp);
        const bool last_iter = (NCompLeft == NC);
        // ...
        // Post the receives.
        pcd->the_recv_data = nullptr;
        // ...
        pcd->actual_n_rcvs = 0;
        // ...
        PostRcvs(*thecpc.m_RcvTags, pcd->the_recv_data,
                 pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
        pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
        // ...
        // Pack and post the sends.
        Vector<char*> send_data;
        Vector<std::size_t> send_size;
        Vector<int> send_rank;
        Vector<const CopyComTagsContainer*> send_cctc;
        // ...
        src.PrepareSendBuffers(*thecpc.m_SndTags, pcd->the_send_data, send_data, send_size,
                               send_rank, pcd->send_reqs, send_cctc, NC);
        // ...
            pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc);
        // ...
            pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
        // ...
        FabArray<FAB>::PostSnds(send_data, send_size, send_rank, pcd->send_reqs, pcd->tag);
        // ...
        // Overlap the local copies with the communication in flight.
            PC_local_gpu(thecpc, src, SC, DC, NC, op);
        // ...
            PC_local_cpu(thecpc, src, SC, DC, NC, op);
    BL_PROFILE("FabArray::ParallelCopy_finish()");
    // ...
    if (!pcd) { return; }
    // ...
    const CPC* thecpc = pcd->cpc;
    // ...
    const auto N_snds = static_cast<int>(thecpc->m_SndTags->size());
    const auto N_rcvs = static_cast<int>(thecpc->m_RcvTags->size());
    // ...
    Vector<const CopyComTagsContainer*> recv_cctc(N_rcvs, nullptr);
    for (int k = 0; k < N_rcvs; ++k)
    {
        if (pcd->recv_size[k] > 0)
        {
            auto const& cctc = thecpc->m_RcvTags->at(pcd->recv_from[k]);
            recv_cctc[k] = &cctc;
        }
    }
    // ...
    if (pcd->actual_n_rcvs > 0) {
        Vector<MPI_Status> stats(N_rcvs);
        // ...
        if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
        {
            amrex::Abort("ParallelCopy failed with wrong message size");
        }
        // ...
    }
    // ...
    bool is_thread_safe = thecpc->m_threadsafe_rcv;
    // trailing arguments of the unpack_recv_buffer_gpu / unpack_recv_buffer_cpu calls:
    // ...
                          recv_cctc, pcd->op, is_thread_safe);
    // ...
                          recv_cctc, pcd->op, is_thread_safe);
    // ...
    if (pcd->the_recv_data)
    {
        // ...
        pcd->the_recv_data = nullptr;
    }
    // ...
    if (! thecpc->m_SndTags->empty()) {
        Vector<MPI_Status> stats(pcd->send_reqs.size());
        // ...
    }
    // ...
    pcd->the_send_data = nullptr;
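
// --- Usage sketch (illustrative, not part of this header) ----------------------
// Split-phase parallel copy between two MultiFabs that live on different
// BoxArrays/DistributionMappings. Same headers as the earlier sketch;
// `copy_between_grids` and the argument choices are hypothetical.
void copy_between_grids (amrex::MultiFab& dst, amrex::MultiFab const& src,
                         amrex::Geometry const& geom)
{
    // Start the copy of all components, valid region to valid region.
    dst.ParallelCopy_nowait(src, 0, 0, dst.nComp(),
                            amrex::IntVect(0), amrex::IntVect(0),
                            geom.periodicity());

    // ... other local work may overlap the communication ...

    // Wait for the messages, unpack, and release the buffers.
    dst.ParallelCopy_finish();
}
// --------------------------------------------------------------------------------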
FabArray<FAB>::copyTo (FAB& dest, int scomp, int dcomp, int ncomp, int nghost) const
{
    // ...
    BL_ASSERT(dcomp + ncomp <= dest.nComp());
    // ...
    // Gather into a temporary FabArray made of a single box that lives on
    // root_proc only (no allocation on the other ranks).
    BoxArray ba(dest.box());
    DistributionMapping dm(Vector<int>{root_proc});
    FabArray<FAB> destmf(ba, dm, ncomp, 0, MFInfo().SetAlloc(false));
    // ...
    destmf.ParallelCopy(*this, scomp, 0, ncomp, nghost, 0);
    // ...
    using T = typename FAB::value_type;
    // ...
    Long count = dest.numPts()*ncomp;
    T* const p0 = dest.dataPtr(dcomp);
    // ...
    if (dest.arena()->isDevice()) {
        // ...
    }
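
// --- Usage sketch (illustrative, not part of this header) ----------------------
// copyTo fills a single FAB with the distributed data that intersects it; the
// implementation above gathers onto root_proc and then broadcasts. Hedged
// example, same headers plus <AMReX_FArrayBox.H>; `probe_region` is hypothetical.
void gather_region (amrex::MultiFab const& mf, amrex::Box const& probe_region)
{
    // One local FAB covering the region of interest, on every rank.
    amrex::FArrayBox local(probe_region, mf.nComp());
    // Fill it from the distributed valid data (no ghost cells).
    mf.copyTo(local, 0, 0, mf.nComp(), 0);
}
// --------------------------------------------------------------------------------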
template <typename BUF>
// ... PrepareSendBuffers (convenience overload, fragment) ...
                                    Vector<char*>& send_data,
                                    Vector<std::size_t>& send_size,
                                    Vector<int>& send_rank,
                                    Vector<MPI_Request>& send_reqs,
                                    Vector<const CopyComTagsContainer*>& send_cctc,
                                    // ...
{
    // Forwards to the main PrepareSendBuffers below.
    char* pointer = nullptr;
    PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
    // ...
}
template <typename BUF>
// ... PrepareSendBuffers (fragment) ...
                                    char*& the_send_data,
                                    Vector<char*>& send_data,
                                    Vector<std::size_t>& send_size,
                                    Vector<int>& send_rank,
                                    Vector<MPI_Request>& send_reqs,
                                    Vector<const CopyComTagsContainer*>& send_cctc,
                                    // ...
{
    // ...
    const auto N_snds = SndTags.size();
    if (N_snds == 0) { return; }
    send_data.reserve(N_snds);
    send_size.reserve(N_snds);
    send_rank.reserve(N_snds);
    send_reqs.reserve(N_snds);
    send_cctc.reserve(N_snds);

    // For each destination rank, compute the message size and its offset
    // inside one contiguous send buffer.
    // ...
    std::size_t total_volume = 0;
    for (auto const& kv : SndTags)
    {
        auto const& cctc = kv.second;

        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
        {
            nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);
        }

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        // ... (round nbytes up so that each message is suitably aligned) ...

        offset.push_back(total_volume);
        total_volume += nbytes;

        send_data.push_back(nullptr);
        send_size.push_back(nbytes);
        send_rank.push_back(kv.first);
        // ...
        send_cctc.push_back(&cctc);
    }
    // ...
    if (total_volume > 0)
    {
        // ... (allocate the_send_data, then slice it per message) ...
        for (int i = 0, N = static_cast<int>(send_size.size()); i < N; ++i) {
            send_data[i] = the_send_data + offset[i];
        }
    }
    // ...
        the_send_data = nullptr;
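
// --- Layout sketch (illustrative, not part of this header) ---------------------
// PrepareSendBuffers/PostRcvs above pack all per-rank messages into a single
// allocation, recording an aligned offset per message. A standalone sketch of
// the same bookkeeping, independent of the AMReX types (all names hypothetical):
#include <cstddef>
#include <vector>

struct MessageLayout {
    std::vector<std::size_t> offset, size;
    std::size_t total = 0;               // bytes to allocate for the whole buffer
};

// Round n up to a multiple of align (the role played by amrex::aligned_size).
inline std::size_t round_up (std::size_t n, std::size_t align)
{
    return ((n + align - 1) / align) * align;
}

MessageLayout plan_buffer (std::vector<std::size_t> const& raw_bytes, std::size_t align)
{
    MessageLayout L;
    for (std::size_t raw : raw_bytes) {
        std::size_t nbytes = round_up(raw, align);  // pad each message for alignment
        L.offset.push_back(L.total);                // where this message starts
        L.size.push_back(nbytes);
        L.total += nbytes;                          // next message begins after it
    }
    return L;  // allocate L.total bytes once, then slice at the recorded offsets
}
// --------------------------------------------------------------------------------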
FabArray<FAB>::PostSnds (Vector<char*> const& send_data,
                         Vector<std::size_t> const& send_size,
                         Vector<int> const& send_rank,
                         Vector<MPI_Request>& send_reqs,
                         // ...
{
    // ...
    const auto N_snds = static_cast<int>(send_reqs.size());
    for (int j = 0; j < N_snds; ++j)
    {
        if (send_size[j] > 0) {
            // ... (nonblocking send via ParallelDescriptor, storing the request)
                (send_data[j], send_size[j], rank, SeqNum, comm).req();
        }
    }
}
template <typename BUF>
TheFaArenaPointer FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                                           Vector<char*>& recv_data,
                                           Vector<std::size_t>& recv_size,
                                           Vector<int>& recv_from,
                                           Vector<MPI_Request>& recv_reqs,
                                           // ...
{
    // Forwards to the PostRcvs below and returns ownership of the single
    // receive buffer to the caller.
    char* pointer = nullptr;
    PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
    // ...
}
template <typename BUF>
// ...
FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                         char*& the_recv_data,
                         Vector<char*>& recv_data,
                         Vector<std::size_t>& recv_size,
                         Vector<int>& recv_from,
                         Vector<MPI_Request>& recv_reqs,
                         // ...
{
    // ...
    // For each source rank, compute the message size and its offset inside
    // one contiguous receive buffer.
    Vector<std::size_t> offset;
    std::size_t TotalRcvsVolume = 0;
    for (const auto& kv : RcvTags)
    {
        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
        {
            nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);
        }

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        // ... (round nbytes up so that each message is suitably aligned) ...

        offset.push_back(TotalRcvsVolume);
        TotalRcvsVolume += nbytes;

        recv_data.push_back(nullptr);
        recv_size.push_back(nbytes);
        recv_from.push_back(kv.first);
        // ...
    }

    const auto nrecv = static_cast<int>(recv_from.size());
    // ...
    if (TotalRcvsVolume == 0)
    {
        the_recv_data = nullptr;
    }
    // ... (otherwise allocate the_recv_data and post the receives) ...
    for (int i = 0; i < nrecv; ++i)
    {
        recv_data[i] = the_recv_data + offset[i];
        if (recv_size[i] > 0)
        {
            // ... (nonblocking receive via ParallelDescriptor, storing the request)
                (recv_data[i], recv_size[i], rank, SeqNum, comm).req();
        }
    }

// Redistribute (fragment): copy between FabArrays defined on the same BoxArray
// but possibly different DistributionMappings.
// ...
                                     "FabArray::Redistribute: must have the same BoxArray");
    // ...
    // Same DistributionMapping: a purely local copy suffices.
        Copy(*this, src, scomp, dcomp, ncomp, nghost);
    // ...
    // Otherwise redistribute through ParallelCopy with a precomputed CPC.
        ParallelCopy(src, scomp, dcomp, ncomp, nghost, nghost, Periodicity::NonPeriodic(),
                     FabArrayBase::COPY, &cpc);
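
// --- Usage sketch (illustrative, not part of this header) ----------------------
// Redistribute moves data between two MultiFabs that share a BoxArray but use
// different DistributionMappings, e.g. after load balancing. Hedged example;
// `rebalance` and the new-mapping handling are hypothetical user code.
void rebalance (amrex::MultiFab const& old_mf,
                amrex::DistributionMapping const& new_dm,
                amrex::MultiFab& new_mf)
{
    // Same BoxArray and components, new owner ranks.
    new_mf.define(old_mf.boxArray(), new_dm, old_mf.nComp(), old_mf.nGrowVect());
    new_mf.Redistribute(old_mf, 0, 0, old_mf.nComp(), old_mf.nGrowVect());
}
// --------------------------------------------------------------------------------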
#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
// ...
// fbv_copy (fragment): apply a vector of Array4 copy tags; a device path
// handles all tags in one sweep, the host path loops over them with OpenMP.
template <class TagT>
// ...
    const int N = tags.size();
    if (N == 0) { return; }
    // ...
        const int ncomp = tag.dfab.nComp();
        for (int n = 0; n < ncomp; ++n) {
            tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
        }
    // ...
#pragma omp parallel for
    // ...
    for (int itag = 0; itag < N; ++itag) {
        auto const& tag = tags[itag];
        const int ncomp = tag.dfab.nComp();
        // ...
            tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
        // ...
    }
std::enable_if_t<IsFabArray<MF>::value>
FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp,
              Vector<int> const& ncomp, Vector<IntVect> const& nghost,
              Vector<Periodicity> const& period, Vector<int> const& cross = {})
{
    // ...
    // Fallback: simply start and finish each FillBoundary separately.
        const int N = mf.size();
        for (int i = 0; i < N; ++i) {
            mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
                                       cross.empty() ? 0 : cross[i]);
        }
        for (int i = 0; i < N; ++i) {
            mf[i]->FillBoundary_finish();
        }
    // ...
    // Fused path: collect the communication metadata of all FabArrays so that
    // their messages can share one set of MPI sends and receives.
    using FAB = typename MF::FABType::value_type;
    using T   = typename FAB::value_type;
    // ...
    const int nmfs = mf.size();
    Vector<FabArrayBase::CommMetaData const*> cmds;
    // ...
    for (int imf = 0; imf < nmfs; ++imf) {
        if (nghost[imf].max() > 0) {
            auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
                                               cross.empty() ? 0 : cross[imf]);
            // ...
            cmds.push_back(static_cast<FabArrayBase::CommMetaData const*>(&TheFB));
            N_locs += TheFB.m_LocTags->size();
            N_rcvs += TheFB.m_RcvTags->size();
            N_snds += TheFB.m_SndTags->size();
        } else {
            cmds.push_back(nullptr);
        }
    }
    // ...
    // Local copy tags for all FabArrays.
    using TagT = Array4CopyTag<T>;
    Vector<TagT> local_tags;
    local_tags.reserve(N_locs);
    // ...
    for (int imf = 0; imf < nmfs; ++imf) {
        // ...
        auto const& tags = *(cmds[imf]->m_LocTags);
        for (auto const& tag : tags) {
            local_tags.push_back({(*mf[imf])[tag.dstIndex].array      (scomp[imf],ncomp[imf]),
                                  (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
                                  // ...
                                  (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
        }
    }
    // ...
    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) { return; }
    // Post all receives into one shared buffer, remembering for each message
    // which FabArray/box it will be unpacked into.
    char* the_recv_data = nullptr;
    Vector<int> recv_from;
    Vector<std::size_t> recv_size;
    Vector<MPI_Request> recv_reqs;
    Vector<MPI_Status> recv_stat;
    Vector<TagT> recv_tags;
    // ...
    // Collect the source ranks across all FabArrays.
    for (int imf = 0; imf < nmfs; ++imf) {
        // ...
        auto const& tags = *(cmds[imf]->m_RcvTags);
        for (const auto& kv : tags) {
            recv_from.push_back(kv.first);
        }
    }
    // ...
    const int nrecv = recv_from.size();
    // ...
    recv_stat.resize(nrecv);
    // ...
    recv_tags.reserve(N_rcvs);
    // ...
    Vector<Vector<std::size_t> > recv_offset(nrecv);
    Vector<std::size_t> offset;
    recv_size.reserve(nrecv);
    // ...
    std::size_t TotalRcvsVolume = 0;
    for (int i = 0; i < nrecv; ++i) {
        std::size_t nbytes = 0;
        for (int imf = 0; imf < nmfs; ++imf) {
            // ...
            auto const& tags = *(cmds[imf]->m_RcvTags);
            auto it = tags.find(recv_from[i]);
            if (it != tags.end()) {
                for (auto const& cct : it->second) {
                    auto& dfab = (*mf[imf])[cct.dstIndex];
                    recv_offset[i].push_back(nbytes);
                    recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
                                         makeArray4<T const>(nullptr,cct.dbox,ncomp[imf]),
                                         cct.dbox, Dim3{0,0,0}});
                    nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
                }
            }
        }
        // ...
        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        // ... (round nbytes up so that each message is suitably aligned) ...
        offset.push_back(TotalRcvsVolume);
        TotalRcvsVolume += nbytes;
        // ...
        recv_size.push_back(nbytes);
    }
    // ...
    // Allocate the buffer, post the receives, and point each tag's source
    // Array4 at its slot inside the buffer.
    for (int i = 0; i < nrecv; ++i) {
        char* p = the_recv_data + offset[i];
        // ... (nonblocking receive via ParallelDescriptor, storing the request)
            (p, recv_size[i], rank, SeqNum, comm).req();
        for (int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
            recv_tags[k++].sfab.p = (T const*)(p + recv_offset[i][j]);
        }
    }
    // Mirror image for the sends: one shared buffer, one message per rank,
    // with per-FabArray slots recorded in send_offset.
    char* the_send_data = nullptr;
    Vector<int> send_rank;
    Vector<char*> send_data;
    Vector<std::size_t> send_size;
    Vector<MPI_Request> send_reqs;
    // ...
    for (int imf = 0; imf < nmfs; ++imf) {
        // ...
        auto const& tags = *(cmds[imf]->m_SndTags);
        for (auto const& kv : tags) {
            send_rank.push_back(kv.first);
        }
    }
    // ...
    const int nsend = send_rank.size();
    // ...
    send_data.resize(nsend, nullptr);
    // ...
    Vector<TagT> send_tags;
    send_tags.reserve(N_snds);
    // ...
    Vector<Vector<std::size_t> > send_offset(nsend);
    Vector<std::size_t> offset;
    send_size.reserve(nsend);
    // ...
    std::size_t TotalSndsVolume = 0;
    for (int i = 0; i < nsend; ++i) {
        std::size_t nbytes = 0;
        for (int imf = 0; imf < nmfs; ++imf) {
            // ...
            auto const& tags = *(cmds[imf]->m_SndTags);
            auto it = tags.find(send_rank[i]);
            if (it != tags.end()) {
                for (auto const& cct : it->second) {
                    auto const& sfab = (*mf[imf])[cct.srcIndex];
                    send_offset[i].push_back(nbytes);
                    send_tags.push_back({amrex::makeArray4<T>(nullptr,cct.sbox,ncomp[imf]),
                                         sfab.const_array(scomp[imf],ncomp[imf]),
                                         cct.sbox, Dim3{0,0,0}});
                    nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
                }
            }
        }
        // ...
        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        // ... (round nbytes up so that each message is suitably aligned) ...
        offset.push_back(TotalSndsVolume);
        TotalSndsVolume += nbytes;
        // ...
        send_size.push_back(nbytes);
    }
    // ...
    // Allocate the send buffer, point each tag's destination Array4 at its
    // slot, pack, and post the sends.
    for (int i = 0; i < nsend; ++i) {
        send_data[i] = the_send_data + offset[i];
        for (int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
            send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
        }
    }
    // ...
    FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
    // Wait for the receives, check the message sizes, unpack into the ghost
    // cells, and finally wait for the sends before releasing the buffers.
    // ...
#if !defined(AMREX_DEBUG)
    // ...
#if !defined(AMREX_DEBUG)
    // ...
        if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
            amrex::Abort("FillBoundary(vector) failed with wrong message size");
        }
    // ...
        Vector<MPI_Status> stats(send_reqs.size());
    // ...
std::enable_if_t<IsFabArray<MF>::value>
FillBoundary (Vector<MF*> const& mf, const Periodicity& a_period = Periodicity::NonPeriodic())
{
    // Convenience overload: fill all components and all ghost cells of every
    // FabArray with the same periodicity, then forward to the version above.
    Vector<int> scomp(mf.size(), 0);
    Vector<int> ncomp;
    Vector<IntVect> nghost;
    Vector<Periodicity> period(mf.size(), a_period);
    ncomp.reserve(mf.size());
    nghost.reserve(mf.size());
    for (auto const& x : mf) {
        ncomp.push_back(x->nComp());
        nghost.push_back(x->nGrowVect());
    }
    // ... (forward to the main overload above) ...
}
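
// --- Usage sketch (illustrative, not part of this header) ----------------------
// Filling the ghost cells of several MultiFabs in one fused communication phase
// instead of one FillBoundary per MultiFab. Same headers as the earlier sketch;
// `fill_all_ghosts` and the variable names are hypothetical.
void fill_all_ghosts (amrex::MultiFab& rho, amrex::MultiFab& vel,
                      amrex::Geometry const& geom)
{
    amrex::Vector<amrex::MultiFab*> mfs{&rho, &vel};
    // All components, all ghost cells, shared periodicity.
    amrex::FillBoundary(mfs, geom.periodicity());
}
// --------------------------------------------------------------------------------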