8template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,
int>Z>
12 bool enforce_periodicity_only,
14 IntVect const& sumboundary_src_nghost,
21 AMREX_ASSERT(!enforce_periodicity_only || !override_sync);
23 bool sumboundary = sumboundary_src_nghost.
allGE(0);
26 if (enforce_periodicity_only) {
28 }
else if (override_sync) {
29 work_to_do = (nghost.
max() > 0) || !is_cell_centered();
30 }
else if (sumboundary) {
33 work_to_do = nghost.
max() > 0;
35 if (!work_to_do) {
return; }
37 const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync, sumboundary_src_nghost);
45 if (N_locs == 0) {
return; }
49#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
52 FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
59 FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
64 FB_local_copy_gpu(TheFB, scomp, ncomp);
76 FB_local_add_cpu(TheFB, scomp, ncomp);
81 FB_local_copy_cpu(TheFB, scomp, ncomp);
96 const int N_locs = TheFB.
m_LocTags->size();
97 const int N_rcvs = TheFB.
m_RcvTags->size();
98 const int N_snds = TheFB.
m_SndTags->size();
100 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
105 fbd = std::make_unique<FBData<FAB>>();
110 fbd->deterministic = deterministic;
117 PostRcvs<BUF>(*TheFB.
m_RcvTags, fbd->the_recv_data,
118 fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
120 fbd->recv_stat.resize(N_rcvs);
126 char*& the_send_data = fbd->the_send_data;
135 PrepareSendBuffers<BUF>(*TheFB.
m_SndTags, the_send_data, send_data, send_size, send_rank,
136 send_reqs, send_cctc, ncomp);
141#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
143 FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
148 pack_send_buffer_gpu<BUF>(*
this, scomp, ncomp, send_data, send_size, send_cctc, TheFB.
m_id);
154 pack_send_buffer_cpu<BUF>(*
this, scomp, ncomp, send_data, send_size, send_cctc);
158 PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
171#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
173 FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
180 FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
185 FB_local_copy_gpu(TheFB, scomp, ncomp);
197 FB_local_add_cpu(TheFB, scomp, ncomp);
202 FB_local_copy_cpu(TheFB, scomp, ncomp);
217template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,
int>Z>
228 const FB* TheFB = fbd->fb;
230 const auto N_rcvs =
static_cast<int>(TheFB->
m_RcvTags->size());
234 for (
int k = 0; k < N_rcvs; k++)
236 if (fbd->recv_size[k] > 0)
238 auto const& cctc = TheFB->
m_RcvTags->at(fbd->recv_from[k]);
239 recv_cctc[k] = &cctc;
243 int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(),
nullptr);
245 if (actual_n_rcvs > 0) {
248 if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
250 amrex::Abort(
"FillBoundary_finish failed with wrong message size");
261#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
264 FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
265 fbd->recv_data, fbd->recv_size,
266 recv_cctc, is_thread_safe);
271 bool deterministic = fbd->deterministic;
272 unpack_recv_buffer_gpu<BUF>(*
this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
273 recv_cctc, op, is_thread_safe, TheFB->
m_id, deterministic);
279 unpack_recv_buffer_cpu<BUF>(*
this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
280 recv_cctc, op, is_thread_safe);
283 if (fbd->the_recv_data)
286 fbd->the_recv_data =
nullptr;
290 const auto N_snds =
static_cast<int>(TheFB->
m_SndTags->size());
295 fbd->the_send_data =
nullptr;
318 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period, op, a_cpc,
319 false, deterministic);
320 ParallelCopy_finish();
331 ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,
offset,period);
332 ParallelCopy_finish();
343 ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,
offset,period,
345 ParallelCopy_finish();
358 BL_PROFILE(
"FabArray::ParallelCopyToGhost()");
360 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
362 ParallelCopy_finish();
375 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
383 ParallelCopy_finish();
398 bool to_ghost_cells_only,
401 ParallelCopy_nowait(src,scomp,dcomp,ncomp,snghost,dnghost,
IntVect(0),period,op,a_cpc,
402 to_ghost_cells_only, deterministic);
417 bool to_ghost_cells_only,
421 BL_PROFILE(
"FabArray::ParallelCopy_nowait()");
425 if (empty() || src.
empty()) {
437 (this->size() == 1) && (src.
size() == 1) &&
441 auto const& da = this->array(0, dcomp);
450 da(i,j,k,n) = sa(i,j,k,n);
461#pragma omp parallel for collapse(3)
463 for (
int n = 0; n < ncomp; ++n) {
464 for (
int k = lo.z; k <= hi.z; ++k) {
465 for (
int j = lo.y; j <= hi.y; ++j) {
467 for (
int i = lo.x; i <= hi.x; ++i) {
468 da(i,j,k,n) = sa(i,j,k,n);
476 da(i,j,k,n) += sa(i,j,k,n);
487#pragma omp parallel for collapse(3)
489 for (
int n = 0; n < ncomp; ++n) {
490 for (
int k = lo.z; k <= hi.z; ++k) {
491 for (
int j = lo.y; j <= hi.y; ++j) {
493 for (
int i = lo.x; i <= hi.x; ++i) {
494 da(i,j,k,n) += sa(i,j,k,n);
517 Add(*
this, src, scomp, dcomp, ncomp,
IntVect(0));
523 const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period,
524 to_ghost_cells_only,
offset);
533 if (N_locs == 0) {
return; }
537 PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op, deterministic);
542 PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
556 const int N_snds = thecpc.
m_SndTags->size();
557 const int N_rcvs = thecpc.
m_RcvTags->size();
558 const int N_locs = thecpc.
m_LocTags->size();
560 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
571 int NCompLeft = ncomp;
572 int SC = scomp, DC = dcomp, NC;
574 for (
int ipass = 0; ipass < ncomp; )
576 pcd = std::make_unique<PCData<FAB>>();
585 const bool last_iter = (NCompLeft == NC);
594 pcd->the_recv_data =
nullptr;
596 pcd->actual_n_rcvs = 0;
598 PostRcvs(*thecpc.
m_RcvTags, pcd->the_recv_data,
599 pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
600 pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
613 src.PrepareSendBuffers(*thecpc.
m_SndTags, pcd->the_send_data, send_data, send_size,
614 send_rank, pcd->send_reqs, send_cctc, NC);
619 pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc, thecpc.
m_id);
624 pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
639 PC_local_gpu(thecpc, src, SC, DC, NC, op, deterministic);
644 PC_local_cpu(thecpc, src, SC, DC, NC, op);
650 ParallelCopy_finish();
674 BL_PROFILE(
"FabArray::ParallelCopy_finish()");
677 if (!pcd) {
return; }
679 const CPC* thecpc = pcd->cpc;
681 const auto N_snds =
static_cast<int>(thecpc->
m_SndTags->size());
682 const auto N_rcvs =
static_cast<int>(thecpc->
m_RcvTags->size());
687 for (
int k = 0; k < N_rcvs; ++k)
689 if (pcd->recv_size[k] > 0)
691 auto const& cctc = thecpc->
m_RcvTags->at(pcd->recv_from[k]);
692 recv_cctc[k] = &cctc;
696 if (pcd->actual_n_rcvs > 0) {
700 if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
702 amrex::Abort(
"ParallelCopy failed with wrong message size");
712 unpack_recv_buffer_gpu(*
this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
713 recv_cctc, pcd->op, is_thread_safe, thecpc->
m_id,
719 unpack_recv_buffer_cpu(*
this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
720 recv_cctc, pcd->op, is_thread_safe);
723 if (pcd->the_recv_data)
726 pcd->the_recv_data =
nullptr;
736 pcd->the_send_data =
nullptr;
750 BL_ASSERT(dcomp + ncomp <= dest.nComp());
762 destmf.ParallelCopy(*
this, scomp, 0, ncomp, nghost, 0);
765 using T =
typename FAB::value_type;
767 Long count = dest.numPts()*ncomp;
768 T*
const p0 = dest.dataPtr(dcomp);
792template <
typename BUF>
802 char* pointer =
nullptr;
803 PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
808template <
typename BUF>
810FabArray<FAB>::PrepareSendBuffers (
const MapOfCopyComTagContainers& SndTags,
811 char*& the_send_data,
812 Vector<char*>& send_data,
813 Vector<std::size_t>& send_size,
814 Vector<int>& send_rank,
815 Vector<MPI_Request>& send_reqs,
816 Vector<const CopyComTagsContainer*>& send_cctc,
824 const auto N_snds = SndTags.size();
825 if (N_snds == 0) {
return; }
826 send_data.reserve(N_snds);
827 send_size.reserve(N_snds);
828 send_rank.reserve(N_snds);
829 send_reqs.reserve(N_snds);
830 send_cctc.reserve(N_snds);
833 std::size_t total_volume = 0;
834 for (
auto const& kv : SndTags)
836 auto const& cctc = kv.second;
838 std::size_t nbytes = 0;
839 for (
auto const& cct : kv.
second)
841 nbytes += cct.sbox.numPts() * ncomp *
sizeof(BUF);
844 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
851 offset.push_back(total_volume);
852 total_volume += nbytes;
854 send_data.push_back(
nullptr);
855 send_size.push_back(nbytes);
856 send_rank.push_back(kv.first);
858 send_cctc.push_back(&cctc);
861 if (total_volume > 0)
864 for (
int i = 0, N =
static_cast<int>(send_size.size()); i < N; ++i) {
865 send_data[i] = the_send_data +
offset[i];
868 the_send_data =
nullptr;
874FabArray<FAB>::PostSnds (Vector<char*>
const& send_data,
875 Vector<std::size_t>
const& send_size,
876 Vector<int>
const& send_rank,
877 Vector<MPI_Request>& send_reqs,
882 const auto N_snds =
static_cast<int>(send_reqs.size());
883 for (
int j = 0; j < N_snds; ++j)
885 if (send_size[j] > 0) {
888 (send_data[j], send_size[j], rank, SeqNum, comm).
req();
894template <
typename BUF>
895TheFaArenaPointer FabArray<FAB>::PostRcvs (
const MapOfCopyComTagContainers& RcvTags,
896 Vector<char*>& recv_data,
897 Vector<std::size_t>& recv_size,
898 Vector<int>& recv_from,
899 Vector<MPI_Request>& recv_reqs,
903 char* pointer =
nullptr;
904 PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
909template <
typename BUF>
911FabArray<FAB>::PostRcvs (
const MapOfCopyComTagContainers& RcvTags,
912 char*& the_recv_data,
913 Vector<char*>& recv_data,
914 Vector<std::size_t>& recv_size,
915 Vector<int>& recv_from,
916 Vector<MPI_Request>& recv_reqs,
925 Vector<std::size_t>
offset;
926 std::size_t TotalRcvsVolume = 0;
927 for (
const auto& kv : RcvTags)
929 std::size_t nbytes = 0;
930 for (
auto const& cct : kv.
second)
932 nbytes += cct.dbox.numPts() * ncomp *
sizeof(BUF);
935 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
942 offset.push_back(TotalRcvsVolume);
943 TotalRcvsVolume += nbytes;
945 recv_data.push_back(
nullptr);
946 recv_size.push_back(nbytes);
947 recv_from.push_back(kv.first);
951 const auto nrecv =
static_cast<int>(recv_from.size());
955 if (TotalRcvsVolume == 0)
957 the_recv_data =
nullptr;
963 for (
int i = 0; i < nrecv; ++i)
965 recv_data[i] = the_recv_data +
offset[i];
966 if (recv_size[i] > 0)
970 (recv_data[i], recv_size[i], rank, SeqNum, comm).
req();
986 "FabArray::Redistribute: must have the same BoxArray");
990 Copy(*
this, src, scomp, dcomp, ncomp, nghost);
1008#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
1018template <
class TagT>
1021 const int N = tags.
size();
1022 if (N == 0) {
return; }
1028 const int ncomp = tag.dfab.nComp();
1029 for (
int n = 0; n < ncomp; ++n) {
1030 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
1037#pragma omp parallel for
1039 for (
int itag = 0; itag < N; ++itag) {
1040 auto const& tag = tags[itag];
1041 const int ncomp = tag.dfab.nComp();
1044 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
1066std::enable_if_t<IsFabArray<MF>::value>
1073 const int N = mf.
size();
1074 for (
int i = 0; i < N; ++i) {
1075 mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
1076 cross.empty() ? 0 : cross[i]);
1089std::enable_if_t<IsFabArray<MF>::value>
1097 ncomp.reserve(mf.
size());
1098 nghost.reserve(mf.
size());
1099 for (
auto const&
x : mf) {
1100 ncomp.push_back(
x->nComp());
1101 nghost.push_back(
x->nGrowVect());
1112std::enable_if_t<IsFabArray<MF>::value>
1116 const int N = mf.
size();
1117 for (
int i = 0; i < N; ++i) {
1118 mf[i]->FillBoundary_finish();
1135std::enable_if_t<IsFabArray<MF>::value>
1140 BL_PROFILE(
"FillBoundaryAndSync_nowait(Vector)");
1141 const int N = mf.
size();
1142 for (
int i = 0; i < N; ++i) {
1143 mf[i]->FillBoundaryAndSync_nowait(scomp[i], ncomp[i], nghost[i], period[i]);
1154std::enable_if_t<IsFabArray<MF>::value>
1162 ncomp.reserve(mf.
size());
1163 nghost.reserve(mf.
size());
1164 for (
auto const&
x : mf) {
1165 ncomp.push_back(
x->nComp());
1166 nghost.push_back(
x->nGrowVect());
1177std::enable_if_t<IsFabArray<MF>::value>
1180 BL_PROFILE(
"FillBoundaryAndSync_finish(Vector)");
1181 const int N = mf.
size();
1182 for (
int i = 0; i < N; ++i) {
1183 mf[i]->FillBoundaryAndSync_finish();
1203std::enable_if_t<IsFabArray<MF>::value>
1210 const int N = mf.
size();
1211 for (
int i = 0; i < N; ++i) {
1212 mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
1213 cross.empty() ? 0 : cross[i]);
1215 for (
int i = 0; i < N; ++i) {
1216 mf[i]->FillBoundary_finish();
1220 using FAB =
typename MF::FABType::value_type;
1221 using T =
typename FAB::value_type;
1223 const int nmfs = mf.
size();
1224 Vector<FabArrayBase::CommMetaData const*> cmds;
1228 for (
int imf = 0; imf < nmfs; ++imf) {
1229 if (nghost[imf].
max() > 0) {
1230 auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
1231 cross.empty() ? 0 : cross[imf]);
1233 cmds.push_back(
static_cast<FabArrayBase::CommMetaData const*
>(&TheFB));
1234 N_locs += TheFB.m_LocTags->
size();
1235 N_rcvs += TheFB.m_RcvTags->size();
1236 N_snds += TheFB.m_SndTags->size();
1238 cmds.push_back(
nullptr);
1242 using TagT = Array4CopyTag<T>;
1243 Vector<TagT> local_tags;
1244 local_tags.reserve(N_locs);
1246 for (
int imf = 0; imf < nmfs; ++imf) {
1248 auto const& tags = *(cmds[imf]->m_LocTags);
1249 for (
auto const& tag : tags) {
1250 local_tags.push_back({(*mf[imf])[tag.dstIndex].array (scomp[imf],ncomp[imf]),
1251 (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
1253 (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
1259 detail::fbv_copy(local_tags);
1271 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
return; }
1273 char* the_recv_data =
nullptr;
1274 Vector<int> recv_from;
1275 Vector<std::size_t> recv_size;
1276 Vector<MPI_Request> recv_reqs;
1277 Vector<MPI_Status> recv_stat;
1278 Vector<TagT> recv_tags;
1282 for (
int imf = 0; imf < nmfs; ++imf) {
1284 auto const& tags = *(cmds[imf]->m_RcvTags);
1285 for (
const auto& kv : tags) {
1286 recv_from.push_back(kv.first);
1291 const int nrecv = recv_from.size();
1294 recv_stat.resize(nrecv);
1296 recv_tags.reserve(N_rcvs);
1298 Vector<Vector<std::size_t> > recv_offset(nrecv);
1299 Vector<std::size_t>
offset;
1300 recv_size.reserve(nrecv);
1302 std::size_t TotalRcvsVolume = 0;
1303 for (
int i = 0; i < nrecv; ++i) {
1304 std::size_t nbytes = 0;
1305 for (
int imf = 0; imf < nmfs; ++imf) {
1307 auto const& tags = *(cmds[imf]->m_RcvTags);
1308 auto it = tags.find(recv_from[i]);
1309 if (it != tags.end()) {
1310 for (
auto const& cct : it->
second) {
1311 auto& dfab = (*mf[imf])[cct.dstIndex];
1312 recv_offset[i].push_back(nbytes);
1313 recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
1314 makeArray4<T const>(
nullptr,cct.dbox,ncomp[imf]),
1315 cct.dbox, Dim3{0,0,0}});
1316 nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
1322 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1328 offset.push_back(TotalRcvsVolume);
1329 TotalRcvsVolume += nbytes;
1331 recv_size.push_back(nbytes);
1337 for (
int i = 0; i < nrecv; ++i) {
1338 char* p = the_recv_data +
offset[i];
1341 (p, recv_size[i], rank, SeqNum, comm).
req();
1342 for (
int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
1343 recv_tags[k++].sfab.p = (T
const*)(p + recv_offset[i][j]);
1348 char* the_send_data =
nullptr;
1349 Vector<int> send_rank;
1350 Vector<char*> send_data;
1351 Vector<std::size_t> send_size;
1352 Vector<MPI_Request> send_reqs;
1354 for (
int imf = 0; imf < nmfs; ++imf) {
1356 auto const& tags = *(cmds[imf]->m_SndTags);
1357 for (
auto const& kv : tags) {
1358 send_rank.push_back(kv.first);
1363 const int nsend = send_rank.size();
1365 send_data.resize(nsend,
nullptr);
1368 Vector<TagT> send_tags;
1369 send_tags.reserve(N_snds);
1371 Vector<Vector<std::size_t> > send_offset(nsend);
1372 Vector<std::size_t>
offset;
1373 send_size.reserve(nsend);
1375 std::size_t TotalSndsVolume = 0;
1376 for (
int i = 0; i < nsend; ++i) {
1377 std::size_t nbytes = 0;
1378 for (
int imf = 0; imf < nmfs; ++imf) {
1380 auto const& tags = *(cmds[imf]->m_SndTags);
1381 auto it = tags.find(send_rank[i]);
1382 if (it != tags.end()) {
1383 for (
auto const& cct : it->
second) {
1384 auto const& sfab = (*mf[imf])[cct.srcIndex];
1385 send_offset[i].push_back(nbytes);
1386 send_tags.push_back({amrex::makeArray4<T>(
nullptr,cct.sbox,ncomp[imf]),
1387 sfab.const_array(scomp[imf],ncomp[imf]),
1388 cct.sbox, Dim3{0,0,0}});
1389 nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
1395 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1401 offset.push_back(TotalSndsVolume);
1402 TotalSndsVolume += nbytes;
1404 send_size.push_back(nbytes);
1409 for (
int i = 0; i < nsend; ++i) {
1410 send_data[i] = the_send_data +
offset[i];
1411 for (
int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
1412 send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
1416 detail::fbv_copy(send_tags);
1418 FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
1421#if !defined(AMREX_DEBUG)
1427 detail::fbv_copy(local_tags);
1428#if !defined(AMREX_DEBUG)
1436 if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
1437 amrex::Abort(
"FillBoundary(vector) failed with wrong message size");
1441 detail::fbv_copy(recv_tags);
1447 Vector<MPI_Status> stats(send_reqs.size());
1469std::enable_if_t<IsFabArray<MF>::value>
1475 const int N = mf.
size();
1476 for (
int i = 0; i < N; ++i) {
1477 mf[i]->FillBoundaryAndSync_nowait(scomp[i], ncomp[i], nghost[i], period[i]);
1479 for (
int i = 0; i < N; ++i) {
1480 mf[i]->FillBoundaryAndSync_finish();
1486std::enable_if_t<IsFabArray<MF>::value>
1495std::enable_if_t<IsFabArray<MF>::value>
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_SYNC_STOP()
Definition AMReX_BLProfiler.H:645
#define BL_PROFILE_SYNC_START_TIMED(fname)
Definition AMReX_BLProfiler.H:644
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_NODISCARD
Definition AMReX_Extension.H:252
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_LOOP_4D(bx, ncomp, i, j, k, n, block)
Definition AMReX_Loop.nolint.H:16
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:568
IndexType ixType() const noexcept
Return index type of this BoxArray.
Definition AMReX_BoxArray.H:858
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
CpOp
parallel copy or add
Definition AMReX_FabArrayBase.H:394
@ ADD
Definition AMReX_FabArrayBase.H:394
@ COPY
Definition AMReX_FabArrayBase.H:394
Box box(int K) const noexcept
Return the Kth Box in the BoxArray. That is, the valid region of the Kth grid.
Definition AMReX_FabArrayBase.H:101
DistributionMapping distributionMap
Definition AMReX_FabArrayBase.H:445
static int MaxComp
The maximum number of components to copy() at a time.
Definition AMReX_FabArrayBase.H:292
BoxArray boxarray
Definition AMReX_FabArrayBase.H:444
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArray.
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:350
void ParallelCopyToGhost_finish()
Definition AMReX_FabArrayCommI.H:381
void FBEP_nowait(int scomp, int ncomp, const IntVect &nghost, const Periodicity &period, bool cross, bool enforce_periodicity_only=false, bool override_sync=false, IntVect const &sumboundary_src_nghost=IntVect(-1), bool deterministic=false)
Definition AMReX_FabArrayCommI.H:10
Array4< typename FabArray< FAB >::value_type const > const_array(const MFIter &mfi) const noexcept
Definition AMReX_FabArray.H:590
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:850
void Redistribute(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &nghost)
Copy from src to this. this and src have the same BoxArray, but different DistributionMapping.
Definition AMReX_FabArrayCommI.H:979
void ParallelCopyToGhost(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &snghost, const IntVect &dnghost, const Periodicity &period=Periodicity::NonPeriodic())
Definition AMReX_FabArrayCommI.H:350
void ParallelCopy_finish()
Definition AMReX_FabArrayCommI.H:669
void ParallelAdd(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic())
This function copies data from src to this FabArray. Each FAB in fa is intersected with all FABs in t...
Definition AMReX_FabArray.H:847
void ParallelCopyToGhost_nowait(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &snghost, const IntVect &dnghost, const Periodicity &period=Periodicity::NonPeriodic())
Definition AMReX_FabArrayCommI.H:367
void FillBoundary_test()
Definition AMReX_FabArrayCommI.H:1006
void copyTo(FAB &dest, int nghost=0) const
Copy the values contained in the intersection of the valid + nghost region of this FabArray with the ...
Definition AMReX_FabArray.H:2649
void ParallelCopy_nowait(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:864
void FillBoundary_finish()
Definition AMReX_FabArrayCommI.H:219
__host__ __device__ bool cellCentered() const noexcept
True if the IndexTypeND is CELL based in all directions.
Definition AMReX_IndexType.H:104
__host__ __device__ constexpr bool allGE(const IntVectND< dim > &rhs) const noexcept
Returns true if this is greater than or equal to argument for all components. NOTE: This is NOT a str...
Definition AMReX_IntVect.H:450
__host__ static __device__ constexpr IntVectND< dim > TheZeroVector() noexcept
This static member function returns a reference to a constant IntVectND object, all of whose dim argu...
Definition AMReX_IntVect.H:679
__host__ __device__ constexpr int max() const noexcept
maximum (no absolute values) value
Definition AMReX_IntVect.H:221
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
bool isAnyPeriodic() const noexcept
Definition AMReX_Periodicity.H:22
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1331
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1317
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1280
Arena * The_Comms_Arena()
Definition AMReX_Arena.cpp:865
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:845
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
bool inGraphRegion()
Definition AMReX_GpuControl.H:121
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:435
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:92
bool inNoSyncRegion() noexcept
Definition AMReX_GpuControl.H:152
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
void Test(MPI_Request &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1220
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:113
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1308
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition AMReX_ParallelDescriptor.cpp:1295
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_REQUEST_NULL
Definition AMReX_ccse-mpi.H:57
Definition AMReX_Amr.cpp:49
std::enable_if_t< IsFabArray< MF >::value > FillBoundaryAndSync_nowait(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period)
Launch FillBoundaryAndSync_nowait across a vector of FabArrays.
Definition AMReX_FabArrayCommI.H:1136
@ make_alias
Definition AMReX_MakeType.H:7
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
std::enable_if_t< IsFabArray< MF >::value > FillBoundaryAndSync_finish(Vector< MF * > const &mf)
Wait for outstanding FillBoundaryAndSync_nowait operations launched with the vector helper to complete.
Definition AMReX_FabArrayCommI.H:1178
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
std::unique_ptr< char, TheFaArenaDeleter > TheFaArenaPointer
Definition AMReX_FabArray.H:104
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2866
IntVect nGrowVect(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2856
std::enable_if_t< IsFabArray< MF >::value > FillBoundary_finish(Vector< MF * > const &mf)
Wait for outstanding FillBoundary_nowait operations launched with the vector helper to complete.
Definition AMReX_FabArrayCommI.H:1113
std::enable_if_t< IsFabArray< MF >::value > FillBoundary(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Perform FillBoundary on a batch of FabArrays (e.g., MultiFabs).
Definition AMReX_FabArrayCommI.H:1204
void Copy(FabArray< DFAB > &dst, FabArray< SFAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:181
std::enable_if_t< IsFabArray< MF >::value > FillBoundary_nowait(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Launch FillBoundary_nowait across a vector of FabArrays.
Definition AMReX_FabArrayCommI.H:1067
double second() noexcept
Definition AMReX_Utility.cpp:940
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
void Add(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:244
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:240
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that ...
Definition AMReX_Arena.H:33
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:2019
void RemoveDuplicates(Vector< T > &vec)
Definition AMReX_Vector.H:211
std::enable_if_t< IsFabArray< MF >::value > FillBoundaryAndSync(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period)
Perform FillBoundaryAndSync on a batch of FabArrays (e.g., MultiFabs).
Definition AMReX_FabArrayCommI.H:1470
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2861
parallel copy or add
Definition AMReX_FabArrayBase.H:538
std::uint64_t m_id
Definition AMReX_FabArrayBase.H:553
FillBoundary.
Definition AMReX_FabArrayBase.H:488
std::uint64_t m_id
Definition AMReX_FabArrayBase.H:494
IntVect m_sb_snghost
Definition AMReX_FabArrayBase.H:498
Definition AMReX_TypeTraits.H:66
Definition AMReX_TypeTraits.H:297
FabArray memory allocation information.
Definition AMReX_FabArray.H:66