8template <
typename BUF,
class F>
9requires (BaseFabType<F>)
13 bool enforce_periodicity_only,
15 IntVect const& sumboundary_src_nghost,
22 AMREX_ASSERT(!enforce_periodicity_only || !override_sync);
24 bool sumboundary = sumboundary_src_nghost.
allGE(0);
27 if (enforce_periodicity_only) {
29 }
else if (override_sync) {
30 work_to_do = (nghost.
max() > 0) || !is_cell_centered();
31 }
else if (sumboundary) {
34 work_to_do = nghost.
max() > 0;
36 if (!work_to_do) {
return; }
38 const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only, override_sync, sumboundary_src_nghost);
46 if (N_locs == 0) {
return; }
50#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
53 FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
60 FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
65 FB_local_copy_gpu(TheFB, scomp, ncomp);
77 FB_local_add_cpu(TheFB, scomp, ncomp);
82 FB_local_copy_cpu(TheFB, scomp, ncomp);
97 const int N_locs = TheFB.
m_LocTags->size();
98 const int N_rcvs = TheFB.
m_RcvTags->size();
99 const int N_snds = TheFB.
m_SndTags->size();
101 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
106 fbd = std::make_unique<FBData<FAB>>();
111 fbd->deterministic = deterministic;
118 PostRcvs<BUF>(*TheFB.
m_RcvTags, fbd->the_recv_data,
119 fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
121 fbd->recv_stat.resize(N_rcvs);
127 char*& the_send_data = fbd->the_send_data;
136 PrepareSendBuffers<BUF>(*TheFB.
m_SndTags, the_send_data, send_data, send_size, send_rank,
137 send_reqs, send_cctc, ncomp);
142#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
144 FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
149 pack_send_buffer_gpu<BUF>(*
this, scomp, ncomp, send_data, send_size, send_cctc, TheFB.
m_id);
155 pack_send_buffer_cpu<BUF>(*
this, scomp, ncomp, send_data, send_size, send_cctc);
159 PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
172#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
174 FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
181 FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
186 FB_local_copy_gpu(TheFB, scomp, ncomp);
198 FB_local_add_cpu(TheFB, scomp, ncomp);
203 FB_local_copy_cpu(TheFB, scomp, ncomp);
218template <
typename BUF,
class F>
230 const FB* TheFB = fbd->fb;
232 const auto N_rcvs =
static_cast<int>(TheFB->
m_RcvTags->size());
236 for (
int k = 0; k < N_rcvs; k++)
238 if (fbd->recv_size[k] > 0)
240 auto const& cctc = TheFB->
m_RcvTags->at(fbd->recv_from[k]);
241 recv_cctc[k] = &cctc;
245 int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(),
nullptr);
247 if (actual_n_rcvs > 0) {
250 if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
252 amrex::Abort(
"FillBoundary_finish failed with wrong message size");
263#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
266 FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
267 fbd->recv_data, fbd->recv_size,
268 recv_cctc, is_thread_safe);
273 bool deterministic = fbd->deterministic;
274 unpack_recv_buffer_gpu<BUF>(*
this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
275 recv_cctc, op, is_thread_safe, TheFB->
m_id, deterministic);
281 unpack_recv_buffer_cpu<BUF>(*
this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
282 recv_cctc, op, is_thread_safe);
285 if (fbd->the_recv_data)
288 fbd->the_recv_data =
nullptr;
292 const auto N_snds =
static_cast<int>(TheFB->
m_SndTags->size());
297 fbd->the_send_data =
nullptr;
320 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period, op, a_cpc,
321 false, deterministic);
322 ParallelCopy_finish();
333 ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,
offset,period);
334 ParallelCopy_finish();
345 ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,
offset,period,
347 ParallelCopy_finish();
360 BL_PROFILE(
"FabArray::ParallelCopyToGhost()");
362 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
364 ParallelCopy_finish();
377 ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
385 ParallelCopy_finish();
400 bool to_ghost_cells_only,
403 ParallelCopy_nowait(src,scomp,dcomp,ncomp,snghost,dnghost,
IntVect(0),period,op,a_cpc,
404 to_ghost_cells_only, deterministic);
419 bool to_ghost_cells_only,
423 BL_PROFILE(
"FabArray::ParallelCopy_nowait()");
427 if (empty() || src.
empty()) {
439 (this->size() == 1) && (src.
size() == 1) &&
443 auto const& da = this->array(0, dcomp);
452 da(i,j,k,n) = sa(i,j,k,n);
463#pragma omp parallel for collapse(3)
465 for (
int n = 0; n < ncomp; ++n) {
466 for (
int k = lo.z; k <= hi.z; ++k) {
467 for (
int j = lo.y; j <= hi.y; ++j) {
469 for (
int i = lo.x; i <= hi.x; ++i) {
470 da(i,j,k,n) = sa(i,j,k,n);
478 da(i,j,k,n) += sa(i,j,k,n);
489#pragma omp parallel for collapse(3)
491 for (
int n = 0; n < ncomp; ++n) {
492 for (
int k = lo.z; k <= hi.z; ++k) {
493 for (
int j = lo.y; j <= hi.y; ++j) {
495 for (
int i = lo.x; i <= hi.x; ++i) {
496 da(i,j,k,n) += sa(i,j,k,n);
519 Add(*
this, src, scomp, dcomp, ncomp,
IntVect(0));
525 const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period,
526 to_ghost_cells_only,
offset);
535 if (N_locs == 0) {
return; }
539 PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op, deterministic);
544 PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
558 const int N_snds = thecpc.
m_SndTags->size();
559 const int N_rcvs = thecpc.
m_RcvTags->size();
560 const int N_locs = thecpc.
m_LocTags->size();
562 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
573 int NCompLeft = ncomp;
574 int SC = scomp, DC = dcomp, NC;
576 for (
int ipass = 0; ipass < ncomp; )
578 pcd = std::make_unique<PCData<FAB>>();
587 const bool last_iter = (NCompLeft == NC);
596 pcd->the_recv_data =
nullptr;
598 pcd->actual_n_rcvs = 0;
600 PostRcvs(*thecpc.
m_RcvTags, pcd->the_recv_data,
601 pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
602 pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);
615 src.PrepareSendBuffers(*thecpc.
m_SndTags, pcd->the_send_data, send_data, send_size,
616 send_rank, pcd->send_reqs, send_cctc, NC);
621 pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc, thecpc.
m_id);
626 pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);
641 PC_local_gpu(thecpc, src, SC, DC, NC, op, deterministic);
646 PC_local_cpu(thecpc, src, SC, DC, NC, op);
652 ParallelCopy_finish();
676 BL_PROFILE(
"FabArray::ParallelCopy_finish()");
679 if (!pcd) {
return; }
681 const CPC* thecpc = pcd->cpc;
683 const auto N_snds =
static_cast<int>(thecpc->
m_SndTags->size());
684 const auto N_rcvs =
static_cast<int>(thecpc->
m_RcvTags->size());
689 for (
int k = 0; k < N_rcvs; ++k)
691 if (pcd->recv_size[k] > 0)
693 auto const& cctc = thecpc->
m_RcvTags->at(pcd->recv_from[k]);
694 recv_cctc[k] = &cctc;
698 if (pcd->actual_n_rcvs > 0) {
702 if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
704 amrex::Abort(
"ParallelCopy failed with wrong message size");
714 unpack_recv_buffer_gpu(*
this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
715 recv_cctc, pcd->op, is_thread_safe, thecpc->
m_id,
721 unpack_recv_buffer_cpu(*
this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
722 recv_cctc, pcd->op, is_thread_safe);
725 if (pcd->the_recv_data)
728 pcd->the_recv_data =
nullptr;
738 pcd->the_send_data =
nullptr;
752 BL_ASSERT(dcomp + ncomp <= dest.nComp());
764 destmf.ParallelCopy(*
this, scomp, 0, ncomp, nghost, 0);
767 using T =
typename FAB::value_type;
769 Long count = dest.numPts()*ncomp;
770 T*
const p0 = dest.dataPtr(dcomp);
794template <
typename BUF>
804 char* pointer =
nullptr;
805 PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);
810template <
typename BUF>
812FabArray<FAB>::PrepareSendBuffers (
const MapOfCopyComTagContainers& SndTags,
813 char*& the_send_data,
814 Vector<char*>& send_data,
815 Vector<std::size_t>& send_size,
816 Vector<int>& send_rank,
817 Vector<MPI_Request>& send_reqs,
818 Vector<const CopyComTagsContainer*>& send_cctc,
826 const auto N_snds = SndTags.size();
827 if (N_snds == 0) {
return; }
828 send_data.reserve(N_snds);
829 send_size.reserve(N_snds);
830 send_rank.reserve(N_snds);
831 send_reqs.reserve(N_snds);
832 send_cctc.reserve(N_snds);
835 std::size_t total_volume = 0;
836 for (
auto const& kv : SndTags)
838 auto const& cctc = kv.second;
840 std::size_t nbytes = 0;
841 for (
auto const& cct : kv.
second)
843 nbytes += cct.sbox.numPts() * ncomp *
sizeof(BUF);
846 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
853 offset.push_back(total_volume);
854 total_volume += nbytes;
856 send_data.push_back(
nullptr);
857 send_size.push_back(nbytes);
858 send_rank.push_back(kv.first);
860 send_cctc.push_back(&cctc);
863 if (total_volume > 0)
866 for (
int i = 0, N =
static_cast<int>(send_size.size()); i < N; ++i) {
867 send_data[i] = the_send_data +
offset[i];
870 the_send_data =
nullptr;
876FabArray<FAB>::PostSnds (Vector<char*>
const& send_data,
877 Vector<std::size_t>
const& send_size,
878 Vector<int>
const& send_rank,
879 Vector<MPI_Request>& send_reqs,
884 const auto N_snds =
static_cast<int>(send_reqs.size());
885 for (
int j = 0; j < N_snds; ++j)
887 if (send_size[j] > 0) {
890 (send_data[j], send_size[j], rank, SeqNum, comm).
req();
896template <
typename BUF>
897TheFaArenaPointer FabArray<FAB>::PostRcvs (
const MapOfCopyComTagContainers& RcvTags,
898 Vector<char*>& recv_data,
899 Vector<std::size_t>& recv_size,
900 Vector<int>& recv_from,
901 Vector<MPI_Request>& recv_reqs,
905 char* pointer =
nullptr;
906 PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);
911template <
typename BUF>
913FabArray<FAB>::PostRcvs (
const MapOfCopyComTagContainers& RcvTags,
914 char*& the_recv_data,
915 Vector<char*>& recv_data,
916 Vector<std::size_t>& recv_size,
917 Vector<int>& recv_from,
918 Vector<MPI_Request>& recv_reqs,
927 Vector<std::size_t>
offset;
928 std::size_t TotalRcvsVolume = 0;
929 for (
const auto& kv : RcvTags)
931 std::size_t nbytes = 0;
932 for (
auto const& cct : kv.
second)
934 nbytes += cct.dbox.numPts() * ncomp *
sizeof(BUF);
937 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
944 offset.push_back(TotalRcvsVolume);
945 TotalRcvsVolume += nbytes;
947 recv_data.push_back(
nullptr);
948 recv_size.push_back(nbytes);
949 recv_from.push_back(kv.first);
953 const auto nrecv =
static_cast<int>(recv_from.size());
957 if (TotalRcvsVolume == 0)
959 the_recv_data =
nullptr;
965 for (
int i = 0; i < nrecv; ++i)
967 recv_data[i] = the_recv_data +
offset[i];
968 if (recv_size[i] > 0)
972 (recv_data[i], recv_size[i], rank, SeqNum, comm).
req();
988 "FabArray::Redistribute: must have the same BoxArray");
992 Copy(*
this, src, scomp, dcomp, ncomp, nghost);
1010#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
1020template <
class TagT>
1023 const int N = tags.
size();
1024 if (N == 0) {
return; }
1030 const int ncomp = tag.dfab.nComp();
1031 for (
int n = 0; n < ncomp; ++n) {
1032 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
1039#pragma omp parallel for
1041 for (
int itag = 0; itag < N; ++itag) {
1042 auto const& tag = tags[itag];
1043 const int ncomp = tag.dfab.nComp();
1046 tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
1067template <FabArrayType MF>
1075 const int N = mf.
size();
1076 for (
int i = 0; i < N; ++i) {
1077 mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
1078 cross.empty() ? 0 : cross[i]);
1090template <FabArrayType MF>
1099 ncomp.reserve(mf.
size());
1100 nghost.reserve(mf.
size());
1101 for (
auto const&
x : mf) {
1102 ncomp.push_back(
x->nComp());
1103 nghost.push_back(
x->nGrowVect());
1113template <FabArrayType MF>
1118 const int N = mf.
size();
1119 for (
int i = 0; i < N; ++i) {
1120 mf[i]->FillBoundary_finish();
1136template <FabArrayType MF>
1142 BL_PROFILE(
"FillBoundaryAndSync_nowait(Vector)");
1143 const int N = mf.
size();
1144 for (
int i = 0; i < N; ++i) {
1145 mf[i]->FillBoundaryAndSync_nowait(scomp[i], ncomp[i], nghost[i], period[i]);
1155template <FabArrayType MF>
1164 ncomp.reserve(mf.
size());
1165 nghost.reserve(mf.
size());
1166 for (
auto const&
x : mf) {
1167 ncomp.push_back(
x->nComp());
1168 nghost.push_back(
x->nGrowVect());
1178template <FabArrayType MF>
1182 BL_PROFILE(
"FillBoundaryAndSync_finish(Vector)");
1183 const int N = mf.
size();
1184 for (
int i = 0; i < N; ++i) {
1185 mf[i]->FillBoundaryAndSync_finish();
1204template <FabArrayType MF>
1212 const int N = mf.
size();
1213 for (
int i = 0; i < N; ++i) {
1214 mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
1215 cross.empty() ? 0 : cross[i]);
1217 for (
int i = 0; i < N; ++i) {
1218 mf[i]->FillBoundary_finish();
1222 using FAB =
typename MF::FABType::value_type;
1223 using T =
typename FAB::value_type;
1225 const int nmfs = mf.
size();
1226 Vector<FabArrayBase::CommMetaData const*> cmds;
1230 for (
int imf = 0; imf < nmfs; ++imf) {
1231 if (nghost[imf].
max() > 0) {
1232 auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
1233 cross.empty() ? 0 : cross[imf]);
1235 cmds.push_back(
static_cast<FabArrayBase::CommMetaData const*
>(&TheFB));
1236 N_locs += TheFB.m_LocTags->
size();
1237 N_rcvs += TheFB.m_RcvTags->size();
1238 N_snds += TheFB.m_SndTags->size();
1240 cmds.push_back(
nullptr);
1244 using TagT = Array4CopyTag<T>;
1245 Vector<TagT> local_tags;
1246 local_tags.reserve(N_locs);
1248 for (
int imf = 0; imf < nmfs; ++imf) {
1250 auto const& tags = *(cmds[imf]->m_LocTags);
1251 for (
auto const& tag : tags) {
1252 local_tags.push_back(TagT{.dfab = (*mf[imf])[tag.dstIndex].array(scomp[imf],ncomp[imf]),
1253 .dindex = tag.dstIndex,
1254 .sfab = (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
1256 .offset = (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});
1262 detail::fbv_copy(local_tags);
1274 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
return; }
1276 char* the_recv_data =
nullptr;
1277 Vector<int> recv_from;
1278 Vector<std::size_t> recv_size;
1279 Vector<MPI_Request> recv_reqs;
1280 Vector<MPI_Status> recv_stat;
1281 Vector<TagT> recv_tags;
1285 for (
int imf = 0; imf < nmfs; ++imf) {
1287 auto const& tags = *(cmds[imf]->m_RcvTags);
1288 for (
const auto& kv : tags) {
1289 recv_from.push_back(kv.first);
1294 const int nrecv = recv_from.size();
1297 recv_stat.resize(nrecv);
1299 recv_tags.reserve(N_rcvs);
1301 Vector<Vector<std::size_t> > recv_offset(nrecv);
1302 Vector<std::size_t>
offset;
1303 recv_size.reserve(nrecv);
1305 std::size_t TotalRcvsVolume = 0;
1306 for (
int i = 0; i < nrecv; ++i) {
1307 std::size_t nbytes = 0;
1308 for (
int imf = 0; imf < nmfs; ++imf) {
1310 auto const& tags = *(cmds[imf]->m_RcvTags);
1311 auto it = tags.find(recv_from[i]);
1312 if (it != tags.end()) {
1313 for (
auto const& cct : it->
second) {
1314 auto& dfab = (*mf[imf])[cct.dstIndex];
1315 recv_offset[i].push_back(nbytes);
1316 recv_tags.push_back(TagT{
1317 .dfab = dfab.array(scomp[imf],ncomp[imf]),
1318 .dindex = cct.dstIndex,
1319 .sfab = makeArray4<T const>(
nullptr,cct.dbox,ncomp[imf]),
1321 .offset = Dim3{.x = 0, .y = 0, .z = 0}
1323 nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);
1329 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1335 offset.push_back(TotalRcvsVolume);
1336 TotalRcvsVolume += nbytes;
1338 recv_size.push_back(nbytes);
1344 for (
int i = 0; i < nrecv; ++i) {
1345 char* p = the_recv_data +
offset[i];
1348 (p, recv_size[i], rank, SeqNum, comm).
req();
1349 for (
int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
1350 recv_tags[k++].sfab.p = (T
const*)(p + recv_offset[i][j]);
1355 char* the_send_data =
nullptr;
1356 Vector<int> send_rank;
1357 Vector<char*> send_data;
1358 Vector<std::size_t> send_size;
1359 Vector<MPI_Request> send_reqs;
1361 for (
int imf = 0; imf < nmfs; ++imf) {
1363 auto const& tags = *(cmds[imf]->m_SndTags);
1364 for (
auto const& kv : tags) {
1365 send_rank.push_back(kv.first);
1370 const int nsend = send_rank.size();
1372 send_data.resize(nsend,
nullptr);
1376 send_tags.reserve(N_snds);
1380 send_size.reserve(nsend);
1382 std::size_t TotalSndsVolume = 0;
1383 for (
int i = 0; i < nsend; ++i) {
1384 std::size_t nbytes = 0;
1385 for (
int imf = 0; imf < nmfs; ++imf) {
1387 auto const& tags = *(cmds[imf]->m_SndTags);
1388 auto it = tags.find(send_rank[i]);
1389 if (it != tags.end()) {
1390 for (
auto const& cct : it->second) {
1391 auto const& sfab = (*mf[imf])[cct.srcIndex];
1392 send_offset[i].push_back(nbytes);
1393 send_tags.push_back(TagT{
1394 .dfab = amrex::makeArray4<T>(
nullptr,cct.sbox,ncomp[imf]),
1395 .dindex = cct.dstIndex,
1396 .sfab = sfab.const_array(scomp[imf],ncomp[imf]),
1398 .offset =
Dim3{.
x = 0, .y = 0, .z = 0}
1400 nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);
1406 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
1412 offset.push_back(TotalSndsVolume);
1413 TotalSndsVolume += nbytes;
1415 send_size.push_back(nbytes);
1420 for (
int i = 0; i < nsend; ++i) {
1421 send_data[i] = the_send_data +
offset[i];
1422 for (
int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
1423 send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);
1427 detail::fbv_copy(send_tags);
1432#if !defined(AMREX_DEBUG)
1438 detail::fbv_copy(local_tags);
1439#if !defined(AMREX_DEBUG)
1447 if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
1448 amrex::Abort(
"FillBoundary(vector) failed with wrong message size");
1452 detail::fbv_copy(recv_tags);
1458 Vector<MPI_Status> stats(send_reqs.size());
1479template <FabArrayType MF>
1486 const int N = mf.
size();
1487 for (
int i = 0; i < N; ++i) {
1488 mf[i]->FillBoundaryAndSync_nowait(scomp[i], ncomp[i], nghost[i], period[i]);
1490 for (
int i = 0; i < N; ++i) {
1491 mf[i]->FillBoundaryAndSync_finish();
1496template <FabArrayType MF>
1505template <FabArrayType MF>
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_SYNC_STOP()
Definition AMReX_BLProfiler.H:645
#define BL_PROFILE_SYNC_START_TIMED(fname)
Definition AMReX_BLProfiler.H:644
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_NODISCARD
Definition AMReX_Extension.H:252
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_LOOP_4D(bx, ncomp, i, j, k, n, block)
Definition AMReX_Loop.nolint.H:16
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:564
IndexType ixType() const noexcept
Return index type of this BoxArray.
Definition AMReX_BoxArray.H:854
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
CpOp
parallel copy or add
Definition AMReX_FabArrayBase.H:394
@ ADD
Definition AMReX_FabArrayBase.H:394
@ COPY
Definition AMReX_FabArrayBase.H:394
Box box(int K) const noexcept
Return the Kth Box in the BoxArray. That is, the valid region of the Kth grid.
Definition AMReX_FabArrayBase.H:101
DistributionMapping distributionMap
Definition AMReX_FabArrayBase.H:445
static int MaxComp
The maximum number of components to copy() at a time.
Definition AMReX_FabArrayBase.H:292
BoxArray boxarray
Definition AMReX_FabArrayBase.H:444
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArray.
Definition AMReX_FabArrayBase.H:95
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:344
void ParallelCopyToGhost_finish()
Definition AMReX_FabArrayCommI.H:383
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:873
void Redistribute(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &nghost)
Copy from src to this. this and src have the same BoxArray, but different DistributionMapping.
Definition AMReX_FabArrayCommI.H:981
void ParallelCopyToGhost(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &snghost, const IntVect &dnghost, const Periodicity &period=Periodicity::NonPeriodic())
Definition AMReX_FabArrayCommI.H:352
void ParallelCopy_finish()
Definition AMReX_FabArrayCommI.H:671
void ParallelAdd(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic())
This function copies data from src to this FabArray. Each FAB in src is intersected with all FABs in this FabArray and a copy is performed on the region of intersection.
Definition AMReX_FabArray.H:870
void ParallelCopyToGhost_nowait(const FabArray< FAB > &src, int scomp, int dcomp, int ncomp, const IntVect &snghost, const IntVect &dnghost, const Periodicity &period=Periodicity::NonPeriodic())
Definition AMReX_FabArrayCommI.H:369
void FillBoundary_test()
Definition AMReX_FabArrayCommI.H:1008
void copyTo(FAB &dest, int nghost=0) const
Copy the values contained in the intersection of the valid + nghost region of this FabArray with the FAB dest into dest.
Definition AMReX_FabArray.H:2696
void ParallelCopy_nowait(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:887
Array4< typename FabArray< FAB >::value_type const > const_array(const MFIter &mfi) const noexcept
Definition AMReX_FabArray.H:585
__host__ __device__ bool cellCentered() const noexcept
True if the IndexTypeND is CELL based in all directions.
Definition AMReX_IndexType.H:102
__host__ __device__ constexpr bool allGE(const IntVectND< dim > &rhs) const noexcept
Returns true if this is greater than or equal to argument for all components. NOTE: This is NOT a strict weak ordering usable by STL sorting algorithms.
Definition AMReX_IntVect.H:542
__host__ static __device__ constexpr IntVectND< dim > TheZeroVector() noexcept
This static member function returns a reference to a constant IntVectND object, all of whose dim arguments are set to zero (0).
Definition AMReX_IntVect.H:771
__host__ __device__ constexpr int max() const noexcept
maximum (no absolute values) value
Definition AMReX_IntVect.H:313
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
This provides length of period for periodic domains. 0 means it is not periodic in that direction. It is also assumed that the periodic domain starts with index 0.
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
bool isAnyPeriodic() const noexcept
Definition AMReX_Periodicity.H:22
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound checking when compiled with DEBUG=TRUE.
Definition AMReX_Vector.H:29
Long size() const noexcept
Definition AMReX_Vector.H:54
Checks if a type is derived from amrex::BaseFab.
Definition AMReX_Concepts.H:13
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1359
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1345
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1289
Arena * The_Comms_Arena()
Definition AMReX_Arena.cpp:880
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
bool inGraphRegion()
Definition AMReX_GpuControl.H:117
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:435
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:88
bool inNoSyncRegion() noexcept
Definition AMReX_GpuControl.H:148
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
void Test(MPI_Request &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1220
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1154
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:113
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1308
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition AMReX_ParallelDescriptor.cpp:1295
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:678
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1196
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_REQUEST_NULL
Definition AMReX_ccse-mpi.H:57
Definition AMReX_Amr.cpp:50
@ make_alias
Definition AMReX_MakeType.H:7
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void FillBoundary_finish(Vector< MF * > const &mf)
Wait for outstanding FillBoundary_nowait operations launched with the vector helper to complete.
Definition AMReX_FabArrayCommI.H:1115
void Copy(FabArray< DFAB > &dst, FabArray< SFAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:180
void Add(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition AMReX_FabArray.H:239
std::unique_ptr< char, TheFaArenaDeleter > TheFaArenaPointer
Definition AMReX_FabArray.H:106
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2867
IntVect nGrowVect(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2857
void FillBoundary_nowait(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Launch FillBoundary_nowait across a vector of FabArrays.
Definition AMReX_FabArrayCommI.H:1069
void FillBoundaryAndSync_nowait(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period)
Launch FillBoundaryAndSync_nowait across a vector of FabArrays.
Definition AMReX_FabArrayCommI.H:1138
void FillBoundaryAndSync(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period)
Perform FillBoundaryAndSync on a batch of FabArrays (e.g., MultiFabs).
Definition AMReX_FabArrayCommI.H:1481
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
double second() noexcept
Definition AMReX_Utility.cpp:940
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:1951
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:45
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that is a multiple of align_requirement.
Definition AMReX_Arena.H:33
void RemoveDuplicates(Vector< T > &vec)
Definition AMReX_Vector.H:210
void FillBoundaryAndSync_finish(Vector< MF * > const &mf)
Wait for outstanding FillBoundaryAndSync_nowait operations launched with the vector helper to complete.
Definition AMReX_FabArrayCommI.H:1180
void FillBoundary(Vector< MF * > const &mf, Vector< int > const &scomp, Vector< int > const &ncomp, Vector< IntVect > const &nghost, Vector< Periodicity > const &period, Vector< int > const &cross={})
Perform FillBoundary on a batch of FabArrays (e.g., MultiFabs).
Definition AMReX_FabArrayCommI.H:1206
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2862
Definition AMReX_Dim3.H:13
int x
Definition AMReX_Dim3.H:13
parallel copy or add
Definition AMReX_FabArrayBase.H:538
std::uint64_t m_id
Definition AMReX_FabArrayBase.H:553
FillBoundary.
Definition AMReX_FabArrayBase.H:488
std::uint64_t m_id
Definition AMReX_FabArrayBase.H:494
IntVect m_sb_snghost
Definition AMReX_FabArrayBase.H:498
Definition AMReX_TypeTraits.H:61
Definition AMReX_TypeTraits.H:277
FabArray memory allocation information.
Definition AMReX_FabArray.H:68