template <class FAB>
template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
void
FabArray<FAB>::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost,
                            const Periodicity& period, bool cross,
                            bool enforce_periodicity_only,
                            bool override_sync,
                            IntVect const& sumboundary_src_nghost,
                            bool deterministic)
{
    AMREX_ASSERT(!enforce_periodicity_only || !override_sync);

    bool sumboundary = sumboundary_src_nghost.allGE(0);

    if (enforce_periodicity_only) {
    }
    else if (override_sync) {
        work_to_do = (nghost.max() > 0) || !is_cell_centered();
    }
    else if (sumboundary) {
        work_to_do = nghost.max() > 0;

    if (!work_to_do) { return; }
    const FB& TheFB = getFB(nghost, period, cross, enforce_periodicity_only,
                            override_sync, sumboundary_src_nghost);

    if (N_locs == 0) { return; }

#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
    FB_local_copy_cuda_graph_1(TheFB, scomp, ncomp);
    FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
    FB_local_copy_gpu(TheFB, scomp, ncomp);
    FB_local_add_cpu(TheFB, scomp, ncomp);
    FB_local_copy_cpu(TheFB, scomp, ncomp);
    const int N_locs = TheFB.m_LocTags->size();
    const int N_rcvs = TheFB.m_RcvTags->size();
    const int N_snds = TheFB.m_SndTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {

    fbd = std::make_unique<FBData<FAB>>();
    fbd->deterministic = deterministic;

    PostRcvs<BUF>(*TheFB.m_RcvTags, fbd->the_recv_data,
                  fbd->recv_data, fbd->recv_size, fbd->recv_from, fbd->recv_reqs,
    fbd->recv_stat.resize(N_rcvs);

    char*& the_send_data = fbd->the_send_data;
    PrepareSendBuffers<BUF>(*TheFB.m_SndTags, the_send_data, send_data, send_size, send_rank,
                            send_reqs, send_cctc, ncomp);

#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
    FB_pack_send_buffer_cuda_graph(TheFB, scomp, ncomp, send_data, send_size, send_cctc);
    pack_send_buffer_gpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc, TheFB.m_id);
    pack_send_buffer_cpu<BUF>(*this, scomp, ncomp, send_data, send_size, send_cctc);

    PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);
#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
    FB_local_copy_cuda_graph_n(TheFB, scomp, ncomp);
    FB_local_add_gpu(TheFB, scomp, ncomp, deterministic);
    FB_local_copy_gpu(TheFB, scomp, ncomp);
    FB_local_add_cpu(TheFB, scomp, ncomp);
    FB_local_copy_cpu(TheFB, scomp, ncomp);
template <class FAB>
template <typename BUF, class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
void
FabArray<FAB>::FillBoundary_finish ()
{
    const FB* TheFB = fbd->fb;

    const auto N_rcvs = static_cast<int>(TheFB->m_RcvTags->size());

    for (int k = 0; k < N_rcvs; k++)
        if (fbd->recv_size[k] > 0)
            auto const& cctc = TheFB->m_RcvTags->at(fbd->recv_from[k]);
            recv_cctc[k] = &cctc;

    int actual_n_rcvs = N_rcvs - std::count(fbd->recv_data.begin(), fbd->recv_data.end(), nullptr);

    if (actual_n_rcvs > 0) {
        if (!CheckRcvStats(fbd->recv_stat, fbd->recv_size, fbd->tag))
            amrex::Abort("FillBoundary_finish failed with wrong message size");

#if defined(__CUDACC__) && defined(AMREX_USE_CUDA)
    FB_unpack_recv_buffer_cuda_graph(*TheFB, fbd->scomp, fbd->ncomp,
                                     fbd->recv_data, fbd->recv_size,
                                     recv_cctc, is_thread_safe);
    bool deterministic = fbd->deterministic;
    unpack_recv_buffer_gpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                recv_cctc, op, is_thread_safe, TheFB->m_id, deterministic);
    unpack_recv_buffer_cpu<BUF>(*this, fbd->scomp, fbd->ncomp, fbd->recv_data, fbd->recv_size,
                                recv_cctc, op, is_thread_safe);

    if (fbd->the_recv_data)
        fbd->the_recv_data = nullptr;

    const auto N_snds = static_cast<int>(TheFB->m_SndTags->size());
    fbd->the_send_data = nullptr;
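The two halves above (FBEP_nowait posting receives and packing sends, FillBoundary_finish waiting and unpacking) back the public FillBoundary interface. The following is a minimal usage sketch, not part of this file: the BoxArray ba, DistributionMapping dm, and Geometry geom are assumed to be set up elsewhere, and the overload taking a Periodicity is assumed to exist on MultiFab as it does on FabArray.

#include <AMReX_MultiFab.H>

void exchange_ghosts (amrex::BoxArray const& ba,
                      amrex::DistributionMapping const& dm,
                      amrex::Geometry const& geom)
{
    int ncomp = 2, ngrow = 2;
    amrex::MultiFab mf(ba, dm, ncomp, ngrow);
    mf.setVal(1.0);

    // Blocking form: post, pack, send, wait, and unpack in one call.
    mf.FillBoundary(geom.periodicity());

    // Split form: overlap communication with work that does not touch ghost cells.
    mf.FillBoundary_nowait(geom.periodicity());
    // ... compute on valid cells here ...
    mf.FillBoundary_finish();
}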
    ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period, op, a_cpc,
                        false, deterministic);
    ParallelCopy_finish();

    ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,offset,period);
    ParallelCopy_finish();

    ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,offset,period,
    ParallelCopy_finish();

    BL_PROFILE("FabArray::ParallelCopyToGhost()");
    ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
    ParallelCopy_finish();

    ParallelCopy_nowait(src, scomp, dcomp, ncomp, snghost, dnghost, period,
    ParallelCopy_finish();

    bool to_ghost_cells_only,
    ParallelCopy_nowait(src,scomp,dcomp,ncomp,snghost,dnghost,IntVect(0),period,op,a_cpc,
                        to_ghost_cells_only, deterministic);

    bool to_ghost_cells_only,
    BL_PROFILE("FabArray::ParallelCopy_nowait()");
    if (empty() || src.empty()) {

    (this->size() == 1) && (src.size() == 1) &&

    auto const& da = this->array(0, dcomp);
        da(i,j,k,n) = sa(i,j,k,n);

#pragma omp parallel for collapse(3)
    for (int n = 0; n < ncomp; ++n) {
        for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
                for (int i = lo.x; i <= hi.x; ++i) {
                    da(i,j,k,n) = sa(i,j,k,n);

        da(i,j,k,n) += sa(i,j,k,n);

#pragma omp parallel for collapse(3)
    for (int n = 0; n < ncomp; ++n) {
        for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
                for (int i = lo.x; i <= hi.x; ++i) {
                    da(i,j,k,n) += sa(i,j,k,n);

    Add(*this, src, scomp, dcomp, ncomp, IntVect(0));

    const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period,
                                                  to_ghost_cells_only, offset);

    if (N_locs == 0) { return; }
        PC_local_gpu(thecpc, src, scomp, dcomp, ncomp, op, deterministic);
        PC_local_cpu(thecpc, src, scomp, dcomp, ncomp, op);
    const int N_snds = thecpc.m_SndTags->size();
    const int N_rcvs = thecpc.m_RcvTags->size();
    const int N_locs = thecpc.m_LocTags->size();

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {

    int NCompLeft = ncomp;
    int SC = scomp, DC = dcomp, NC;

    for (int ipass = 0; ipass < ncomp; )
        pcd = std::make_unique<PCData<FAB>>();
        const bool last_iter = (NCompLeft == NC);

        pcd->the_recv_data = nullptr;
        pcd->actual_n_rcvs = 0;
        PostRcvs(*thecpc.m_RcvTags, pcd->the_recv_data,
                 pcd->recv_data, pcd->recv_size, pcd->recv_from, pcd->recv_reqs, NC, pcd->tag);
        pcd->actual_n_rcvs = N_rcvs - std::count(pcd->recv_size.begin(), pcd->recv_size.end(), 0);

        src.PrepareSendBuffers(*thecpc.m_SndTags, pcd->the_send_data, send_data, send_size,
                               send_rank, pcd->send_reqs, send_cctc, NC);
        pack_send_buffer_gpu(src, SC, NC, send_data, send_size, send_cctc, thecpc.m_id);
        pack_send_buffer_cpu(src, SC, NC, send_data, send_size, send_cctc);

        PC_local_gpu(thecpc, src, SC, DC, NC, op, deterministic);
        PC_local_cpu(thecpc, src, SC, DC, NC, op);

    ParallelCopy_finish();
    BL_PROFILE("FabArray::ParallelCopy_finish()");

    if (!pcd) { return; }

    const CPC* thecpc = pcd->cpc;

    const auto N_snds = static_cast<int>(thecpc->m_SndTags->size());
    const auto N_rcvs = static_cast<int>(thecpc->m_RcvTags->size());

    for (int k = 0; k < N_rcvs; ++k)
        if (pcd->recv_size[k] > 0)
            auto const& cctc = thecpc->m_RcvTags->at(pcd->recv_from[k]);
            recv_cctc[k] = &cctc;

    if (pcd->actual_n_rcvs > 0) {
        if (!CheckRcvStats(stats, pcd->recv_size, pcd->tag))
            amrex::Abort("ParallelCopy failed with wrong message size");

        unpack_recv_buffer_gpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
                               recv_cctc, pcd->op, is_thread_safe, thecpc->m_id, false);
        unpack_recv_buffer_cpu(*this, pcd->DC, pcd->NC, pcd->recv_data, pcd->recv_size,
                               recv_cctc, pcd->op, is_thread_safe);

    if (pcd->the_recv_data)
        pcd->the_recv_data = nullptr;

    pcd->the_send_data = nullptr;
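ParallelCopy_nowait and ParallelCopy_finish implement copy-or-add communication between two FabArrays that live on different BoxArrays and/or DistributionMappings. A minimal usage sketch follows (not part of this file); dst and src are assumed to have the same number of components, and the overloads used are the ones whose declarations appear in AMReX_FabArray.H.

void copy_between_layouts (amrex::MultiFab& dst, amrex::MultiFab const& src,
                           amrex::Geometry const& geom)
{
    // Blocking copy of valid-region data where the two BoxArrays intersect.
    dst.ParallelCopy(src, geom.periodicity());

    // Accumulate instead of overwrite.
    dst.ParallelCopy(src, geom.periodicity(), amrex::FabArrayBase::ADD);

    // Split form mirrors FillBoundary: post and pack now, wait and unpack later.
    dst.ParallelCopy_nowait(src, geom.periodicity());
    // ... other work ...
    dst.ParallelCopy_finish();
}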
    BL_ASSERT(dcomp + ncomp <= dest.nComp());

    destmf.ParallelCopy(*this, scomp, 0, ncomp, nghost, 0);

    using T = typename FAB::value_type;
    Long count = dest.numPts()*ncomp;
    T* const p0 = dest.dataPtr(dcomp);
    if (dest.arena()->isDevice()) {
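The fragment above belongs to copyTo, which gathers the distributed data into a single FAB and then broadcasts it so every rank ends up with the same copy. A small usage sketch, assuming the destination box is small enough to replicate on each rank:

void gather_to_fab (amrex::MultiFab const& mf, amrex::Box const& bx)
{
    amrex::FArrayBox fab(bx, mf.nComp()); // identical on every rank afterwards
    mf.copyTo(fab, 0);                    // second argument: number of ghost cells to include
}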
template <typename BUF>
    char* pointer = nullptr;
    PrepareSendBuffers<BUF>(SndTags, pointer, send_data, send_size, send_rank, send_reqs, send_cctc, ncomp);

template <typename BUF>
FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
                                   char*& the_send_data,
                                   Vector<char*>& send_data,
                                   Vector<std::size_t>& send_size,
                                   Vector<int>& send_rank,
                                   Vector<MPI_Request>& send_reqs,
                                   Vector<const CopyComTagsContainer*>& send_cctc,

    const auto N_snds = SndTags.size();
    if (N_snds == 0) { return; }
    send_data.reserve(N_snds);
    send_size.reserve(N_snds);
    send_rank.reserve(N_snds);
    send_reqs.reserve(N_snds);
    send_cctc.reserve(N_snds);

    std::size_t total_volume = 0;
    for (auto const& kv : SndTags)
        auto const& cctc = kv.second;

        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
            nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);

        offset.push_back(total_volume);
        total_volume += nbytes;

        send_data.push_back(nullptr);
        send_size.push_back(nbytes);
        send_rank.push_back(kv.first);
        send_cctc.push_back(&cctc);

    if (total_volume > 0)
        for (int i = 0, N = static_cast<int>(send_size.size()); i < N; ++i) {
            send_data[i] = the_send_data + offset[i];
    the_send_data = nullptr;
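PrepareSendBuffers sizes the message for each destination rank, records a running offset per rank, and then hands out per-rank slices of one contiguous communication buffer. The following is a simplified, self-contained illustration of that offset computation only; it is not the library API, and the alignment step merely mimics the aligned_size/sizeof_selected_comm_data_type logic above.

#include <cstddef>
#include <vector>

// Sketch of the single-allocation layout used for send (and receive) buffers.
struct BufferLayout {
    std::vector<std::size_t> offset; // start of each rank's slice in the buffer
    std::size_t total = 0;           // total bytes to allocate
};

BufferLayout make_layout (std::vector<std::size_t> const& bytes_per_rank,
                          std::size_t alignment)
{
    BufferLayout layout;
    for (std::size_t nbytes : bytes_per_rank) {
        // Round each slice up so the next slice starts suitably aligned.
        std::size_t padded = (nbytes + alignment - 1) / alignment * alignment;
        layout.offset.push_back(layout.total);
        layout.total += padded;
    }
    return layout;
}

// One buffer of layout.total bytes is then allocated, and rank i's slice
// begins at buffer + layout.offset[i].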
FabArray<FAB>::PostSnds (Vector<char*> const& send_data,
                         Vector<std::size_t> const& send_size,
                         Vector<int> const& send_rank,
                         Vector<MPI_Request>& send_reqs,

    const auto N_snds = static_cast<int>(send_reqs.size());
    for (int j = 0; j < N_snds; ++j)
        if (send_size[j] > 0) {
            (send_data[j], send_size[j], rank, SeqNum, comm).req();
template <typename BUF>
TheFaArenaPointer FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                                           Vector<char*>& recv_data,
                                           Vector<std::size_t>& recv_size,
                                           Vector<int>& recv_from,
                                           Vector<MPI_Request>& recv_reqs,

    char* pointer = nullptr;
    PostRcvs(RcvTags, pointer, recv_data, recv_size, recv_from, recv_reqs, ncomp, SeqNum);

template <typename BUF>
FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
                         char*& the_recv_data,
                         Vector<char*>& recv_data,
                         Vector<std::size_t>& recv_size,
                         Vector<int>& recv_from,
                         Vector<MPI_Request>& recv_reqs,

    Vector<std::size_t> offset;
    std::size_t TotalRcvsVolume = 0;
    for (const auto& kv : RcvTags)
        std::size_t nbytes = 0;
        for (auto const& cct : kv.second)
            nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);

        TotalRcvsVolume += nbytes;

        recv_data.push_back(nullptr);
        recv_size.push_back(nbytes);
        recv_from.push_back(kv.first);

    const auto nrecv = static_cast<int>(recv_from.size());

    if (TotalRcvsVolume == 0)
        the_recv_data = nullptr;

    for (int i = 0; i < nrecv; ++i)
        recv_data[i] = the_recv_data + offset[i];
        if (recv_size[i] > 0)
            (recv_data[i], recv_size[i], rank, SeqNum, comm).req();
982 "FabArray::Redistribute: must have the same BoxArray");
986 Copy(*
this, src, scomp, dcomp, ncomp, nghost);
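Redistribute copies data between two FabArrays that share a BoxArray but use different DistributionMappings. A usage sketch (assumed setup; dm_new is an already-constructed DistributionMapping):

void rebalance (amrex::MultiFab const& mf, amrex::DistributionMapping const& dm_new)
{
    amrex::MultiFab mf_new(mf.boxArray(), dm_new, mf.nComp(), mf.nGrow());
    mf_new.Redistribute(mf, 0, 0, mf.nComp(), mf.nGrowVect());
}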
#if defined(AMREX_USE_MPI) && !defined(AMREX_DEBUG)
template <class TagT>
    const int N = tags.size();
    if (N == 0) { return; }

    if (Gpu::inLaunchRegion()) {
            const int ncomp = tag.dfab.nComp();
            for (int n = 0; n < ncomp; ++n) {
                tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);

#pragma omp parallel for
        for (int itag = 0; itag < N; ++itag) {
            auto const& tag = tags[itag];
            const int ncomp = tag.dfab.nComp();
                tag.dfab(i,j,k,n) = tag.sfab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z,n);
std::enable_if_t<IsFabArray<MF>::value>
    const int N = mf.size();
    for (int i = 0; i < N; ++i) {
        mf[i]->FillBoundary_nowait(scomp[i], ncomp[i], nghost[i], period[i],
                                   cross.empty() ? 0 : cross[i]);
    for (int i = 0; i < N; ++i) {
        mf[i]->FillBoundary_finish();

    using FAB = typename MF::FABType::value_type;
    using T = typename FAB::value_type;

    const int nmfs = mf.size();
    for (int imf = 0; imf < nmfs; ++imf) {
        if (nghost[imf].max() > 0) {
            auto const& TheFB = mf[imf]->getFB(nghost[imf], period[imf],
                                               cross.empty() ? 0 : cross[imf]);
            N_rcvs += TheFB.m_RcvTags->size();
            N_snds += TheFB.m_SndTags->size();
            cmds.push_back(nullptr);

    using TagT = Array4CopyTag<T>;
    Vector<TagT> local_tags;
    local_tags.reserve(N_locs);
    for (int imf = 0; imf < nmfs; ++imf) {
        auto const& tags = *(cmds[imf]->m_LocTags);
        for (auto const& tag : tags) {
            local_tags.push_back({(*mf[imf])[tag.dstIndex].array (scomp[imf],ncomp[imf]),
                                  (*mf[imf])[tag.srcIndex].const_array(scomp[imf],ncomp[imf]),
                                  (tag.sbox.smallEnd()-tag.dbox.smallEnd()).dim3()});

    detail::fbv_copy(local_tags);

    if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) { return; }

    char* the_recv_data = nullptr;
    Vector<int> recv_from;
    Vector<std::size_t> recv_size;
    Vector<MPI_Request> recv_reqs;
    Vector<MPI_Status> recv_stat;
    Vector<TagT> recv_tags;

    for (int imf = 0; imf < nmfs; ++imf) {
        auto const& tags = *(cmds[imf]->m_RcvTags);
        for (const auto& kv : tags) {
            recv_from.push_back(kv.first);

    const int nrecv = recv_from.size();
    recv_stat.resize(nrecv);
    recv_tags.reserve(N_rcvs);

    Vector<Vector<std::size_t> > recv_offset(nrecv);
    Vector<std::size_t> offset;
    recv_size.reserve(nrecv);

    std::size_t TotalRcvsVolume = 0;
    for (int i = 0; i < nrecv; ++i) {
        std::size_t nbytes = 0;
        for (int imf = 0; imf < nmfs; ++imf) {
            auto const& tags = *(cmds[imf]->m_RcvTags);
            auto it = tags.find(recv_from[i]);
            if (it != tags.end()) {
                for (auto const& cct : it->second) {
                    auto& dfab = (*mf[imf])[cct.dstIndex];
                    recv_offset[i].push_back(nbytes);
                    recv_tags.push_back({dfab.array(scomp[imf],ncomp[imf]),
                                         makeArray4<T const>(nullptr,cct.dbox,ncomp[imf]),
                                         cct.dbox, Dim3{0,0,0}});
                    nbytes += dfab.nBytes(cct.dbox,ncomp[imf]);

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);

        offset.push_back(TotalRcvsVolume);
        TotalRcvsVolume += nbytes;

        recv_size.push_back(nbytes);

    for (int i = 0; i < nrecv; ++i) {
        char* p = the_recv_data + offset[i];
        (p, recv_size[i], rank, SeqNum, comm).req();
        for (int j = 0, nj = recv_offset[i].size(); j < nj; ++j) {
            recv_tags[k++].sfab.p = (T const*)(p + recv_offset[i][j]);

    char* the_send_data = nullptr;
    Vector<int> send_rank;
    Vector<char*> send_data;
    Vector<std::size_t> send_size;
    Vector<MPI_Request> send_reqs;

    for (int imf = 0; imf < nmfs; ++imf) {
        auto const& tags = *(cmds[imf]->m_SndTags);
        for (auto const& kv : tags) {
            send_rank.push_back(kv.first);

    const int nsend = send_rank.size();
    send_data.resize(nsend, nullptr);

    Vector<TagT> send_tags;
    send_tags.reserve(N_snds);

    Vector<Vector<std::size_t> > send_offset(nsend);
    Vector<std::size_t> offset;
    send_size.reserve(nsend);

    std::size_t TotalSndsVolume = 0;
    for (int i = 0; i < nsend; ++i) {
        std::size_t nbytes = 0;
        for (int imf = 0; imf < nmfs; ++imf) {
            auto const& tags = *(cmds[imf]->m_SndTags);
            auto it = tags.find(send_rank[i]);
            if (it != tags.end()) {
                for (auto const& cct : it->second) {
                    auto const& sfab = (*mf[imf])[cct.srcIndex];
                    send_offset[i].push_back(nbytes);
                    send_tags.push_back({amrex::makeArray4<T>(nullptr,cct.sbox,ncomp[imf]),
                                         sfab.const_array(scomp[imf],ncomp[imf]),
                                         cct.sbox, Dim3{0,0,0}});
                    nbytes += sfab.nBytes(cct.sbox,ncomp[imf]);

        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);

        offset.push_back(TotalSndsVolume);
        TotalSndsVolume += nbytes;

        send_size.push_back(nbytes);

    for (int i = 0; i < nsend; ++i) {
        send_data[i] = the_send_data + offset[i];
        for (int j = 0, nj = send_offset[i].size(); j < nj; ++j) {
            send_tags[k++].dfab.p = (T*)(send_data[i] + send_offset[i][j]);

    detail::fbv_copy(send_tags);

    FabArray<FAB>::PostSnds(send_data, send_size, send_rank, send_reqs, SeqNum);

#if !defined(AMREX_DEBUG)
    detail::fbv_copy(local_tags);
#if !defined(AMREX_DEBUG)

    if (!FabArrayBase::CheckRcvStats(recv_stat, recv_size, SeqNum)) {
        amrex::Abort("FillBoundary(vector) failed with wrong message size");

    detail::fbv_copy(recv_tags);

    Vector<MPI_Status> stats(send_reqs.size());
std::enable_if_t<IsFabArray<MF>::value>
    ncomp.reserve(mf.size());
    nghost.reserve(mf.size());
    for (auto const& x : mf) {
        ncomp.push_back(x->nComp());
        nghost.push_back(x->nGrowVect());
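This convenience overload builds the per-MultiFab ncomp and nghost vectors and forwards to the fully specified FillBoundary above, which fuses all exchanges into a single communication phase. A usage sketch of the fully specified overload (the one whose signature appears in this header); mf_a, mf_b, and geom are assumed to exist:

void fill_many (amrex::MultiFab& mf_a, amrex::MultiFab& mf_b, amrex::Geometry const& geom)
{
    amrex::Vector<amrex::MultiFab*> mfs{&mf_a, &mf_b};
    amrex::Vector<int> scomp{0, 0};
    amrex::Vector<int> ncomp{mf_a.nComp(), mf_b.nComp()};
    amrex::Vector<amrex::IntVect> nghost{mf_a.nGrowVect(), mf_b.nGrowVect()};
    amrex::Vector<amrex::Periodicity> period{geom.periodicity(), geom.periodicity()};
    amrex::FillBoundary(mfs, scomp, ncomp, nghost, period);
}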