1#ifndef BL_PARALLELDESCRIPTOR_H
2#define BL_PARALLELDESCRIPTOR_H
3#include <AMReX_Config.H>
46template <
typename T>
class LayoutData;
49namespace ParallelDescriptor
79 bool m_finished =
true;
86 void MPI_Error(
const char* file,
int line,
const char* str,
int rc);
88#define BL_MPI_REQUIRE(x) \
91 if ( int l_status_ = (x) ) \
93 amrex::ParallelDescriptor::MPI_Error(__FILE__,__LINE__,#x, l_status_); \
104 char*** argv =
nullptr,
138 MPI_Comm_rank(comm,&r);
154#if defined(BL_USE_MPI3)
155 MPI_Barrier(m_team_comm);
164 if (omp_in_parallel()) {
170#if defined(BL_USE_MPI3)
171 std::atomic_thread_fence(std::memory_order_release);
172 MPI_Barrier(m_team_comm);
173 std::atomic_thread_fence(std::memory_order_acquire);
181#if defined(BL_USE_MPI3)
183 MPI_Comm_free(&m_team_comm);
184 if (m_rankInTeam==0) { MPI_Comm_free(&m_lead_comm); }
202 int m_do_team_reduce;
220 inline int MinTag () noexcept {
return m_MinTag; }
221 inline int MaxTag () noexcept {
return m_MaxTag; }
266 BL_MPI_REQUIRE(MPI_Comm_size(comm, &s));
312 return m_Team.m_size;
317 return m_Team.m_numTeams;
322 return m_Team.m_color;
327 return m_Team.m_lead;
332 return m_Team.m_rankInTeam;
337 return (rank >= 0) ? (rank - rank % m_Team.m_size) :
MPI_PROC_NULL;
362 return m_Team.m_do_team_reduce;
364 inline const ProcessTeam&
369 inline std::pair<int,int>
382 int nr = ntot / nworkers;
383 int nlft = ntot - nr * nworkers;
385 rb =
begin + rit * (nr + 1);
388 rb =
begin + rit * nr + nlft;
395 int nthreads = omp_get_num_threads();
397 int tid = omp_get_thread_num();
399 int nr = ntot / nthreads;
400 int nlft = ntot - nr * nthreads;
402 rb += tid * (nr + 1);
405 rb += tid * nr + nlft;
411 return std::make_pair(rb,re);
413 template <
typename F>
417 for (
int i = range.first; i < range.second; ++i) {
421 template <
typename F>
425 for (
int i = range.first; i < range.second; ++i) {
429 template <
typename F>
433 for (
int i = range.first; i < range.second; ++i) {
450 void Abort (
int errorcode = SIGABRT,
bool backtrace =
true);
454 double second () noexcept;
472 template <std::floating_point T>
476 template <std::floating_point T>
483 template <std::floating_point T>
488 template <std::floating_point T>
492 template <std::floating_point T>
499 template <std::floating_point T>
504 template <std::floating_point T>
508 template <std::floating_point T>
515 template <std::floating_point T>
520 template <std::floating_point T>
524 template <std::floating_point T>
531 template <std::floating_point T>
536 template <std::floating_point T>
540 template <std::floating_point T>
547 template <std::floating_point T>
552 template <std::floating_point T>
556 template <std::floating_point T>
563 template <std::floating_point T>
673 void Gather (
Real const* sendbuf,
int nsend,
Real* recvbuf,
int root);
680 template <
class T> [[nodiscard]] Message
Asend(
const T*,
size_t n,
int pid,
int tag);
681 template <
class T> [[nodiscard]] Message
Asend(
const T*,
size_t n,
int pid,
int tag,
MPI_Comm comm);
682 template <
class T> [[nodiscard]] Message
Asend(
const std::vector<T>& buf,
int pid,
int tag);
684 template <
class T> Message
Arecv(T*,
size_t n,
int pid,
int tag);
685 template <
class T> Message
Arecv(T*,
size_t n,
int pid,
int tag,
MPI_Comm comm);
686 template <
class T> Message
Arecv(std::vector<T>& buf,
int pid,
int tag);
688 template <
class T> Message
Send(
const T* buf,
size_t n,
int dst_pid,
int tag);
689 template <
class T> Message
Send(
const T* buf,
size_t n,
int dst_pid,
int tag,
MPI_Comm comm);
690 template <
class T> Message
Send(
const std::vector<T>& buf,
int dst_pid,
int tag);
692 template <
class T> Message
Recv(T*,
size_t n,
int pid,
int tag);
693 template <
class T> Message
Recv(T*,
size_t n,
int pid,
int tag,
MPI_Comm comm);
694 template <
class T> Message
Recv(std::vector<T>& buf,
int pid,
int tag);
696 template <
class T>
void Bcast(T*,
size_t n,
int root = 0);
697 template <
class T>
void Bcast(T*,
size_t n,
int root,
const MPI_Comm &comm);
700 template <
class T,
class T1>
void Scatter(T*,
size_t n,
const T1*,
size_t n1,
int root);
702 template <
class T,
class T1>
void Gather(
const T*,
size_t n, T1*,
size_t n1,
int root);
703 template <
class T> std::vector<T>
Gather(
const T&,
int root);
705 template <
class T>
void Gatherv (
const T* send,
int sc,
706 T* recv,
const std::vector<int>& rc,
const std::vector<int>& disp,
721 bool bExitOnError =
true,
737 void PMI_Initialize();
738 void PMI_PrintMeshcoords(
const pmi_mesh_coord_t *pmi_mesh_coord);
742 int select_comm_data_type (std::size_t nbytes);
743 std::size_t sizeof_selected_comm_data_type (std::size_t nbytes);
753ParallelDescriptor::Message
759namespace ParallelDescriptor {
762Asend<char> (
const char* buf,
size_t n,
int dst_pid,
int tag,
MPI_Comm comm);
766Asend (
const T* buf,
size_t n,
int dst_pid,
int tag,
MPI_Comm comm)
768 static_assert(!std::is_same_v<char,T>,
"Asend: char version has been specialized");
774 BL_MPI_REQUIRE( MPI_Isend(
const_cast<T*
>(buf),
781 BL_COMM_PROFILE(BLProfiler::AsendTsiiM, BLProfiler::AfterCall(), dst_pid, tag);
787ParallelDescriptor::Message
794ParallelDescriptor::Message
800namespace ParallelDescriptor {
803Send<char> (
const char* buf,
size_t n,
int dst_pid,
int tag,
MPI_Comm comm);
807Send (
const T* buf,
size_t n,
int dst_pid,
int tag,
MPI_Comm comm)
809 static_assert(!std::is_same_v<char,T>,
"Send: char version has been specialized");
813#ifdef BL_COMM_PROFILING
814 int dst_pid_world(-1);
816 BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
817 BL_MPI_REQUIRE( MPI_Comm_group(
Communicator(), &groupWorld) );
818 BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &dst_pid, groupWorld, &dst_pid_world) );
820 BL_COMM_PROFILE(BLProfiler::SendTsii, n *
sizeof(T), dst_pid_world, tag);
823 BL_MPI_REQUIRE( MPI_Send(
const_cast<T*
>(buf),
829 BL_COMM_PROFILE(BLProfiler::SendTsii, BLProfiler::AfterCall(), dst_pid, tag);
835ParallelDescriptor::Message
842ParallelDescriptor::Message
848namespace ParallelDescriptor {
851Arecv<char> (
char* buf,
size_t n,
int src_pid,
int tag,
MPI_Comm comm);
857 static_assert(!std::is_same_v<char,T>,
"Arecv: char version has been specialized");
863 BL_MPI_REQUIRE( MPI_Irecv(buf,
870 BL_COMM_PROFILE(BLProfiler::ArecvTsiiM, BLProfiler::AfterCall(), src_pid, tag);
876ParallelDescriptor::Message
883ParallelDescriptor::Message
889namespace ParallelDescriptor {
892Recv<char> (
char* buf,
size_t n,
int src_pid,
int tag,
MPI_Comm comm);
896Recv (T* buf,
size_t n,
int src_pid,
int tag,
MPI_Comm comm)
898 static_assert(!std::is_same_v<char,T>,
"Recv: char version has been specialized");
901 BL_COMM_PROFILE(BLProfiler::RecvTsii, BLProfiler::BeforeCall(), src_pid, tag);
904 BL_MPI_REQUIRE( MPI_Recv(buf,
911#ifdef BL_COMM_PROFILING
912 int src_pid_comm(stat.MPI_SOURCE);
913 int src_pid_world(stat.MPI_SOURCE);
914 if(src_pid_comm != MPI_ANY_SOURCE) {
916 BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
917 BL_MPI_REQUIRE( MPI_Comm_group(
Communicator(), &groupWorld) );
918 BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &src_pid_comm, groupWorld, &src_pid_world) );
921 BL_COMM_PROFILE(BLProfiler::RecvTsii, n *
sizeof(T), src_pid_world, stat.MPI_TAG);
928ParallelDescriptor::Message
944 BL_ASSERT(n <
static_cast<size_t>(std::numeric_limits<int>::max()));
947 BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
949 BL_MPI_REQUIRE( MPI_Bcast(t,
951 Mpi_typemap<T>::type(),
954 BL_COMM_PROFILE(BLProfiler::BCastTsi, n *
sizeof(T), root, BLProfiler::NoTag());
970 BL_ASSERT(n <
static_cast<size_t>(std::numeric_limits<int>::max()));
973 BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
975 BL_MPI_REQUIRE( MPI_Bcast(t,
977 Mpi_typemap<T>::type(),
980 BL_COMM_PROFILE(BLProfiler::BCastTsi, n *
sizeof(T), root, BLProfiler::NoTag());
983template <
class T,
class T1>
992 BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
994 BL_ASSERT(n <
static_cast<size_t>(std::numeric_limits<int>::max()));
995 BL_ASSERT(n1 <
static_cast<size_t>(std::numeric_limits<int>::max()));
997 BL_MPI_REQUIRE( MPI_Gather(
const_cast<T*
>(t),
999 Mpi_typemap<T>::type(),
1002 Mpi_typemap<T1>::type(),
1005 BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, n *
sizeof(T), root, BLProfiler::NoTag());
1013 BL_COMM_PROFILE(BLProfiler::GatherTi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
1015 std::vector<T> resl;
1017 BL_MPI_REQUIRE( MPI_Gather(
const_cast<T*
>(&t),
1019 Mpi_typemap<T>::type(),
1022 Mpi_typemap<T>::type(),
1025 BL_COMM_PROFILE(BLProfiler::GatherTi,
sizeof(T), root, BLProfiler::NoTag());
1032 T* recv,
const std::vector<int>& rc,
const std::vector<int>& disp,
1036 BL_COMM_PROFILE(BLProfiler::Gatherv, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
1042 BL_COMM_PROFILE(BLProfiler::Gatherv, std::accumulate(rc.begin(),rc.end(),0)*
sizeof(T), root, BLProfiler::NoTag());
1048 Vector<T>& recvbuf,
int root)
1053 Vector<T> T_to_send;
1054 T_to_send.reserve(sendbuf.local_size());
1056 for (
int i : sendbuf.IndexArray())
1058 T_to_send.push_back(sendbuf[i]);
1062 Vector<int> recvcount(nprocs, 0);
1063 recvbuf.resize(sendbuf.size());
1064 const Vector<int>& old_pmap = sendbuf.DistributionMap().ProcessorMap();
1065 for (
int i : old_pmap)
1071 Vector<Vector<int>> new_ind_to_old_ind(nprocs);
1072 for (
int i=0; i<nprocs; ++i)
1074 new_ind_to_old_ind[i].reserve(recvcount[i]);
1076 for (
int i=0; i<old_pmap.size(); ++i)
1078 new_ind_to_old_ind[old_pmap[i]].push_back(i);
1082 Vector<int> new_index_to_old_index;
1083 new_index_to_old_index.reserve(old_pmap.size());
1084 for (
const Vector<int>& v : new_ind_to_old_ind)
1090 new_index_to_old_index.push_back(el);
1095 Vector<int> disp(nprocs);
1096 if (!disp.empty()) { disp[0] = 0; }
1097 std::partial_sum(recvcount.begin(), recvcount.end()-1, disp.begin()+1);
1098 Vector<T> new_index_to_T(sendbuf.size());
1100 MPI_Gatherv(T_to_send.data(), T_to_send.size(),
1102 new_index_to_T.data(), recvcount.data(), disp.data(),
1112 Vector<int> old_index_to_new_index(sendbuf.size());
1114 for (
int i=0; i<old_index_to_new_index.size(); ++i)
1116 old_index_to_new_index[new_index_to_old_index[i]] = i;
1119 for (
int i=0; i<recvbuf.size(); ++i)
1121 recvbuf[i] = new_index_to_T[old_index_to_new_index[i]];
1126template <
class T,
class T1>
1135 BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
1137 BL_MPI_REQUIRE( MPI_Scatter(
const_cast<T1*
>(t1),
1139 Mpi_typemap<T1>::type(),
1142 Mpi_typemap<T>::type(),
1145 BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, n *
sizeof(T), root, BLProfiler::NoTag());
1150namespace ParallelDescriptor
1168Asend(
const std::vector<T>& ,
int ,
int )
1175Send(
const T* ,
size_t ,
int ,
int )
1189Send(
const std::vector<T>& ,
int ,
int )
1246template <
class T,
class T1>
1248Gather (
const T* t,
size_t n, T1* t1,
size_t n1,
int )
1253 int const sc =
static_cast<int>(n);
1254 for (
int j=0; j<sc; ++j) { t1[j] = t[j]; }
1261 std::vector<T> resl(1);
1269 T* recv,
const std::vector<int>& ,
1270 const std::vector<int>& ,
int )
1272 for (
int j=0; j<sc; ++j) { recv[j] = send[j]; }
1280 recvbuf.resize(sendbuf.
size());
1282 for (
int i=0; i<sendbuf.
size(); ++i)
1284 recvbuf[i] = sendbuf[i];
1288template <
class T,
class T1>
1296namespace ParallelDescriptor {
1304void DoAllReduce (T* r, MPI_Op op,
int cnt)
1307 Lazy::EvalReduction();
1312 BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, r, cnt,
1313 Mpi_typemap<T>::type(), op,
1318void DoReduce (T* r, MPI_Op op,
int cnt,
int cpu)
1321 Lazy::EvalReduction();
1327 BL_MPI_REQUIRE( MPI_Reduce(MPI_IN_PLACE, r, cnt,
1328 Mpi_typemap<T>::type(), op,
1331 BL_MPI_REQUIRE( MPI_Reduce(r, r, cnt,
1332 Mpi_typemap<T>::type(), op,
1341 template <std::
floating_po
int T>
1343 detail::DoAllReduce<T>(&rvar,MPI_SUM,1);
1346 template <std::
floating_po
int T>
1348 detail::DoAllReduce<T>(rvar,MPI_SUM,cnt);
1351 template <std::
floating_po
int T>
1354 int cnt = rvar.size();
1355 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1356 detail::DoAllReduce<T>(tmp.data(),MPI_SUM,cnt);
1357 for (
int i = 0; i < cnt; ++i) {
1358 rvar[i].get() = tmp[i];
1363 template <std::
floating_po
int T>
1365 detail::DoReduce<T>(&rvar,MPI_SUM,1,cpu);
1368 template <std::
floating_po
int T>
1370 detail::DoReduce<T>(rvar,MPI_SUM,cnt,cpu);
1373 template <std::
floating_po
int T>
1376 int cnt = rvar.size();
1377 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1378 detail::DoReduce<T>(tmp.data(),MPI_SUM,cnt,cpu);
1379 for (
int i = 0; i < cnt; ++i) {
1380 rvar[i].get() = tmp[i];
1385 template <std::
floating_po
int T>
1387 detail::DoAllReduce<T>(&rvar,MPI_MAX,1);
1390 template <std::
floating_po
int T>
1392 detail::DoAllReduce<T>(rvar,MPI_MAX,cnt);
1395 template <std::
floating_po
int T>
1398 int cnt = rvar.size();
1399 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1400 detail::DoAllReduce<T>(tmp.data(),MPI_MAX,cnt);
1401 for (
int i = 0; i < cnt; ++i) {
1402 rvar[i].get() = tmp[i];
1407 template <std::
floating_po
int T>
1409 detail::DoReduce<T>(&rvar,MPI_MAX,1,cpu);
1412 template <std::
floating_po
int T>
1414 detail::DoReduce<T>(rvar,MPI_MAX,cnt,cpu);
1417 template <std::
floating_po
int T>
1420 int cnt = rvar.size();
1421 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1422 detail::DoReduce<T>(tmp.data(),MPI_MAX,cnt,cpu);
1423 for (
int i = 0; i < cnt; ++i) {
1424 rvar[i].get() = tmp[i];
1429 template <std::
floating_po
int T>
1431 detail::DoAllReduce<T>(&rvar,MPI_MIN,1);
1434 template <std::
floating_po
int T>
1436 detail::DoAllReduce<T>(rvar,MPI_MIN,cnt);
1439 template <std::
floating_po
int T>
1442 int cnt = rvar.size();
1443 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1444 detail::DoAllReduce<T>(tmp.data(),MPI_MIN,cnt);
1445 for (
int i = 0; i < cnt; ++i) {
1446 rvar[i].get() = tmp[i];
1451 template <std::
floating_po
int T>
1453 detail::DoReduce<T>(&rvar,MPI_MIN,1,cpu);
1456 template <std::
floating_po
int T>
1458 detail::DoReduce<T>(rvar,MPI_MIN,cnt,cpu);
1461 template <std::
floating_po
int T>
1464 int cnt = rvar.size();
1465 Vector<T> tmp{std::begin(rvar), std::end(rvar)};
1466 detail::DoReduce<T>(tmp.data(),MPI_MIN,cnt,cpu);
1467 for (
int i = 0; i < cnt; ++i) {
1468 rvar[i].get() = tmp[i];
1475 template <std::
floating_po
int T>
1478 template <std::
floating_po
int T>
1481 template <std::
floating_po
int T>
1482 void ReduceRealSum (Vector<std::reference_wrapper<T> >
const&) {}
1485 template <std::
floating_po
int T>
1488 template <std::
floating_po
int T>
1491 template <std::
floating_po
int T>
1492 void ReduceRealSum (Vector<std::reference_wrapper<T> >
const&,
int) {}
1495 template <std::
floating_po
int T>
1498 template <std::
floating_po
int T>
1501 template <std::
floating_po
int T>
1502 void ReduceRealMax (Vector<std::reference_wrapper<T> >
const&) {}
1505 template <std::
floating_po
int T>
1508 template <std::
floating_po
int T>
1511 template <std::
floating_po
int T>
1512 void ReduceRealMax (Vector<std::reference_wrapper<T> >
const&,
int) {}
1515 template <std::
floating_po
int T>
1518 template <std::
floating_po
int T>
1521 template <std::
floating_po
int T>
1522 void ReduceRealMin (Vector<std::reference_wrapper<T> >
const&) {}
1525 template <std::
floating_po
int T>
1528 template <std::
floating_po
int T>
1531 template <std::
floating_po
int T>
1532 void ReduceRealMin (Vector<std::reference_wrapper<T> >
const&,
int) {}
1538namespace ParallelDescriptor {
1545 static_assert(std::is_same<T,double>() ||
1546 std::is_same<T,float >(),
1547 "Unsupported type T for GpuComplex");
1548 if constexpr (std::is_same<T,double>()) {
1549 return MPI_C_DOUBLE_COMPLEX;
1551 return MPI_C_FLOAT_COMPLEX;
1556template<
typename TV,
typename TI>
1564 static_assert(std::is_trivially_copyable_v<T>,
1565 "To communicate with MPI, ValLocPair must be trivially copyable.");
1566 static_assert(std::is_standard_layout_v<T>,
1567 "To communicate with MPI, ValLocPair must be standard layout");
1574 int blocklens[] = { 1, 1 };
1576 BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) );
1577 BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) );
1580 BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types,
1582 MPI_Aint lb, extent;
1583 BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) );
1584 if (extent !=
sizeof(T)) {
1586 BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0,
sizeof(vlp[0]), &mpi_type) );
1587 BL_MPI_REQUIRE( MPI_Type_free(&tmp) );
1589 BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) );
1591 m_mpi_types.push_back(&mpi_type);
1597template <
typename T,
typename F>
1600 static MPI_Op mpi_op = MPI_OP_NULL;
1601 if (mpi_op == MPI_OP_NULL) {
1602 static auto user_fn = [] (
void *invec,
void *inoutvec,
int* len,
1605 auto in =
static_cast<T const*
>(invec);
1606 auto out =
static_cast<T*
>(inoutvec);
1607 for (
int i = 0; i < *len; ++i) {
1608 out[i] =
F()(in[i],out[i]);
1611 BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) );
1612 m_mpi_ops.push_back(&mpi_op);
#define BL_COMM_PROFILE(cft, size, pid, tag)
Definition AMReX_BLProfiler.H:587
#define BL_PROFILE_T_S(fname, T)
Definition AMReX_BLProfiler.H:554
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_EXPORT
Definition AMReX_Extension.H:191
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Hold the description and status of communication data.
Definition AMReX_ParallelDescriptor.H:57
Message(MPI_Request req_, MPI_Datatype type_)
Definition AMReX_ParallelDescriptor.H:61
MPI_Datatype type() const
Definition AMReX_ParallelDescriptor.H:73
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
bool test()
Definition AMReX_ParallelDescriptor.cpp:1191
MPI_Status stat() const
Definition AMReX_ParallelDescriptor.H:75
void wait()
Definition AMReX_ParallelDescriptor.cpp:1187
Message(MPI_Status stat_, MPI_Datatype type_)
Definition AMReX_ParallelDescriptor.H:65
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79
amrex_long Long
Definition AMReX_INT.H:30
void ReduceIntMax(int &)
Definition AMReX_ParallelDescriptor.cpp:1266
void ReduceLongAnd(Long &)
Definition AMReX_ParallelDescriptor.cpp:1235
void ReduceBoolAnd(bool &)
Definition AMReX_ParallelDescriptor.cpp:1289
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Gather(Real const *sendbuf, int nsend, Real *recvbuf, int root)
Definition AMReX_ParallelDescriptor.cpp:1173
void Barrier(const std::string &)
Definition AMReX_ParallelDescriptor.cpp:1215
void ReduceIntSum(int &)
Definition AMReX_ParallelDescriptor.cpp:1265
void ReduceBoolOr(bool &)
Definition AMReX_ParallelDescriptor.cpp:1290
void ReadAndBcastFile(const std::string &filename, Vector< char > &charBuf, bool bExitOnError, const MPI_Comm &comm)
Definition AMReX_ParallelDescriptor.cpp:1495
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
void ReduceLongSum(Long &)
Definition AMReX_ParallelDescriptor.cpp:1236
int IOProcessorNumber() noexcept
The MPI rank number of the I/O Processor (probably rank 0). This rank is usually used to write to std...
Definition AMReX_ParallelDescriptor.H:279
void ReduceLongMax(Long &)
Definition AMReX_ParallelDescriptor.cpp:1237
bool IOProcessor() noexcept
Is this CPU the I/O Processor? To get the rank number, call IOProcessorNumber()
Definition AMReX_ParallelDescriptor.H:289
void ReduceLongMin(Long &)
Definition AMReX_ParallelDescriptor.cpp:1238
void ReduceIntMin(int &)
Definition AMReX_ParallelDescriptor.cpp:1267
void EvalReduction()
Definition AMReX_Lazy.cpp:20
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition AMReX_MPMD.cpp:122
int MyProc()
Definition AMReX_MPMD.cpp:117
int MyProcAll() noexcept
my rank in world communicator
Definition AMReX_ParallelContext.H:61
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int get_inc_mpi_tag() noexcept
get and increment mpi tag in current frame
Definition AMReX_ParallelContext.H:93
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsAll() noexcept
number of ranks in world communicator
Definition AMReX_ParallelContext.H:59
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
bool sameTeam(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:345
int RankInLeadComm(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:355
void Test(MPI_Request &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1220
const char * ErrorString(int)
ErrorString return string associated with error internal error condition.
Definition AMReX_ParallelDescriptor.cpp:1213
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1154
MPI_Comm Communicator() noexcept
Definition AMReX_ParallelDescriptor.H:223
void EndTeams()
Definition AMReX_ParallelDescriptor.cpp:1656
void Waitany(Vector< MPI_Request > &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1312
void Gatherv(const T *send, int sc, T *recv, const std::vector< int > &rc, const std::vector< int > &disp, int root)
Definition AMReX_ParallelDescriptor.H:1268
Message Abarrier()
Definition AMReX_ParallelDescriptor.cpp:1217
void ReduceRealMin(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1229
void StartParallel(int *, char ***, MPI_Comm)
Perform any needed parallel initialization. This MUST be the first routine in this class called from ...
Definition AMReX_ParallelDescriptor.cpp:1165
bool isTeamLead() noexcept
Definition AMReX_ParallelDescriptor.H:340
void Wait(MPI_Request &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1304
std::string mpi_level_to_string(int mtlev)
Definition AMReX_ParallelDescriptor.cpp:1662
void ReduceRealSum(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1227
void Waitsome(Vector< MPI_Request > &, int &, Vector< int > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1316
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:113
std::pair< int, int > team_range(int begin, int end, int rit=-1, int nworkers=0) noexcept
Definition AMReX_ParallelDescriptor.H:370
int MyTeamLead() noexcept
Definition AMReX_ParallelDescriptor.H:325
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1308
int MyTeamColor() noexcept
Definition AMReX_ParallelDescriptor.H:320
bool doTeamReduce() noexcept
Definition AMReX_ParallelDescriptor.H:360
int MinTag() noexcept
Definition AMReX_ParallelDescriptor.H:220
int TeamLead(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:335
int MyRankInProcessor() noexcept
Definition AMReX_ParallelDescriptor.H:243
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition AMReX_ParallelDescriptor.H:1175
void Initialize()
Definition AMReX_ParallelDescriptor.cpp:1546
void Finalize()
Definition AMReX_ParallelDescriptor.cpp:1590
int TeamSize() noexcept
Definition AMReX_ParallelDescriptor.H:310
void EndParallel()
Perform any needed parallel finalization. This MUST be the last routine in this class called from wit...
Definition AMReX_ParallelDescriptor.cpp:1197
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition AMReX_ParallelDescriptor.cpp:1295
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:678
void IProbe(int, int, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1222
int MyRankInNode() noexcept
Definition AMReX_ParallelDescriptor.H:233
void GatherLayoutDataToVector(const LayoutData< T > &sendbuf, Vector< T > &recvbuf, int root)
Gather LayoutData values to a vector on root.
Definition AMReX_ParallelDescriptor.H:1277
int MaxTag() noexcept
Definition AMReX_ParallelDescriptor.H:221
void Comm_dup(MPI_Comm, MPI_Comm &)
Definition AMReX_ParallelDescriptor.cpp:1225
int NProcsPerProcessor() noexcept
Definition AMReX_ParallelDescriptor.H:238
double second() noexcept
Returns wall-clock seconds since start of execution.
Definition AMReX_ParallelDescriptor.cpp:1298
const std::string Unnamed("Unnamed")
Used as default argument to ParallelDescriptor::Barrier().
int MyRankInTeam() noexcept
Definition AMReX_ParallelDescriptor.H:330
void Scatter(T *, size_t n, const T1 *, size_t n1, int root)
Definition AMReX_ParallelDescriptor.H:1290
int NProcsPerNode() noexcept
Definition AMReX_ParallelDescriptor.H:228
void Abort(int errorcode, bool backtrace)
Abort with specified error code.
Definition AMReX_ParallelDescriptor.cpp:1204
void team_for(int begin, int end, const F &f)
Definition AMReX_ParallelDescriptor.H:414
int NTeams() noexcept
Definition AMReX_ParallelDescriptor.H:315
Message Recv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1217
void StartTeams()
Split the process pool into teams.
Definition AMReX_ParallelDescriptor.cpp:1599
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1196
MPI_Op Mpi_op()
Definition AMReX_ParallelDescriptor.H:1598
void ReduceRealMax(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1228
const ProcessTeam & MyTeam() noexcept
Definition AMReX_ParallelDescriptor.H:365
static constexpr int MPI_DATATYPE_NULL
Definition AMReX_ccse-mpi.H:56
static constexpr int MPI_PROC_NULL
Definition AMReX_ccse-mpi.H:61
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_COMM_WORLD
Definition AMReX_ccse-mpi.H:58
int MPI_Group
Definition AMReX_ccse-mpi.H:52
int MPI_Op
Definition AMReX_ccse-mpi.H:50
int MPI_Request
Definition AMReX_ccse-mpi.H:54
static constexpr int MPI_REQUEST_NULL
Definition AMReX_ccse-mpi.H:57
int MPI_Datatype
Definition AMReX_ccse-mpi.H:53
Definition AMReX_Amr.cpp:50
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ Dim3 begin(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2018
__host__ __device__ Dim3 end(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2028
A host / device complex number type, because std::complex doesn't work in device code with Cuda yet.
Definition AMReX_GpuComplex.H:30
static MPI_Datatype type()
Definition AMReX_ParallelDescriptor.H:1543
static MPI_Datatype type()
Definition AMReX_ParallelDescriptor.H:1559
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78
static MPI_Datatype type()
Provide functionalities needed to construct a team of processes to perform a particular job.
Definition AMReX_ParallelDescriptor.H:148
MPI_Comm team_t
Definition AMReX_ParallelDescriptor.H:149
void clear()
free a communicator
Definition AMReX_ParallelDescriptor.H:180
void Barrier() const
synchronize processes within the team
Definition AMReX_ParallelDescriptor.H:152
const MPI_Comm & get_team_comm() const
return the communicator
Definition AMReX_ParallelDescriptor.H:193
const MPI_Comm & get_lead_comm() const
Definition AMReX_ParallelDescriptor.H:194
void MemoryBarrier() const
memory fence
Definition AMReX_ParallelDescriptor.H:161
const team_t & get() const
Definition AMReX_ParallelDescriptor.H:189
Definition AMReX_ValLocPair.H:10
Definition AMReX_ccse-mpi.H:55