1 #ifndef BL_PARALLELDESCRIPTOR_H
2 #define BL_PARALLELDESCRIPTOR_H
3 #include <AMReX_Config.H>
46 template <typename T> class LayoutData;
49 namespace ParallelDescriptor
86 void MPI_Error(const char* file, int line, const char* str, int rc);
88 #define BL_MPI_REQUIRE(x) \
91 if ( int l_status_ = (x) ) \
93 amrex::ParallelDescriptor::MPI_Error(__FILE__,__LINE__,#x, l_status_); \
104 char*** argv = nullptr,
134 MPI_Comm_rank(comm,&r);
150 #if defined(BL_USE_MPI3)
166 #if defined(BL_USE_MPI3)
167 std::atomic_thread_fence(std::memory_order_release);
169 std::atomic_thread_fence(std::memory_order_acquire);
177 #if defined(BL_USE_MPI3)
253 BL_MPI_REQUIRE(MPI_Comm_size(comm, &s));
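BL_MPI_REQUIRE wraps a raw MPI call and, on a nonzero return code, forwards the file, line, stringized expression and status to amrex::ParallelDescriptor::MPI_Error, which aborts the run. A minimal usage sketch (hypothetical caller code, not part of this header):

    // Sketch: any MPI call whose failure should abort with file/line context.
    int rank = -1;
    BL_MPI_REQUIRE( MPI_Comm_rank(ParallelDescriptor::Communicator(), &rank) );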
348 inline const ProcessTeam&
353 inline std::pair<int,int>
366 int nr = ntot / nworkers;
367 int nlft = ntot - nr * nworkers;
369 rb = begin + rit * (nr + 1);
372 rb = begin + rit * nr + nlft;
383 int nr = ntot / nthreads;
384 int nlft = ntot - nr * nthreads;
386 rb += tid * (nr + 1);
389 rb += tid * nr + nlft;
395 return std::make_pair(rb,re);
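team_range splits [begin,end) as evenly as possible over the team's workers (the first `nlft` workers get one extra element) and, when called inside an OpenMP parallel region, further over threads; team_for simply applies a callable to the caller's sub-range. A usage sketch, assuming process teams have been started via StartTeams():

    // Sketch: each team worker handles only its own slice of [0, N).
    const int N = 1000;
    const auto range = ParallelDescriptor::team_range(0, N);
    for (int i = range.first; i < range.second; ++i) {
        // ... process element i ...
    }
    // team_for expresses the same loop with a callable:
    ParallelDescriptor::team_for(0, N, [] (int i) { amrex::ignore_unused(i); });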
397 template <typename F>
401 for (int i = range.first; i < range.second; ++i) {
405 template <typename F>
409 for (int i = range.first; i < range.second; ++i) {
413 template <typename F>
417 for (int i = range.first; i < range.second; ++i) {
432 void Abort (int errorcode = SIGABRT, bool backtrace = true);
436 double second () noexcept;
449 template <typename T>
450 std::enable_if_t<std::is_floating_point_v<T>>
453 template <typename T>
454 std::enable_if_t<std::is_floating_point_v<T>>
460 template <typename T>
461 std::enable_if_t<std::is_floating_point_v<T>>
465 template <typename T>
466 std::enable_if_t<std::is_floating_point_v<T>>
469 template <typename T>
470 std::enable_if_t<std::is_floating_point_v<T>>
476 template <typename T>
477 std::enable_if_t<std::is_floating_point_v<T>>
481 template <typename T>
482 std::enable_if_t<std::is_floating_point_v<T>>
485 template <typename T>
486 std::enable_if_t<std::is_floating_point_v<T>>
492 template <typename T>
493 std::enable_if_t<std::is_floating_point_v<T>>
497 template <typename T>
498 std::enable_if_t<std::is_floating_point_v<T>>
501 template <typename T>
502 std::enable_if_t<std::is_floating_point_v<T>>
508 template <typename T>
509 std::enable_if_t<std::is_floating_point_v<T>>
513 template <typename T>
514 std::enable_if_t<std::is_floating_point_v<T>>
517 template <typename T>
518 std::enable_if_t<std::is_floating_point_v<T>>
524 template <typename T>
525 std::enable_if_t<std::is_floating_point_v<T>>
529 template <typename T>
530 std::enable_if_t<std::is_floating_point_v<T>>
533 template <typename T>
534 std::enable_if_t<std::is_floating_point_v<T>>
540 template <typename T>
541 std::enable_if_t<std::is_floating_point_v<T>>
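The ReduceRealSum/ReduceRealMax/ReduceRealMin overloads declared above reduce floating-point data across all ranks; the variants that take a trailing `cpu` argument deliver the result only to that rank. A usage sketch:

    // Sketch: turn per-rank partial results into global ones.
    amrex::Real local_sum = 0.0;   // assume this was accumulated on each rank
    amrex::Real local_max = 0.0;
    ParallelDescriptor::ReduceRealSum(local_sum);   // every rank receives the global sum
    ParallelDescriptor::ReduceRealMax(local_max, ParallelDescriptor::IOProcessorNumber());
    // After the second call only the I/O rank is guaranteed to hold the global maximum.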
608 void Gather (Real const* sendbuf, int nsend, Real* recvbuf, int root);
615 template <class T> Message Asend(const T*, size_t n, int pid, int tag);
616 template <class T> Message Asend(const T*, size_t n, int pid, int tag, MPI_Comm comm);
617 template <class T> Message Asend(const std::vector<T>& buf, int pid, int tag);
619 template <class T> Message Arecv(T*, size_t n, int pid, int tag);
620 template <class T> Message Arecv(T*, size_t n, int pid, int tag, MPI_Comm comm);
621 template <class T> Message Arecv(std::vector<T>& buf, int pid, int tag);
623 template <class T> Message Send(const T* buf, size_t n, int dst_pid, int tag);
624 template <class T> Message Send(const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
625 template <class T> Message Send(const std::vector<T>& buf, int dst_pid, int tag);
627 template <class T> Message Recv(T*, size_t n, int pid, int tag);
628 template <class T> Message Recv(T*, size_t n, int pid, int tag, MPI_Comm comm);
629 template <class T> Message Recv(std::vector<T>& buf, int pid, int tag);
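Send and Recv are the blocking counterparts of Asend/Arecv; SeqNum() hands out a fresh message tag. A sketch of a simple two-rank exchange (buffer contents are placeholders):

    // Sketch: rank 0 sends a buffer to rank 1 with a tag from SeqNum().
    std::vector<amrex::Real> buf(100, 0.0);
    const int tag = ParallelDescriptor::SeqNum();
    if (ParallelDescriptor::MyProc() == 0) {
        ParallelDescriptor::Send(buf.data(), buf.size(), 1, tag);
    } else if (ParallelDescriptor::MyProc() == 1) {
        ParallelDescriptor::Recv(buf.data(), buf.size(), 0, tag);
    }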
631 template <class T> void Bcast(T*, size_t n, int root = 0);
632 template <class T> void Bcast(T*, size_t n, int root, const MPI_Comm &comm);
635 template <class T, class T1> void Scatter(T*, size_t n, const T1*, size_t n1, int root);
637 template <class T, class T1> void Gather(const T*, size_t n, T1*, size_t n1, int root);
638 template <class T> std::vector<T> Gather(const T&, int root);
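The Gather overload taking a single value collects one entry per rank into a std::vector on the root rank. A usage sketch:

    // Sketch: gather one scalar from every rank onto the I/O processor.
    amrex::Real my_value = static_cast<amrex::Real>(ParallelDescriptor::MyProc());
    std::vector<amrex::Real> all_values =
        ParallelDescriptor::Gather(my_value, ParallelDescriptor::IOProcessorNumber());
    // On the root rank all_values holds one entry per rank; other ranks should not rely on its contents.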
640 template <class T> void Gatherv (const T* send, int sc,
641 T* recv, const std::vector<int>& rc, const std::vector<int>& disp,
655 bool bExitOnError = true,
671 void PMI_Initialize();
672 void PMI_PrintMeshcoords(const pmi_mesh_coord_t *pmi_mesh_coord);
676 int select_comm_data_type (std::size_t nbytes);
677 std::size_t sizeof_selected_comm_data_type (std::size_t nbytes);
687 ParallelDescriptor::Message
693 namespace ParallelDescriptor {
696 Asend<char> (const char* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
700 Asend (const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm)
702 static_assert(!std::is_same_v<char,T>, "Asend: char version has been specialized");
708 BL_MPI_REQUIRE( MPI_Isend(const_cast<T*>(buf),
715 BL_COMM_PROFILE(BLProfiler::AsendTsiiM, BLProfiler::AfterCall(), dst_pid, tag);
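Asend returns immediately with a Message wrapping the MPI_Request; the buffer must stay alive until wait() (or a successful test()) completes the operation. A sketch of a non-blocking exchange (assumes exactly two ranks for brevity):

    // Sketch: post the receive first, then the send, and complete both via Message.
    std::vector<int> sendbuf(64, ParallelDescriptor::MyProc());
    std::vector<int> recvbuf(64, 0);
    const int other = 1 - ParallelDescriptor::MyProc();
    const int tag   = ParallelDescriptor::SeqNum();
    auto rmsg = ParallelDescriptor::Arecv(recvbuf.data(), recvbuf.size(), other, tag);
    auto smsg = ParallelDescriptor::Asend(sendbuf.data(), sendbuf.size(), other, tag);
    smsg.wait();
    rmsg.wait();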
721 ParallelDescriptor::Message
728 ParallelDescriptor::Message
734 namespace ParallelDescriptor {
737 Send<char> (const char* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
741 Send (const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm)
743 static_assert(!std::is_same_v<char,T>, "Send: char version has been specialized");
747 #ifdef BL_COMM_PROFILING
748 int dst_pid_world(-1);
750 BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
751 BL_MPI_REQUIRE( MPI_Comm_group(Communicator(), &groupWorld) );
752 BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &dst_pid, groupWorld, &dst_pid_world) );
754 BL_COMM_PROFILE(BLProfiler::SendTsii, n * sizeof(T), dst_pid_world, tag);
757 BL_MPI_REQUIRE( MPI_Send(const_cast<T*>(buf),
763 BL_COMM_PROFILE(BLProfiler::SendTsii, BLProfiler::AfterCall(), dst_pid, tag);
769 ParallelDescriptor::Message
776 ParallelDescriptor::Message
782 namespace ParallelDescriptor {
785 Arecv<char> (char* buf, size_t n, int src_pid, int tag, MPI_Comm comm);
789 Arecv (T* buf, size_t n, int src_pid, int tag, MPI_Comm comm)
791 static_assert(!std::is_same_v<char,T>, "Arecv: char version has been specialized");
797 BL_MPI_REQUIRE( MPI_Irecv(buf,
804 BL_COMM_PROFILE(BLProfiler::ArecvTsiiM, BLProfiler::AfterCall(), src_pid, tag);
810 ParallelDescriptor::Message
817 ParallelDescriptor::Message
823 namespace ParallelDescriptor {
826 Recv<char> (char* buf, size_t n, int src_pid, int tag, MPI_Comm comm);
830 Recv (T* buf, size_t n, int src_pid, int tag, MPI_Comm comm)
832 static_assert(!std::is_same_v<char,T>, "Recv: char version has been specialized");
835 BL_COMM_PROFILE(BLProfiler::RecvTsii, BLProfiler::BeforeCall(), src_pid, tag);
838 BL_MPI_REQUIRE( MPI_Recv(buf,
845 #ifdef BL_COMM_PROFILING
846 int src_pid_comm(stat.MPI_SOURCE);
847 int src_pid_world(stat.MPI_SOURCE);
848 if(src_pid_comm != MPI_ANY_SOURCE) {
850 BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
851 BL_MPI_REQUIRE( MPI_Comm_group(Communicator(), &groupWorld) );
852 BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &src_pid_comm, groupWorld, &src_pid_world) );
855 BL_COMM_PROFILE(BLProfiler::RecvTsii, n * sizeof(T), src_pid_world, stat.MPI_TAG);
862 ParallelDescriptor::Message
881 BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
883 BL_MPI_REQUIRE( MPI_Bcast(t,
885 Mpi_typemap<T>::type(),
888 BL_COMM_PROFILE(BLProfiler::BCastTsi, n * sizeof(T), root, BLProfiler::NoTag());
907 BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
909 BL_MPI_REQUIRE( MPI_Bcast(t,
911 Mpi_typemap<T>::type(),
914 BL_COMM_PROFILE(BLProfiler::BCastTsi, n * sizeof(T), root, BLProfiler::NoTag());
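Bcast replicates n elements from the root rank to every other rank; the overload without an explicit communicator uses the current frame's communicator and defaults root to 0. A usage sketch:

    // Sketch: the I/O rank fills a small parameter block, then everyone gets a copy.
    int params[3] = {0, 0, 0};
    if (ParallelDescriptor::IOProcessor()) {
        params[0] = 1; params[1] = 2; params[2] = 3;   // e.g. values parsed from an inputs file
    }
    ParallelDescriptor::Bcast(params, 3, ParallelDescriptor::IOProcessorNumber());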
917 template <class T, class T1>
926 BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
931 BL_MPI_REQUIRE( MPI_Gather(const_cast<T*>(t),
933 Mpi_typemap<T>::type(),
936 Mpi_typemap<T1>::type(),
939 BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, n * sizeof(T), root, BLProfiler::NoTag());
947 BL_COMM_PROFILE(BLProfiler::GatherTi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
951 BL_MPI_REQUIRE( MPI_Gather(const_cast<T*>(&t),
953 Mpi_typemap<T>::type(),
956 Mpi_typemap<T>::type(),
959 BL_COMM_PROFILE(BLProfiler::GatherTi, sizeof(T), root, BLProfiler::NoTag());
966 T* recv, const std::vector<int>& rc, const std::vector<int>& disp,
982 Vector<T>& recvbuf, int root)
988 T_to_send.reserve(sendbuf.local_size());
990 for (int i : sendbuf.IndexArray())
992 T_to_send.push_back(sendbuf[i]);
996 Vector<int> recvcount(nprocs, 0);
997 recvbuf.resize(sendbuf.size());
998 const Vector<int>& old_pmap = sendbuf.DistributionMap().ProcessorMap();
999 for (int i : old_pmap)
1005 Vector<Vector<int>> new_ind_to_old_ind(nprocs);
1006 for (int i=0; i<nprocs; ++i)
1008 new_ind_to_old_ind[i].reserve(recvcount[i]);
1010 for (int i=0; i<old_pmap.size(); ++i)
1012 new_ind_to_old_ind[old_pmap[i]].push_back(i);
1016 Vector<int> new_index_to_old_index;
1017 new_index_to_old_index.reserve(old_pmap.size());
1018 for (const Vector<int>& v : new_ind_to_old_ind)
1024 new_index_to_old_index.push_back(el);
1029 Vector<int> disp(nprocs);
1030 if (!disp.empty()) { disp[0] = 0; }
1031 std::partial_sum(recvcount.begin(), recvcount.end()-1, disp.begin()+1);
1032 Vector<T> new_index_to_T(sendbuf.size());
1034 MPI_Gatherv(T_to_send.data(), T_to_send.size(),
1036 new_index_to_T.data(), recvcount.data(), disp.data(),
1046 Vector<int> old_index_to_new_index(sendbuf.size());
1048 for (int i=0; i<old_index_to_new_index.size(); ++i)
1050 old_index_to_new_index[new_index_to_old_index[i]] = i;
1053 for (int i=0; i<recvbuf.size(); ++i)
1055 recvbuf[i] = new_index_to_T[old_index_to_new_index[i]];
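GatherLayoutDataToVector collects one value per box from a LayoutData onto root and, as the index bookkeeping above shows, reorders the received values so that entry i corresponds to global box index i rather than to the order in which ranks contributed them. A usage sketch ('cost' is a hypothetical LayoutData<Real>):

    // Sketch: gather per-box values to the I/O rank, indexed by global box number.
    amrex::Vector<amrex::Real> all_costs;
    ParallelDescriptor::GatherLayoutDataToVector(cost, all_costs,
                                                 ParallelDescriptor::IOProcessorNumber());
    // On root, all_costs[i] is the value associated with box i of cost's BoxArray.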
1060 template <class T, class T1>
1069 BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
1071 BL_MPI_REQUIRE( MPI_Scatter(const_cast<T1*>(t1),
1073 Mpi_typemap<T1>::type(),
1076 Mpi_typemap<T>::type(),
1079 BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, n * sizeof(T), root, BLProfiler::NoTag());
1084 namespace ParallelDescriptor
1102 Asend(const std::vector<T>& , int , int )
1109 Send(const T* , size_t , int , int )
1123 Send(const std::vector<T>& , int , int )
1180 template <class T, class T1>
1182 Gather (const T* t, size_t n, T1* t1, size_t n1, int )
1187 int const sc = static_cast<int>(n);
1188 for (int j=0; j<sc; ++j) { t1[j] = t[j]; }
1195 std::vector<T> resl(1);
1203 T* recv, const std::vector<int>& ,
1204 const std::vector<int>& , int )
1206 for (int j=0; j<sc; ++j) { recv[j] = send[j]; }
1214 recvbuf.resize(sendbuf.size());
1216 for (int i=0; i<sendbuf.size(); ++i)
1218 recvbuf[i] = sendbuf[i];
1222 template <class T, class T1>
1230 namespace ParallelDescriptor {
1232 #ifdef AMREX_USE_MPI
1236 template<typename T>
1245 BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, r, cnt,
1250 template<typename T>
1260 BL_MPI_REQUIRE( MPI_Reduce(MPI_IN_PLACE, r, cnt,
1264 BL_MPI_REQUIRE( MPI_Reduce(r, r, cnt,
1273 template <typename T>
1274 std::enable_if_t<std::is_floating_point_v<T>>
1276 detail::DoAllReduce<T>(&rvar,MPI_SUM,1);
1279 template <typename T>
1280 std::enable_if_t<std::is_floating_point_v<T>>
1282 detail::DoAllReduce<T>(rvar,MPI_SUM,cnt);
1285 template <typename T>
1286 std::enable_if_t<std::is_floating_point_v<T>>
1289 int cnt = rvar.size();
1291 detail::DoAllReduce<T>(tmp.data(),MPI_SUM,cnt);
1292 for (int i = 0; i < cnt; ++i) {
1293 rvar[i].get() = tmp[i];
1298 template <typename T>
1299 std::enable_if_t<std::is_floating_point_v<T>>
1301 detail::DoReduce<T>(&rvar,MPI_SUM,1,cpu);
1304 template <typename T>
1305 std::enable_if_t<std::is_floating_point_v<T>>
1307 detail::DoReduce<T>(rvar,MPI_SUM,cnt,cpu);
1310 template <typename T>
1311 std::enable_if_t<std::is_floating_point_v<T>>
1314 int cnt = rvar.size();
1316 detail::DoReduce<T>(tmp.data(),MPI_SUM,cnt,cpu);
1317 for (int i = 0; i < cnt; ++i) {
1318 rvar[i].get() = tmp[i];
1323 template <typename T>
1324 std::enable_if_t<std::is_floating_point_v<T>>
1326 detail::DoAllReduce<T>(&rvar,MPI_MAX,1);
1329 template <typename T>
1330 std::enable_if_t<std::is_floating_point_v<T>>
1332 detail::DoAllReduce<T>(rvar,MPI_MAX,cnt);
1335 template <typename T>
1336 std::enable_if_t<std::is_floating_point_v<T>>
1339 int cnt = rvar.size();
1341 detail::DoAllReduce<T>(tmp.data(),MPI_MAX,cnt);
1342 for (int i = 0; i < cnt; ++i) {
1343 rvar[i].get() = tmp[i];
1348 template <typename T>
1349 std::enable_if_t<std::is_floating_point_v<T>>
1351 detail::DoReduce<T>(&rvar,MPI_MAX,1,cpu);
1354 template <typename T>
1355 std::enable_if_t<std::is_floating_point_v<T>>
1357 detail::DoReduce<T>(rvar,MPI_MAX,cnt,cpu);
1360 template <typename T>
1361 std::enable_if_t<std::is_floating_point_v<T>>
1364 int cnt = rvar.size();
1366 detail::DoReduce<T>(tmp.data(),MPI_MAX,cnt,cpu);
1367 for (int i = 0; i < cnt; ++i) {
1368 rvar[i].get() = tmp[i];
1373 template <typename T>
1374 std::enable_if_t<std::is_floating_point_v<T>>
1376 detail::DoAllReduce<T>(&rvar,MPI_MIN,1);
1379 template <typename T>
1380 std::enable_if_t<std::is_floating_point_v<T>>
1382 detail::DoAllReduce<T>(rvar,MPI_MIN,cnt);
1385 template <typename T>
1386 std::enable_if_t<std::is_floating_point_v<T>>
1389 int cnt = rvar.size();
1391 detail::DoAllReduce<T>(tmp.data(),MPI_MIN,cnt);
1392 for (int i = 0; i < cnt; ++i) {
1393 rvar[i].get() = tmp[i];
1398 template <typename T>
1399 std::enable_if_t<std::is_floating_point_v<T>>
1401 detail::DoReduce<T>(&rvar,MPI_MIN,1,cpu);
1404 template <typename T>
1405 std::enable_if_t<std::is_floating_point_v<T>>
1407 detail::DoReduce<T>(rvar,MPI_MIN,cnt,cpu);
1410 template <typename T>
1411 std::enable_if_t<std::is_floating_point_v<T>>
1414 int cnt = rvar.size();
1416 detail::DoReduce<T>(tmp.data(),MPI_MIN,cnt,cpu);
1417 for (int i = 0; i < cnt; ++i) {
1418 rvar[i].get() = tmp[i];
1425 template <typename T>
1426 std::enable_if_t<std::is_floating_point_v<T>>
1429 template <typename T>
1430 std::enable_if_t<std::is_floating_point_v<T>>
1433 template <typename T>
1434 std::enable_if_t<std::is_floating_point_v<T>>
1435 ReduceRealSum (Vector<std::reference_wrapper<T> > const&) {}
1438 template <typename T>
1439 std::enable_if_t<std::is_floating_point_v<T>>
1442 template <typename T>
1443 std::enable_if_t<std::is_floating_point_v<T>>
1446 template <typename T>
1447 std::enable_if_t<std::is_floating_point_v<T>>
1448 ReduceRealSum (Vector<std::reference_wrapper<T> > const&, int) {}
1451 template <typename T>
1452 std::enable_if_t<std::is_floating_point_v<T>>
1455 template <typename T>
1456 std::enable_if_t<std::is_floating_point_v<T>>
1459 template <typename T>
1460 std::enable_if_t<std::is_floating_point_v<T>>
1461 ReduceRealMax (Vector<std::reference_wrapper<T> > const&) {}
1464 template <typename T>
1465 std::enable_if_t<std::is_floating_point_v<T>>
1468 template <typename T>
1469 std::enable_if_t<std::is_floating_point_v<T>>
1472 template <typename T>
1473 std::enable_if_t<std::is_floating_point_v<T>>
1474 ReduceRealMax (Vector<std::reference_wrapper<T> > const&, int) {}
1477 template <typename T>
1478 std::enable_if_t<std::is_floating_point_v<T>>
1481 template <typename T>
1482 std::enable_if_t<std::is_floating_point_v<T>>
1485 template <typename T>
1486 std::enable_if_t<std::is_floating_point_v<T>>
1487 ReduceRealMin (Vector<std::reference_wrapper<T> > const&) {}
1490 template <typename T>
1491 std::enable_if_t<std::is_floating_point_v<T>>
1494 template <typename T>
1495 std::enable_if_t<std::is_floating_point_v<T>>
1498 template <typename T>
1499 std::enable_if_t<std::is_floating_point_v<T>>
1500 ReduceRealMin (Vector<std::reference_wrapper<T> > const&, int) {}
1505 #ifdef AMREX_USE_MPI
1506 namespace ParallelDescriptor {
1513 static_assert(std::is_same<T,double>() ||
1514 std::is_same<T,float >(),
1515 "Unsupported type T for GpuComplex");
1516 if constexpr (std::is_same<T,double>()) {
1517 return MPI_C_DOUBLE_COMPLEX;
1519 return MPI_C_FLOAT_COMPLEX;
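Mpi_typemap<T>::type() maps a C++ element type onto the corresponding MPI_Datatype, which is what keeps the templated wrappers above generic. A sketch of using it directly in a raw MPI call (hypothetical helper, not part of this header):

    // Sketch: an in-place sum over the current communicator for any type known to Mpi_typemap.
    template <typename T>
    void allreduce_sum (T* data, int count)
    {
        BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, data, count,
                                      ParallelDescriptor::Mpi_typemap<T>::type(),
                                      MPI_SUM, ParallelDescriptor::Communicator()) );
    }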
1524 template<typename TV, typename TI>
1532 static_assert(std::is_trivially_copyable_v<T>,
1533 "To communicate with MPI, ValLocPair must be trivially copyable.");
1534 static_assert(std::is_standard_layout_v<T>,
1535 "To communicate with MPI, ValLocPair must be standard layout");
1542 int blocklens[] = { 1, 1 };
1544 BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) );
1545 BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) );
1548 BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types,
1550 MPI_Aint lb, extent;
1551 BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) );
1552 if (extent != sizeof(T)) {
1554 BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0, sizeof(vlp[0]), &mpi_type) );
1555 BL_MPI_REQUIRE( MPI_Type_free(&tmp) );
1557 BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) );
1565 template <typename T, typename F>
1568 static MPI_Op mpi_op = MPI_OP_NULL;
1569 if (mpi_op == MPI_OP_NULL) {
1570 static auto user_fn = [] (void *invec, void *inoutvec, int* len,
1573 auto in = static_cast<T const*>(invec);
1574 auto out = static_cast<T*>(inoutvec);
1575 for (int i = 0; i < *len; ++i) {
1576 out[i] = F()(in[i],out[i]);
1579 BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) );
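Combining Mpi_typemap<ValLocPair<TV,TI>>::type() with Mpi_op<T,F>() lets a value-plus-location reduction go through a plain MPI_Allreduce; F is any default-constructible binary functor over T. A sketch of a global "minimum value and which rank owns it" reduction (MinLoc and local_min are hypothetical names):

    // Sketch: reduce (value, index) pairs to the globally smallest value and its owner.
    using VLP = amrex::ValLocPair<amrex::Real, int>;
    struct MinLoc {
        VLP operator() (VLP const& a, VLP const& b) const {
            return (a.value < b.value) ? a : b;   // keep the pair with the smaller value
        }
    };
    VLP local;
    local.value = local_min;                       // hypothetical per-rank minimum
    local.index = ParallelDescriptor::MyProc();    // use the rank as the "location"
    BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, &local, 1,
                                  ParallelDescriptor::Mpi_typemap<VLP>::type(),
                                  ParallelDescriptor::Mpi_op<VLP,MinLoc>(),
                                  ParallelDescriptor::Communicator()) );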
#define BL_COMM_PROFILE(cft, size, pid, tag)
Definition: AMReX_BLProfiler.H:587
#define BL_PROFILE_T_S(fname, T)
Definition: AMReX_BLProfiler.H:554
#define BL_ASSERT(EX)
Definition: AMReX_BLassert.H:39
#define AMREX_EXPORT
Definition: AMReX_Extension.H:191
int MPI_Comm
Definition: AMReX_ccse-mpi.H:47
int MPI_Request
Definition: AMReX_ccse-mpi.H:50
int MPI_Group
Definition: AMReX_ccse-mpi.H:48
int MPI_Op
Definition: AMReX_ccse-mpi.H:46
static constexpr int MPI_COMM_WORLD
Definition: AMReX_ccse-mpi.H:54
static constexpr int MPI_PROC_NULL
Definition: AMReX_ccse-mpi.H:57
static constexpr int MPI_DATATYPE_NULL
Definition: AMReX_ccse-mpi.H:52
int MPI_Datatype
Definition: AMReX_ccse-mpi.H:49
static constexpr int MPI_REQUEST_NULL
Definition: AMReX_ccse-mpi.H:53
int size() const noexcept
Return the number of FABs in the FabArray.
Definition: AMReX_FabArrayBase.H:109
a one-thingy-per-box distributed object
Definition: AMReX_LayoutData.H:13
Hold the description and status of communication data.
Definition: AMReX_ParallelDescriptor.H:57
Message(MPI_Request req_, MPI_Datatype type_)
Definition: AMReX_ParallelDescriptor.H:61
MPI_Datatype type() const
Definition: AMReX_ParallelDescriptor.H:73
MPI_Request req() const
Definition: AMReX_ParallelDescriptor.H:74
bool test()
Definition: AMReX_ParallelDescriptor.cpp:1178
MPI_Status m_stat
Definition: AMReX_ParallelDescriptor.H:82
MPI_Request m_req
Definition: AMReX_ParallelDescriptor.H:81
MPI_Status stat() const
Definition: AMReX_ParallelDescriptor.H:75
void wait()
Definition: AMReX_ParallelDescriptor.cpp:1174
MPI_Datatype m_type
Definition: AMReX_ParallelDescriptor.H:80
Message(MPI_Status stat_, MPI_Datatype type_)
Definition: AMReX_ParallelDescriptor.H:65
bool m_finished
Definition: AMReX_ParallelDescriptor.H:79
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound checking ...
Definition: AMReX_Vector.H:27
void EvalReduction()
Definition: AMReX_Lazy.cpp:20
int MyProcAll() noexcept
my rank in world communicator
Definition: AMReX_ParallelContext.H:61
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition: AMReX_ParallelContext.H:70
int get_inc_mpi_tag() noexcept
get and increment mpi tag in current frame
Definition: AMReX_ParallelContext.H:93
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
int NProcsAll() noexcept
number of ranks in world communicator
Definition: AMReX_ParallelContext.H:59
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
void DoAllReduce(T *r, MPI_Op op, int cnt)
Definition: AMReX_ParallelDescriptor.H:1237
void DoReduce(T *r, MPI_Op op, int cnt, int cpu)
Definition: AMReX_ParallelDescriptor.H:1251
bool sameTeam(int rank) noexcept
Definition: AMReX_ParallelDescriptor.H:329
int RankInLeadComm(int rank) noexcept
Definition: AMReX_ParallelDescriptor.H:339
void Test(MPI_Request &, int &, MPI_Status &)
Definition: AMReX_ParallelDescriptor.cpp:1207
const char * ErrorString(int)
ErrorString returns the string associated with an internal MPI error condition.
Definition: AMReX_ParallelDescriptor.cpp:1200
int m_rank_in_processor
Definition: AMReX_ParallelDescriptor.cpp:73
Message Asend(const T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1088
const int ioProcessor
The MPI rank number of the I/O Processor (probably rank 0). This rank is usually used to write to std...
Definition: AMReX_ParallelDescriptor.cpp:82
MPI_Comm Communicator() noexcept
Definition: AMReX_ParallelDescriptor.H:210
void ReduceIntSum(int &)
Integer sum reduction.
Definition: AMReX_ParallelDescriptor.cpp:1252
Vector< MPI_Datatype * > m_mpi_types
Definition: AMReX_ParallelDescriptor.cpp:76
void EndTeams()
Definition: AMReX_ParallelDescriptor.cpp:1585
void Waitany(Vector< MPI_Request > &, int &, MPI_Status &)
Definition: AMReX_ParallelDescriptor.cpp:1299
void Gatherv(const T *send, int sc, T *recv, const std::vector< int > &rc, const std::vector< int > &disp, int root)
Definition: AMReX_ParallelDescriptor.H:1202
Message Abarrier()
Definition: AMReX_ParallelDescriptor.cpp:1204
void ReduceRealMin(Vector< std::reference_wrapper< Real > > const &)
Definition: AMReX_ParallelDescriptor.cpp:1216
void StartParallel(int *, char ***, MPI_Comm)
Perform any needed parallel initialization. This MUST be the first routine in this class called from ...
Definition: AMReX_ParallelDescriptor.cpp:1152
const ProcessTeam & MyTeam() noexcept
Definition: AMReX_ParallelDescriptor.H:349
bool isTeamLead() noexcept
Definition: AMReX_ParallelDescriptor.H:324
int m_nprocs_per_node
Definition: AMReX_ParallelDescriptor.cpp:69
void Wait(MPI_Request &, MPI_Status &)
Definition: AMReX_ParallelDescriptor.cpp:1291
int MyProc() noexcept
return the rank number local to the current Parallel Context
Definition: AMReX_ParallelDescriptor.H:125
Vector< MPI_Op * > m_mpi_ops
Definition: AMReX_ParallelDescriptor.cpp:77
std::string mpi_level_to_string(int mtlev)
Definition: AMReX_ParallelDescriptor.cpp:1591
ProcessTeam m_Team
Definition: AMReX_ParallelDescriptor.cpp:65
int m_nprocs_per_processor
Definition: AMReX_ParallelDescriptor.cpp:72
void ReduceRealSum(Vector< std::reference_wrapper< Real > > const &)
Definition: AMReX_ParallelDescriptor.cpp:1214
void Waitsome(Vector< MPI_Request > &, int &, Vector< int > &, Vector< MPI_Status > &)
Definition: AMReX_ParallelDescriptor.cpp:1303
bool UseGpuAwareMpi()
Definition: AMReX_ParallelDescriptor.H:111
void ReduceLongMin(Long &)
Long min reduction.
Definition: AMReX_ParallelDescriptor.cpp:1225
void ReadAndBcastFile(const std::string &filename, Vector< char > &charBuf, bool bExitOnError, const MPI_Comm &comm)
Definition: AMReX_ParallelDescriptor.cpp:1459
void ReduceLongAnd(Long &)
Long and-wise reduction.
Definition: AMReX_ParallelDescriptor.cpp:1222
int MyTeamLead() noexcept
Definition: AMReX_ParallelDescriptor.H:309
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition: AMReX_ParallelDescriptor.cpp:1295
int MyTeamColor() noexcept
Definition: AMReX_ParallelDescriptor.H:304
bool doTeamReduce() noexcept
Definition: AMReX_ParallelDescriptor.H:344
int MinTag() noexcept
Definition: AMReX_ParallelDescriptor.H:206
void ReduceIntMax(int &)
Integer max reduction.
Definition: AMReX_ParallelDescriptor.cpp:1253
void ReduceLongMax(Long &)
Long max reduction.
Definition: AMReX_ParallelDescriptor.cpp:1224
int NProcs(MPI_Comm comm) noexcept
Definition: AMReX_ParallelDescriptor.H:249
int TeamLead(int rank) noexcept
Definition: AMReX_ParallelDescriptor.H:319
int MyRankInProcessor() noexcept
Definition: AMReX_ParallelDescriptor.H:234
void ReduceBoolAnd(bool &)
And-wise boolean reduction.
Definition: AMReX_ParallelDescriptor.cpp:1276
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1109
MPI_Comm m_comm
Definition: AMReX_ParallelDescriptor.cpp:67
void ReduceBoolOr(bool &)
Or-wise boolean reduction.
Definition: AMReX_ParallelDescriptor.cpp:1277
void Initialize()
Definition: AMReX_ParallelDescriptor.cpp:1511
void Finalize()
Definition: AMReX_ParallelDescriptor.cpp:1522
bool use_gpu_aware_mpi
Definition: AMReX_ParallelDescriptor.cpp:60
int TeamSize() noexcept
Definition: AMReX_ParallelDescriptor.H:294
void ReduceLongSum(Long &)
Long sum reduction.
Definition: AMReX_ParallelDescriptor.cpp:1223
void EndParallel()
Perform any needed parallel finalization. This MUST be the last routine in this class called from wit...
Definition: AMReX_ParallelDescriptor.cpp:1184
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition: AMReX_ParallelDescriptor.cpp:1282
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition: AMReX_ParallelDescriptor.H:613
int NProcs() noexcept
return the number of MPI ranks local to the current Parallel Context
Definition: AMReX_ParallelDescriptor.H:243
void IProbe(int, int, int &, MPI_Status &)
Definition: AMReX_ParallelDescriptor.cpp:1209
int MyRankInNode() noexcept
Definition: AMReX_ParallelDescriptor.H:222
void GatherLayoutDataToVector(const LayoutData< T > &sendbuf, Vector< T > &recvbuf, int root)
Gather LayoutData values to a vector on root.
Definition: AMReX_ParallelDescriptor.H:1211
Message Send(const std::vector< T > &buf, int dst_pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1123
int MaxTag() noexcept
Definition: AMReX_ParallelDescriptor.H:207
void Gather(Real const *sendbuf, int nsend, Real *recvbuf, int root)
Parallel gather.
Definition: AMReX_ParallelDescriptor.cpp:1160
void Comm_dup(MPI_Comm, MPI_Comm &)
Definition: AMReX_ParallelDescriptor.cpp:1212
int IOProcessorNumber() noexcept
Definition: AMReX_ParallelDescriptor.H:266
int NProcsPerProcessor() noexcept
Definition: AMReX_ParallelDescriptor.H:228
void Barrier(const std::string &)
Definition: AMReX_ParallelDescriptor.cpp:1202
int m_rank_in_node
Definition: AMReX_ParallelDescriptor.cpp:70
double second() noexcept
Returns wall-clock seconds since start of execution.
Definition: AMReX_ParallelDescriptor.cpp:1285
const std::string Unnamed("Unnamed")
Used as default argument to ParallelDescriptor::Barrier().
Message Asend(const std::vector< T > &buf, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1102
bool IOProcessor() noexcept
Is this CPU the I/O Processor? To get the rank number, call IOProcessorNumber()
Definition: AMReX_ParallelDescriptor.H:275
int MyRankInTeam() noexcept
Definition: AMReX_ParallelDescriptor.H:314
Message Arecv(std::vector< T > &buf, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1144
int m_MaxTag
Definition: AMReX_ParallelDescriptor.cpp:80
void Scatter(T *, size_t n, const T1 *, size_t n1, int root)
Definition: AMReX_ParallelDescriptor.H:1224
int NProcsPerNode() noexcept
Definition: AMReX_ParallelDescriptor.H:216
int MyProc(MPI_Comm comm) noexcept
Definition: AMReX_ParallelDescriptor.H:130
void Abort(int errorcode, bool backtrace)
Abort with specified error code.
Definition: AMReX_ParallelDescriptor.cpp:1191
std::pair< int, int > team_range(int begin, int end, int rit=-1, int nworkers=0) noexcept
Definition: AMReX_ParallelDescriptor.H:354
void team_for(int begin, int end, const F &f)
Definition: AMReX_ParallelDescriptor.H:398
Message Recv(std::vector< T > &buf, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1165
int NTeams() noexcept
Definition: AMReX_ParallelDescriptor.H:299
Message Recv(T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1151
void StartTeams()
Split the process pool into teams.
Definition: AMReX_ParallelDescriptor.cpp:1531
Message Arecv(T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1130
MPI_Op Mpi_op()
Definition: AMReX_ParallelDescriptor.H:1566
int m_MinTag
Definition: AMReX_ParallelDescriptor.cpp:80
void ReduceIntMin(int &)
Integer min reduction.
Definition: AMReX_ParallelDescriptor.cpp:1254
void ReduceRealMax(Vector< std::reference_wrapper< Real > > const &)
Definition: AMReX_ParallelDescriptor.cpp:1215
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
@ max
Definition: AMReX_ParallelReduce.H:17
integer function omp_get_num_threads()
Definition: AMReX_omp_mod.F90:29
integer function omp_get_thread_num()
Definition: AMReX_omp_mod.F90:37
logical function omp_in_parallel()
Definition: AMReX_omp_mod.F90:41
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1890
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1881
Definition: AMReX_FabArrayCommI.H:841
Definition: AMReX_ccse-mpi.H:51
A host / device complex number type, because std::complex doesn't work in device code with Cuda yet.
Definition: AMReX_GpuComplex.H:29
static MPI_Datatype type()
Definition: AMReX_ParallelDescriptor.H:1511
static MPI_Datatype type()
Definition: AMReX_ParallelDescriptor.H:1527
Communication datatype (note: this structure also works without MPI)
Definition: AMReX_ccse-mpi.H:68
static MPI_Datatype type()
Provide functionalities needed to construct a team of processes to perform a particular job.
Definition: AMReX_ParallelDescriptor.H:144
MPI_Comm team_t
Definition: AMReX_ParallelDescriptor.H:145
int m_color
Definition: AMReX_ParallelDescriptor.H:194
int m_size
Definition: AMReX_ParallelDescriptor.H:193
void clear()
free a communicator
Definition: AMReX_ParallelDescriptor.H:176
const MPI_Comm & get_lead_comm() const
Definition: AMReX_ParallelDescriptor.H:190
void Barrier() const
synchronize processes within the team
Definition: AMReX_ParallelDescriptor.H:148
int m_do_team_reduce
Definition: AMReX_ParallelDescriptor.H:197
const team_t & get() const
Definition: AMReX_ParallelDescriptor.H:185
int m_numTeams
Definition: AMReX_ParallelDescriptor.H:192
MPI_Comm m_team_comm
Definition: AMReX_ParallelDescriptor.H:199
const MPI_Comm & get_team_comm() const
return the communicator
Definition: AMReX_ParallelDescriptor.H:189
int m_rankInTeam
Definition: AMReX_ParallelDescriptor.H:196
MPI_Comm m_lead_comm
Definition: AMReX_ParallelDescriptor.H:200
int m_lead
Definition: AMReX_ParallelDescriptor.H:195
void MemoryBarrier() const
memory fence
Definition: AMReX_ParallelDescriptor.H:157
Definition: AMReX_ValLocPair.H:10