#ifndef BL_PARALLELDESCRIPTOR_H
#define BL_PARALLELDESCRIPTOR_H
#include <AMReX_Config.H>
template <typename T> class LayoutData;

namespace ParallelDescriptor
    bool m_finished = true;

    void MPI_Error (const char* file, int line, const char* str, int rc);
#define BL_MPI_REQUIRE(x) \
    if ( int l_status_ = (x) ) \
        amrex::ParallelDescriptor::MPI_Error(__FILE__,__LINE__,#x, l_status_);
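
// Example (illustrative sketch, not part of the original header): call sites wrap raw
// MPI calls in BL_MPI_REQUIRE so that a nonzero return status aborts with the offending
// file, line, and expression.
//
//     BL_MPI_REQUIRE( MPI_Barrier(amrex::ParallelDescriptor::Communicator()) );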
                        char*** argv = nullptr,

        MPI_Comm_rank(comm,&r);

#if defined(BL_USE_MPI3)
            MPI_Barrier(m_team_comm);

        if (omp_in_parallel()) {
#if defined(BL_USE_MPI3)
            std::atomic_thread_fence(std::memory_order_release);
            MPI_Barrier(m_team_comm);
            std::atomic_thread_fence(std::memory_order_acquire);

#if defined(BL_USE_MPI3)
        MPI_Comm_free(&m_team_comm);
        if (m_rankInTeam==0) { MPI_Comm_free(&m_lead_comm); }
    int m_do_team_reduce;

    inline int MinTag () noexcept { return m_MinTag; }
    inline int MaxTag () noexcept { return m_MaxTag; }

        BL_MPI_REQUIRE(MPI_Comm_size(comm, &s));
        return m_Team.m_size;
        return m_Team.m_numTeams;
        return m_Team.m_color;
        return m_Team.m_lead;
        return m_Team.m_rankInTeam;
        return (rank >= 0) ? (rank - rank % m_Team.m_size) : MPI_PROC_NULL;
        return m_Team.m_do_team_reduce;

    inline const ProcessTeam&
    inline std::pair<int,int>
            int nr = ntot / nworkers;
            int nlft = ntot - nr * nworkers;
                rb = begin + rit * (nr + 1);
                rb = begin + rit * nr + nlft;
            int nthreads = omp_get_num_threads();
            int tid = omp_get_thread_num();
            int nr = ntot / nthreads;
            int nlft = ntot - nr * nthreads;
                rb += tid * (nr + 1);
                rb += tid * nr + nlft;
        return std::make_pair(rb,re);
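
// Example (illustrative sketch): team_range(begin, end, rit, nworkers) block-partitions
// [begin,end) so the first nlft workers get nr+1 items and the rest get nr. With
// begin=0, end=10, nworkers=4: nr=2, nlft=2, and the slices are [0,3), [3,6), [6,8), [8,10).
//
//     auto r = amrex::ParallelDescriptor::team_range(0, 10); // this worker's slice
//     for (int i = r.first; i < r.second; ++i) { /* ... */ }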
template <typename F>
        for (int i = range.first; i < range.second; ++i) {

template <typename F>
        for (int i = range.first; i < range.second; ++i) {

template <typename F>
        for (int i = range.first; i < range.second; ++i) {
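
// Example (illustrative sketch): team_for applies f to each index in the calling
// worker's portion of [begin,end), using team_range internally. nboxes and
// process_box are placeholders for the caller's own data and work.
//
//     amrex::ParallelDescriptor::team_for(0, nboxes, [&] (int i) { process_box(i); });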
    void Abort (int errorcode = SIGABRT, bool backtrace = true);

    double second () noexcept;
    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
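
// Example (illustrative sketch): the floating-point reductions declared above are used
// by passing a local value; after the call every rank (or only the targeted rank for the
// overloads taking a cpu argument) holds the reduced result.
//
//     amrex::Real local_max = compute_local_max();   // placeholder for caller code
//     amrex::ParallelDescriptor::ReduceRealMax(local_max);   // global max on all ranks
//     amrex::ParallelDescriptor::ReduceRealSum(local_max,
//                                              amrex::ParallelDescriptor::IOProcessorNumber());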
    void Gather (Real const* sendbuf, int nsend, Real* recvbuf, int root);
    template <class T> [[nodiscard]] Message Asend(const T*, size_t n, int pid, int tag);
    template <class T> [[nodiscard]] Message Asend(const T*, size_t n, int pid, int tag, MPI_Comm comm);
    template <class T> [[nodiscard]] Message Asend(const std::vector<T>& buf, int pid, int tag);

    template <class T> Message Arecv(T*, size_t n, int pid, int tag);
    template <class T> Message Arecv(T*, size_t n, int pid, int tag, MPI_Comm comm);
    template <class T> Message Arecv(std::vector<T>& buf, int pid, int tag);

    template <class T> Message Send(const T* buf, size_t n, int dst_pid, int tag);
    template <class T> Message Send(const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
    template <class T> Message Send(const std::vector<T>& buf, int dst_pid, int tag);

    template <class T> Message Recv(T*, size_t n, int pid, int tag);
    template <class T> Message Recv(T*, size_t n, int pid, int tag, MPI_Comm comm);
    template <class T> Message Recv(std::vector<T>& buf, int pid, int tag);
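
// Example (illustrative sketch): blocking point-to-point exchange between ranks 0 and 1,
// using a fresh tag from SeqNum(). The buffer size nelems is a placeholder.
//
//     std::vector<amrex::Real> buf(nelems);
//     int tag = amrex::ParallelDescriptor::SeqNum();
//     if (amrex::ParallelDescriptor::MyProc() == 0) {
//         amrex::ParallelDescriptor::Send(buf.data(), buf.size(), 1, tag);
//     } else if (amrex::ParallelDescriptor::MyProc() == 1) {
//         amrex::ParallelDescriptor::Recv(buf.data(), buf.size(), 0, tag);
//     }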
    template <class T> void Bcast(T*, size_t n, int root = 0);
    template <class T> void Bcast(T*, size_t n, int root, const MPI_Comm &comm);

    template <class T, class T1> void Scatter(T*, size_t n, const T1*, size_t n1, int root);

    template <class T, class T1> void Gather(const T*, size_t n, T1*, size_t n1, int root);
    template <class T> std::vector<T> Gather(const T&, int root);
    template <class T> void Gatherv (const T* send, int sc,
                                     T* recv, const std::vector<int>& rc,
                                     const std::vector<int>& disp, int root);
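
// Example (illustrative sketch): broadcast a size computed on the I/O rank to all ranks.
//
//     int nitems = 0;
//     if (amrex::ParallelDescriptor::IOProcessor()) { nitems = count_items(); /* placeholder */ }
//     amrex::ParallelDescriptor::Bcast(&nitems, 1, amrex::ParallelDescriptor::IOProcessorNumber());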
                          bool bExitOnError = true,

    void PMI_Initialize();
    void PMI_PrintMeshcoords(const pmi_mesh_coord_t *pmi_mesh_coord);

    int select_comm_data_type (std::size_t nbytes);
    std::size_t sizeof_selected_comm_data_type (std::size_t nbytes);

ParallelDescriptor::Message

namespace ParallelDescriptor {

Asend<char> (const char* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
Asend (const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm)
    static_assert(!std::is_same_v<char,T>,
                  "Asend: char version has been specialized");
    BL_MPI_REQUIRE( MPI_Isend(const_cast<T*>(buf),
    BL_COMM_PROFILE(BLProfiler::AsendTsiiM, BLProfiler::AfterCall(), dst_pid, tag);
ParallelDescriptor::Message

ParallelDescriptor::Message

namespace ParallelDescriptor {

Send<char> (const char* buf, size_t n, int dst_pid, int tag, MPI_Comm comm);
Send (const T* buf, size_t n, int dst_pid, int tag, MPI_Comm comm)
    static_assert(!std::is_same_v<char,T>,
                  "Send: char version has been specialized");
#ifdef BL_COMM_PROFILING
    int dst_pid_world(-1);
    BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
    BL_MPI_REQUIRE( MPI_Comm_group(Communicator(), &groupWorld) );
    BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &dst_pid, groupWorld, &dst_pid_world) );
    BL_COMM_PROFILE(BLProfiler::SendTsii, n * sizeof(T), dst_pid_world, tag);
    BL_MPI_REQUIRE( MPI_Send(const_cast<T*>(buf),
    BL_COMM_PROFILE(BLProfiler::SendTsii, BLProfiler::AfterCall(), dst_pid, tag);
ParallelDescriptor::Message

ParallelDescriptor::Message

namespace ParallelDescriptor {

Arecv<char> (char* buf, size_t n, int src_pid, int tag, MPI_Comm comm);

    static_assert(!std::is_same_v<char,T>,
                  "Arecv: char version has been specialized");
    BL_MPI_REQUIRE( MPI_Irecv(buf,
    BL_COMM_PROFILE(BLProfiler::ArecvTsiiM, BLProfiler::AfterCall(), src_pid, tag);
ParallelDescriptor::Message

ParallelDescriptor::Message

namespace ParallelDescriptor {

Recv<char> (char* buf, size_t n, int src_pid, int tag, MPI_Comm comm);
Recv (T* buf, size_t n, int src_pid, int tag, MPI_Comm comm)
    static_assert(!std::is_same_v<char,T>,
                  "Recv: char version has been specialized");
    BL_COMM_PROFILE(BLProfiler::RecvTsii, BLProfiler::BeforeCall(), src_pid, tag);
    BL_MPI_REQUIRE( MPI_Recv(buf,
#ifdef BL_COMM_PROFILING
    int src_pid_comm(stat.MPI_SOURCE);
    int src_pid_world(stat.MPI_SOURCE);
    if(src_pid_comm != MPI_ANY_SOURCE) {
        BL_MPI_REQUIRE( MPI_Comm_group(comm, &groupComm) );
        BL_MPI_REQUIRE( MPI_Comm_group(Communicator(), &groupWorld) );
        BL_MPI_REQUIRE( MPI_Group_translate_ranks(groupComm, 1, &src_pid_comm, groupWorld, &src_pid_world) );
    BL_COMM_PROFILE(BLProfiler::RecvTsii, n * sizeof(T), src_pid_world, stat.MPI_TAG);
ParallelDescriptor::Message

    BL_ASSERT(n < static_cast<size_t>(std::numeric_limits<int>::max()));
    BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    BL_MPI_REQUIRE( MPI_Bcast(t,
                              Mpi_typemap<T>::type(),
    BL_COMM_PROFILE(BLProfiler::BCastTsi, n * sizeof(T), root, BLProfiler::NoTag());

    BL_ASSERT(n < static_cast<size_t>(std::numeric_limits<int>::max()));
    BL_COMM_PROFILE(BLProfiler::BCastTsi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    BL_MPI_REQUIRE( MPI_Bcast(t,
                              Mpi_typemap<T>::type(),
    BL_COMM_PROFILE(BLProfiler::BCastTsi, n * sizeof(T), root, BLProfiler::NoTag());
template <class T, class T1>

    BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    BL_ASSERT(n  < static_cast<size_t>(std::numeric_limits<int>::max()));
    BL_ASSERT(n1 < static_cast<size_t>(std::numeric_limits<int>::max()));
    BL_MPI_REQUIRE( MPI_Gather(const_cast<T*>(t),
                               Mpi_typemap<T>::type(),
                               Mpi_typemap<T1>::type(),
    BL_COMM_PROFILE(BLProfiler::GatherTsT1Si, n * sizeof(T), root, BLProfiler::NoTag());
    BL_COMM_PROFILE(BLProfiler::GatherTi, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    std::vector<T> resl;
    BL_MPI_REQUIRE( MPI_Gather(const_cast<T*>(&t),
                               Mpi_typemap<T>::type(),
                               Mpi_typemap<T>::type(),
    BL_COMM_PROFILE(BLProfiler::GatherTi, sizeof(T), root, BLProfiler::NoTag());

         T* recv, const std::vector<int>& rc, const std::vector<int>& disp,
    BL_COMM_PROFILE(BLProfiler::Gatherv, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    BL_COMM_PROFILE(BLProfiler::Gatherv, std::accumulate(rc.begin(),rc.end(),0)*sizeof(T), root, BLProfiler::NoTag());
          Vector<T>& recvbuf, int root)

    Vector<T> T_to_send;
    T_to_send.reserve(sendbuf.local_size());
    for (int i : sendbuf.IndexArray())
        T_to_send.push_back(sendbuf[i]);

    Vector<int> recvcount(nprocs, 0);
    recvbuf.resize(sendbuf.size());
    const Vector<int>& old_pmap = sendbuf.DistributionMap().ProcessorMap();
    for (int i : old_pmap)

    Vector<Vector<int>> new_ind_to_old_ind(nprocs);
    for (int i=0; i<nprocs; ++i)
        new_ind_to_old_ind[i].reserve(recvcount[i]);
    for (int i=0; i<old_pmap.size(); ++i)
        new_ind_to_old_ind[old_pmap[i]].push_back(i);

    Vector<int> new_index_to_old_index;
    new_index_to_old_index.reserve(old_pmap.size());
    for (const Vector<int>& v : new_ind_to_old_ind)
            new_index_to_old_index.push_back(el);

    Vector<int> disp(nprocs);
    if (!disp.empty()) { disp[0] = 0; }
    std::partial_sum(recvcount.begin(), recvcount.end()-1, disp.begin()+1);
    Vector<T> new_index_to_T(sendbuf.size());
    MPI_Gatherv(T_to_send.data(), T_to_send.size(),
                new_index_to_T.data(), recvcount.data(), disp.data(),

    Vector<int> old_index_to_new_index(sendbuf.size());
    for (int i=0; i<old_index_to_new_index.size(); ++i)
        old_index_to_new_index[new_index_to_old_index[i]] = i;
    for (int i=0; i<recvbuf.size(); ++i)
        recvbuf[i] = new_index_to_T[old_index_to_new_index[i]];
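
// Example (illustrative sketch): gather one value per box from a LayoutData into a
// Vector ordered by global box index on the root rank. The BoxArray/DistributionMapping
// pair (ba, dm) and the per-box fill local_count are placeholders for the caller's setup.
//
//     amrex::LayoutData<int> counts(ba, dm);
//     for (int i : counts.IndexArray()) { counts[i] = local_count(i); }
//     amrex::Vector<int> allcounts;
//     amrex::ParallelDescriptor::GatherLayoutDataToVector(
//         counts, allcounts, amrex::ParallelDescriptor::IOProcessorNumber());
//     // On the root, allcounts[k] holds the value for global box index k.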
template <class T, class T1>

    BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, BLProfiler::BeforeCall(), root, BLProfiler::NoTag());
    BL_MPI_REQUIRE( MPI_Scatter(const_cast<T1*>(t1),
                                Mpi_typemap<T1>::type(),
                                Mpi_typemap<T>::type(),
    BL_COMM_PROFILE(BLProfiler::ScatterTsT1si, n * sizeof(T), root, BLProfiler::NoTag());
namespace ParallelDescriptor

Asend (const std::vector<T>& , int , int )

Send (const T* , size_t , int , int )

Send (const std::vector<T>& , int , int )
template <class T, class T1>
Gather (const T* t, size_t n, T1* t1, size_t n1, int )
    int const sc = static_cast<int>(n);
    for (int j=0; j<sc; ++j) { t1[j] = t[j]; }

    std::vector<T> resl(1);

         T* recv, const std::vector<int>& ,
         const std::vector<int>& , int )
    for (int j=0; j<sc; ++j) { recv[j] = send[j]; }
    recvbuf.resize(sendbuf.size());
    for (int i=0; i<sendbuf.size(); ++i)
        recvbuf[i] = sendbuf[i];
template <class T, class T1>

namespace ParallelDescriptor {
void DoAllReduce (T* r, MPI_Op op, int cnt)
    Lazy::EvalReduction();
    BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, r, cnt,
                                  Mpi_typemap<T>::type(), op,

void DoReduce (T* r, MPI_Op op, int cnt, int cpu)
    Lazy::EvalReduction();
    BL_MPI_REQUIRE( MPI_Reduce(MPI_IN_PLACE, r, cnt,
                               Mpi_typemap<T>::type(), op,
    BL_MPI_REQUIRE( MPI_Reduce(r, r, cnt,
                               Mpi_typemap<T>::type(), op,
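
// For reference (illustrative sketch): DoAllReduce reduces in place on every rank, while
// DoReduce passes MPI_IN_PLACE only on the destination rank cpu (on other ranks the
// receive buffer is ignored by MPI_Reduce). In raw MPI terms, ReduceRealSum(x) called on
// all ranks is roughly an in-place all-reduce over the library's communicator, e.g.
//
//     BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, &x, 1,
//                                   Mpi_typemap<Real>::type(), MPI_SUM,
//                                   Communicator()) );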
    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(&rvar,MPI_SUM,1);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(rvar,MPI_SUM,cnt);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoAllReduce<T>(tmp.data(),MPI_SUM,cnt);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(&rvar,MPI_SUM,1,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(rvar,MPI_SUM,cnt,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoReduce<T>(tmp.data(),MPI_SUM,cnt,cpu);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];
    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(&rvar,MPI_MAX,1);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(rvar,MPI_MAX,cnt);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoAllReduce<T>(tmp.data(),MPI_MAX,cnt);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(&rvar,MPI_MAX,1,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(rvar,MPI_MAX,cnt,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoReduce<T>(tmp.data(),MPI_MAX,cnt,cpu);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];
    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(&rvar,MPI_MIN,1);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoAllReduce<T>(rvar,MPI_MIN,cnt);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoAllReduce<T>(tmp.data(),MPI_MIN,cnt);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(&rvar,MPI_MIN,1,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        detail::DoReduce<T>(rvar,MPI_MIN,cnt,cpu);

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
        int cnt = rvar.size();
        Vector<T> tmp{std::begin(rvar), std::end(rvar)};
        detail::DoReduce<T>(tmp.data(),MPI_MIN,cnt,cpu);
        for (int i = 0; i < cnt; ++i) {
            rvar[i].get() = tmp[i];
    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealSum (Vector<std::reference_wrapper<T> > const&) {}

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealSum (Vector<std::reference_wrapper<T> > const&, int) {}

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealMax (Vector<std::reference_wrapper<T> > const&) {}

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealMax (Vector<std::reference_wrapper<T> > const&, int) {}

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealMin (Vector<std::reference_wrapper<T> > const&) {}

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>

    template <typename T>
    std::enable_if_t<std::is_floating_point_v<T>>
    ReduceRealMin (Vector<std::reference_wrapper<T> > const&, int) {}
namespace ParallelDescriptor {

        static_assert(std::is_same<T,double>() ||
                      std::is_same<T,float >(),
                      "Unsupported type T for GpuComplex");
        if constexpr (std::is_same<T,double>()) {
            return MPI_C_DOUBLE_COMPLEX;
            return MPI_C_FLOAT_COMPLEX;
template<typename TV, typename TI>

        static_assert(std::is_trivially_copyable_v<T>,
                      "To communicate with MPI, ValLocPair must be trivially copyable.");
        static_assert(std::is_standard_layout_v<T>,
                      "To communicate with MPI, ValLocPair must be standard layout");

        int blocklens[] = { 1, 1 };
        BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) );
        BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) );
        BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types,
        MPI_Aint lb, extent;
        BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) );
        if (extent != sizeof(T)) {
            BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0, sizeof(vlp[0]), &mpi_type) );
            BL_MPI_REQUIRE( MPI_Type_free(&tmp) );
        BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) );
        m_mpi_types.push_back(&mpi_type);
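
// Example (illustrative sketch of the same MPI pattern, not AMReX API): registering a
// struct datatype for a hypothetical {double value; int index;} pair, mirroring the
// ValLocPair registration above (create_struct, resize to sizeof, commit).
//
//     struct ValIdx { double value; int index; };
//     ValIdx probe;
//     MPI_Aint base, disp[2];
//     MPI_Get_address(&probe, &base);
//     MPI_Get_address(&probe.value, &disp[0]);
//     MPI_Get_address(&probe.index, &disp[1]);
//     disp[0] -= base;  disp[1] -= base;
//     int blocklens[] = { 1, 1 };
//     MPI_Datatype types[] = { MPI_DOUBLE, MPI_INT };
//     MPI_Datatype tmp, newtype;
//     MPI_Type_create_struct(2, blocklens, disp, types, &tmp);
//     MPI_Type_create_resized(tmp, 0, sizeof(ValIdx), &newtype);
//     MPI_Type_free(&tmp);
//     MPI_Type_commit(&newtype);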
template <typename T, typename F>

    static MPI_Op mpi_op = MPI_OP_NULL;
    if (mpi_op == MPI_OP_NULL) {
        static auto user_fn = [] (void *invec, void *inoutvec, int* len,
            auto in  = static_cast<T const*>(invec);
            auto out = static_cast<T*>(inoutvec);
            for (int i = 0; i < *len; ++i) {
                out[i] = F()(in[i],out[i]);
        BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) );
        m_mpi_ops.push_back(&mpi_op);
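
// Example (illustrative sketch): obtaining a custom reduction operator from Mpi_op<T,F>()
// with a user-defined functor. MaxAbs is hypothetical (not part of AMReX), and the
// datatype helper is assumed to be amrex::Mpi_typemap from AMReX_ccse-mpi.H.
//
//     struct MaxAbs {
//         amrex::Real operator() (amrex::Real a, amrex::Real b) const {
//             return std::max(std::abs(a), std::abs(b));
//         }
//     };
//     MPI_Op op = amrex::ParallelDescriptor::Mpi_op<amrex::Real, MaxAbs>();
//     amrex::Real x = local_value;   // placeholder
//     BL_MPI_REQUIRE( MPI_Allreduce(MPI_IN_PLACE, &x, 1,
//                                   amrex::Mpi_typemap<amrex::Real>::type(),
//                                   op, amrex::ParallelDescriptor::Communicator()) );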
Definition AMReX_BLProfiler.H:587
#define BL_PROFILE_T_S(fname, T)
Definition AMReX_BLProfiler.H:554
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_EXPORT
Definition AMReX_Extension.H:191
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
Hold the description and status of communication data.
Definition AMReX_ParallelDescriptor.H:57
Message(MPI_Request req_, MPI_Datatype type_)
Definition AMReX_ParallelDescriptor.H:61
MPI_Datatype type() const
Definition AMReX_ParallelDescriptor.H:73
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
bool test()
Definition AMReX_ParallelDescriptor.cpp:1191
MPI_Status stat() const
Definition AMReX_ParallelDescriptor.H:75
void wait()
Definition AMReX_ParallelDescriptor.cpp:1187
Message(MPI_Status stat_, MPI_Datatype type_)
Definition AMReX_ParallelDescriptor.H:65
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79
amrex_long Long
Definition AMReX_INT.H:30
void ReduceIntMax(int &)
Definition AMReX_ParallelDescriptor.cpp:1266
void ReduceLongAnd(Long &)
Definition AMReX_ParallelDescriptor.cpp:1235
void ReduceBoolAnd(bool &)
Definition AMReX_ParallelDescriptor.cpp:1289
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Gather(Real const *sendbuf, int nsend, Real *recvbuf, int root)
Definition AMReX_ParallelDescriptor.cpp:1173
void Barrier(const std::string &)
Definition AMReX_ParallelDescriptor.cpp:1215
void ReduceIntSum(int &)
Definition AMReX_ParallelDescriptor.cpp:1265
void ReduceBoolOr(bool &)
Definition AMReX_ParallelDescriptor.cpp:1290
void ReadAndBcastFile(const std::string &filename, Vector< char > &charBuf, bool bExitOnError, const MPI_Comm &comm)
Definition AMReX_ParallelDescriptor.cpp:1495
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
void ReduceLongSum(Long &)
Definition AMReX_ParallelDescriptor.cpp:1236
int IOProcessorNumber() noexcept
The MPI rank number of the I/O Processor (probably rank 0). This rank is usually used to write to std...
Definition AMReX_ParallelDescriptor.H:279
void ReduceLongMax(Long &)
Definition AMReX_ParallelDescriptor.cpp:1237
bool IOProcessor() noexcept
Is this CPU the I/O Processor? To get the rank number, call IOProcessorNumber()
Definition AMReX_ParallelDescriptor.H:289
void ReduceLongMin(Long &)
Definition AMReX_ParallelDescriptor.cpp:1238
void ReduceIntMin(int &)
Definition AMReX_ParallelDescriptor.cpp:1267
void EvalReduction()
Definition AMReX_Lazy.cpp:20
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition AMReX_MPMD.cpp:122
int MyProc()
Definition AMReX_MPMD.cpp:117
int MyProcAll() noexcept
my rank in world communicator
Definition AMReX_ParallelContext.H:61
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int get_inc_mpi_tag() noexcept
get and increment mpi tag in current frame
Definition AMReX_ParallelContext.H:93
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsAll() noexcept
number of ranks in world communicator
Definition AMReX_ParallelContext.H:59
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
bool sameTeam(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:345
int RankInLeadComm(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:355
void Test(MPI_Request &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1220
const char * ErrorString(int)
ErrorString return string associated with error internal error condition.
Definition AMReX_ParallelDescriptor.cpp:1213
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
MPI_Comm Communicator() noexcept
Definition AMReX_ParallelDescriptor.H:223
void EndTeams()
Definition AMReX_ParallelDescriptor.cpp:1654
void Waitany(Vector< MPI_Request > &, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1312
void Gatherv(const T *send, int sc, T *recv, const std::vector< int > &rc, const std::vector< int > &disp, int root)
Definition AMReX_ParallelDescriptor.H:1286
Message Abarrier()
Definition AMReX_ParallelDescriptor.cpp:1217
void ReduceRealMin(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1229
void StartParallel(int *, char ***, MPI_Comm)
Perform any needed parallel initialization. This MUST be the first routine in this class called from ...
Definition AMReX_ParallelDescriptor.cpp:1165
bool isTeamLead() noexcept
Definition AMReX_ParallelDescriptor.H:340
void Wait(MPI_Request &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1304
std::string mpi_level_to_string(int mtlev)
Definition AMReX_ParallelDescriptor.cpp:1660
void ReduceRealSum(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1227
void Waitsome(Vector< MPI_Request > &, int &, Vector< int > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1316
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:113
std::pair< int, int > team_range(int begin, int end, int rit=-1, int nworkers=0) noexcept
Definition AMReX_ParallelDescriptor.H:370
int MyTeamLead() noexcept
Definition AMReX_ParallelDescriptor.H:325
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1308
int MyTeamColor() noexcept
Definition AMReX_ParallelDescriptor.H:320
bool doTeamReduce() noexcept
Definition AMReX_ParallelDescriptor.H:360
int MinTag() noexcept
Definition AMReX_ParallelDescriptor.H:220
int TeamLead(int rank) noexcept
Definition AMReX_ParallelDescriptor.H:335
int MyRankInProcessor() noexcept
Definition AMReX_ParallelDescriptor.H:243
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition AMReX_ParallelDescriptor.H:1193
void Initialize()
Definition AMReX_ParallelDescriptor.cpp:1547
void Finalize()
Definition AMReX_ParallelDescriptor.cpp:1591
int TeamSize() noexcept
Definition AMReX_ParallelDescriptor.H:310
void EndParallel()
Perform any needed parallel finalization. This MUST be the last routine in this class called from wit...
Definition AMReX_ParallelDescriptor.cpp:1197
void Bcast(void *, int, MPI_Datatype, int, MPI_Comm)
Definition AMReX_ParallelDescriptor.cpp:1295
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
void IProbe(int, int, int &, MPI_Status &)
Definition AMReX_ParallelDescriptor.cpp:1222
int MyRankInNode() noexcept
Definition AMReX_ParallelDescriptor.H:233
void GatherLayoutDataToVector(const LayoutData< T > &sendbuf, Vector< T > &recvbuf, int root)
Gather LayoutData values to a vector on root.
Definition AMReX_ParallelDescriptor.H:1295
int MaxTag() noexcept
Definition AMReX_ParallelDescriptor.H:221
void Comm_dup(MPI_Comm, MPI_Comm &)
Definition AMReX_ParallelDescriptor.cpp:1225
int NProcsPerProcessor() noexcept
Definition AMReX_ParallelDescriptor.H:238
double second() noexcept
Returns wall-clock seconds since start of execution.
Definition AMReX_ParallelDescriptor.cpp:1298
const std::string Unnamed("Unnamed")
Used as default argument to ParallelDescriptor::Barrier().
int MyRankInTeam() noexcept
Definition AMReX_ParallelDescriptor.H:330
void Scatter(T *, size_t n, const T1 *, size_t n1, int root)
Definition AMReX_ParallelDescriptor.H:1308
int NProcsPerNode() noexcept
Definition AMReX_ParallelDescriptor.H:228
void Abort(int errorcode, bool backtrace)
Abort with specified error code.
Definition AMReX_ParallelDescriptor.cpp:1204
void team_for(int begin, int end, const F &f)
Definition AMReX_ParallelDescriptor.H:414
int NTeams() noexcept
Definition AMReX_ParallelDescriptor.H:315
Message Recv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1235
void StartTeams()
Split the process pool into teams.
Definition AMReX_ParallelDescriptor.cpp:1600
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
MPI_Op Mpi_op()
Definition AMReX_ParallelDescriptor.H:1652
void ReduceRealMax(Vector< std::reference_wrapper< Real > > const &)
Definition AMReX_ParallelDescriptor.cpp:1228
const ProcessTeam & MyTeam() noexcept
Definition AMReX_ParallelDescriptor.H:365
static constexpr int MPI_DATATYPE_NULL
Definition AMReX_ccse-mpi.H:56
static constexpr int MPI_PROC_NULL
Definition AMReX_ccse-mpi.H:61
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_COMM_WORLD
Definition AMReX_ccse-mpi.H:58
int MPI_Group
Definition AMReX_ccse-mpi.H:52
int MPI_Op
Definition AMReX_ccse-mpi.H:50
int MPI_Request
Definition AMReX_ccse-mpi.H:54
static constexpr int MPI_REQUEST_NULL
Definition AMReX_ccse-mpi.H:57
int MPI_Datatype
Definition AMReX_ccse-mpi.H:53
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:138
__host__ __device__ Dim3 begin(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2006
__host__ __device__ Dim3 end(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2015
A host / device complex number type, because std::complex doesn't work in device code with Cuda yet.
Definition AMReX_GpuComplex.H:30
static MPI_Datatype type()
Definition AMReX_ParallelDescriptor.H:1597
static MPI_Datatype type()
Definition AMReX_ParallelDescriptor.H:1613
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78
static MPI_Datatype type()
Provide functionalities needed to construct a team of processes to perform a particular job.
Definition AMReX_ParallelDescriptor.H:148
MPI_Comm team_t
Definition AMReX_ParallelDescriptor.H:149
void clear()
free a communicator
Definition AMReX_ParallelDescriptor.H:180
void Barrier() const
synchronize processes within the team
Definition AMReX_ParallelDescriptor.H:152
const MPI_Comm & get_team_comm() const
return the communicator
Definition AMReX_ParallelDescriptor.H:193
const MPI_Comm & get_lead_comm() const
Definition AMReX_ParallelDescriptor.H:194
void MemoryBarrier() const
memory fence
Definition AMReX_ParallelDescriptor.H:161
const team_t & get() const
Definition AMReX_ParallelDescriptor.H:189
Definition AMReX_ValLocPair.H:10
Definition AMReX_ccse-mpi.H:55