#ifndef AMREX_PARALLEL_REDUCE_H_
#define AMREX_PARALLEL_REDUCE_H_

#include <AMReX_Config.H>

// Headers needed by the code below:
#include <AMReX_Functional.H>
#include <AMReX_ParallelDescriptor.H>
#include <AMReX_Vector.H>
#include <AMReX_ValLocPair.H>
#include <array>
#include <functional>
// (Inside namespace amrex::detail.)  The MPI_Op matching each detail::ReduceOp,
// indexed by static_cast<int>(op):
const std::array<MPI_Op, 5> mpi_ops = {{
    MPI_MAX, MPI_MIN, MPI_SUM, MPI_LOR, MPI_LAND
}};

template<typename T>
inline void Reduce (ReduceOp op, T* v, int cnt, int root, MPI_Comm comm)
{
    auto mpi_op = mpi_ops[static_cast<int>(op)];
    if (root == -1) {
        // root == -1 means every rank receives the result: in-place all-reduce.
        MPI_Allreduce(MPI_IN_PLACE, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
                      mpi_op, comm);
    } else {
        // Reduce onto a single root; MPI_IN_PLACE is only valid on the root rank.
        const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
            (void const*)(MPI_IN_PLACE) : (void const*) v;
        MPI_Reduce(sendbuf, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
                   mpi_op, root, comm);
    }
}
template<typename T>
inline void Reduce (ReduceOp op, T& v, int root, MPI_Comm comm) {
    Reduce(op, &v, 1, root, comm);
}
template<typename T>
inline void Reduce (ReduceOp op, Vector<std::reference_wrapper<T> > const & v,
                    int root, MPI_Comm comm)
{
    // Pack the referenced scalars into a contiguous buffer, reduce once, copy back.
    Vector<T> sndrcv(std::begin(v), std::end(v));
    Reduce(op, sndrcv.data(), v.size(), root, comm);
    for (int i = 0; i < v.size(); ++i) {
        v[i].get() = sndrcv[i];
    }
}
template<typename T>
inline void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm)
{
    auto mpi_type = ParallelDescriptor::Mpi_typemap<T>::type();
    if (root == -1) {
        // root == -1 means every rank receives the gathered data.
        BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::BeforeCall(),
                        BLProfiler::NoTag());
        MPI_Allgather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, comm);
        BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::AfterCall(),
                        BLProfiler::NoTag());
    } else {
        MPI_Gather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, root, comm);
    }
}
template<typename T>
inline void Gather (const T& v, T* vs, int root, MPI_Comm comm) {
    Gather(&v, 1, vs, root, comm);
}
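// --- Usage sketch (illustrative; not part of AMReX) ---
// Shows the root convention shared by the internal helpers above: root == -1 selects
// the "all" variant (MPI_Allreduce / MPI_Allgather), any other value reduces or
// gathers onto that rank only.  The function name, variables, and the idea of calling
// the internal detail:: helpers directly are made up for this sketch.
inline void detail_root_convention_sketch (MPI_Comm comm, int nprocs) // nprocs = size of comm
{
    double x = 1.0;
    amrex::detail::Reduce(amrex::detail::ReduceOp::sum, x, -1, comm); // all ranks get the sum
    amrex::detail::Reduce(amrex::detail::ReduceOp::max, x,  0, comm); // only rank 0 gets the max

    amrex::Vector<double> all(nprocs);
    amrex::detail::Gather(x, all.data(), -1, comm);                   // MPI_Allgather of one value per rank
}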
// Serial (no-MPI) stubs: same interfaces, no communication, so calling code
// compiles unchanged without MPI.
template<typename T> void Reduce (ReduceOp, T*, int, int, MPI_Comm) {}
template<typename T> void Reduce (ReduceOp, T&, int, MPI_Comm) {}
template<typename T> void Reduce (ReduceOp, Vector<std::reference_wrapper<T> > const &, int, MPI_Comm) {}

template<typename T> void Gather (const T*, int, T*, int, MPI_Comm) {}
template<typename T> void Gather (const T&, T*, int, MPI_Comm) {}
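// --- Usage sketch (illustrative; not part of AMReX) ---
// Because of the stubs above, the same calling code builds with or without MPI; in a
// serial build the reduction below is simply a no-op and x is left unchanged.
// The function name is made up for this sketch.
inline void serial_noop_sketch (MPI_Comm comm)
{
    amrex::Real x = 3.0;
    amrex::ParallelAllReduce::Sum(x, comm); // no-op when AMReX is built without MPI
}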
namespace ParallelAllGather {
    template<typename T> void AllGather (const T* v, int cnt, T* vs, MPI_Comm comm)
        { detail::Gather(v, cnt, vs, -1, comm); }
    template<typename T> void AllGather (const T& v, T* vs, MPI_Comm comm)
        { detail::Gather(v, vs, -1, comm); }
}
namespace ParallelGather {
    template<typename T> void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm)
        { detail::Gather(v, cnt, vs, root, comm); }
    template<typename T> void Gather (const T& v, T* vs, int root, MPI_Comm comm)
        { detail::Gather(v, vs, root, comm); }
}
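// --- Usage sketch (illustrative; not part of AMReX) ---
// Collect one value per rank.  ParallelGather fills the receive buffer only on the
// chosen root; ParallelAllGather fills it on every rank.  The function name and the
// variable nlocal are made up for this sketch.
inline void gather_sketch (int nlocal)
{
    MPI_Comm comm = amrex::ParallelDescriptor::Communicator();
    const int nprocs = amrex::ParallelDescriptor::NProcs();
    amrex::Vector<int> per_rank(nprocs);

    amrex::ParallelGather::Gather(nlocal, per_rank.data(), 0, comm);    // only rank 0 sees the data
    amrex::ParallelAllGather::AllGather(nlocal, per_rank.data(), comm); // every rank sees the data
}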
namespace ParallelAllReduce {

    // KeyValuePair reductions use a custom MPI_Op so that key and value travel together.
    // (A usage sketch for these overloads appears after this namespace.)

    template<typename K, typename V>
    void Max (KeyValuePair<K,V>& vi, MPI_Comm comm) {
        MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
                      ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                      ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                                 amrex::Maximum<KeyValuePair<K,V>>>(), comm);
    }

    template<typename K, typename V>
    void Max (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
        MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
                      ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                      ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                                 amrex::Maximum<KeyValuePair<K,V>>>(), comm);
    }

    template<typename K, typename V>
    void Min (KeyValuePair<K,V>& vi, MPI_Comm comm) {
        MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
                      ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                      ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                                 amrex::Minimum<KeyValuePair<K,V>>>(), comm);
    }

    template<typename K, typename V>
    void Min (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
        MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
                      ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                      ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                                 amrex::Minimum<KeyValuePair<K,V>>>(), comm);
    }
    // Generic all-reduce wrappers: scalar, array, and a batch of scalars via reference_wrapper.

    template<typename T> void Max (T& v, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::max, v, -1, comm); }
    template<typename T> void Max (T* v, int cnt, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::max, v, cnt, -1, comm); }
    template<typename T> void Max (Vector<std::reference_wrapper<T> > const& v, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::max, v, -1, comm); }

    template<typename T> void Min (T& v, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::min, v, -1, comm); }
    template<typename T> void Min (T* v, int cnt, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::min, v, cnt, -1, comm); }
    template<typename T> void Min (Vector<std::reference_wrapper<T> > const& v, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::min, v, -1, comm); }

    template<typename T> void Sum (T& v, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::sum, v, -1, comm); }
    template<typename T> void Sum (T* v, int cnt, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::sum, v, cnt, -1, comm); }
    template<typename T> void Sum (Vector<std::reference_wrapper<T> > const& v, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::sum, v, -1, comm); }

    inline void Or (bool& v, MPI_Comm comm) {
        auto iv = static_cast<int>(v);
        detail::Reduce(detail::ReduceOp::lor, iv, -1, comm);
        v = static_cast<bool>(iv);
    }

    inline void And (bool& v, MPI_Comm comm) {
        auto iv = static_cast<int>(v);
        detail::Reduce(detail::ReduceOp::land, iv, -1, comm);
        v = static_cast<bool>(iv);
    }
}
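// --- Usage sketches (illustrative; not part of AMReX) ---
// Typical all-reduce calls: every rank passes its local partial result and gets the
// global result back in place.  Function and variable names are made up for the sketch.
inline void allreduce_sketch (MPI_Comm comm)
{
    amrex::Real partial_sum = 1.0;
    amrex::ParallelAllReduce::Sum(partial_sum, comm);   // scalar, reduced in place on all ranks

    amrex::Real bounds[2] = {0.0, 1.0};
    amrex::ParallelAllReduce::Min(bounds, 2, comm);     // element-wise over an array

    amrex::Real a = 0.0, b = 0.0;
    amrex::Vector<std::reference_wrapper<amrex::Real>> batch{std::ref(a), std::ref(b)};
    amrex::ParallelAllReduce::Max(batch, comm);         // several scalars in one message

    bool failed = false;
    amrex::ParallelAllReduce::Or(failed, comm);         // true afterwards if any rank set it
}

// The KeyValuePair overloads reduce a value together with its key (e.g. the owning
// rank), so every rank learns both the extreme value and where it came from.  The pair's
// member names belong to amrex::ValLocPair (see AMReX_ValLocPair.H) and are left to a
// comment here; only the call shape is shown.
inline void keyvalue_max_sketch (MPI_Comm comm)
{
    amrex::KeyValuePair<int, amrex::Real> kv{}; // fill with this rank's key and local value
    amrex::ParallelAllReduce::Max(kv, comm);    // kv now holds the globally maximal entry everywhere
}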
namespace ParallelReduce {

    // Rooted variants: only rank "root" ends up with the reduced result.

    template<typename K, typename V>
    void Max (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
        auto tmp = vi; // send from a copy; MPI_IN_PLACE is only valid on the root rank
        MPI_Reduce(&tmp, &vi, 1,
                   ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                   ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                              amrex::Maximum<KeyValuePair<K,V>>>(), root, comm);
    }

    template<typename K, typename V>
    void Max (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
        const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
            (void const*)(MPI_IN_PLACE) : (void const*)vi;
        MPI_Reduce(sendbuf, vi, cnt,
                   ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                   ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                              amrex::Maximum<KeyValuePair<K,V>>>(), root, comm);
    }

    template<typename K, typename V>
    void Min (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
        auto tmp = vi;
        MPI_Reduce(&tmp, &vi, 1,
                   ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                   ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                              amrex::Minimum<KeyValuePair<K,V>>>(), root, comm);
    }

    template<typename K, typename V>
    void Min (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
        const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
            (void const*)(MPI_IN_PLACE) : (void const*)vi;
        MPI_Reduce(sendbuf, vi, cnt,
                   ParallelDescriptor::Mpi_typemap<KeyValuePair<K,V>>::type(),
                   ParallelDescriptor::Mpi_op<KeyValuePair<K,V>,
                                              amrex::Minimum<KeyValuePair<K,V>>>(), root, comm);
    }
    template<typename T> void Max (T& v, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::max, v, root, comm); }
    template<typename T> void Max (T* v, int cnt, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::max, v, cnt, root, comm); }
    template<typename T> void Max (Vector<std::reference_wrapper<T> > const& v, int root, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::max, v, root, comm); }

    template<typename T> void Min (T& v, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::min, v, root, comm); }
    template<typename T> void Min (T* v, int cnt, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::min, v, cnt, root, comm); }
    template<typename T> void Min (Vector<std::reference_wrapper<T> > const& v, int root, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::min, v, root, comm); }

    template<typename T> void Sum (T& v, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::sum, v, root, comm); }
    template<typename T> void Sum (T* v, int cnt, int root, MPI_Comm comm)
        { detail::Reduce(detail::ReduceOp::sum, v, cnt, root, comm); }
    template<typename T> void Sum (Vector<std::reference_wrapper<T> > const& v, int root, MPI_Comm comm)
        { detail::Reduce<T>(detail::ReduceOp::sum, v, root, comm); }

    inline void Or (bool& v, int root, MPI_Comm comm) {
        auto iv = static_cast<int>(v);
        detail::Reduce(detail::ReduceOp::lor, iv, root, comm);
        v = static_cast<bool>(iv);
    }

    inline void And (bool& v, int root, MPI_Comm comm) {
        auto iv = static_cast<int>(v);
        detail::Reduce(detail::ReduceOp::land, iv, root, comm);
        v = static_cast<bool>(iv);
    }
}
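// --- Usage sketch (illustrative; not part of AMReX) ---
// Rooted reduction to the I/O rank: cheaper than an all-reduce when only one rank
// needs the result, e.g. for printing a diagnostic.  The function name and the
// variable ncells_local are made up for this sketch.
inline void rooted_sum_sketch (amrex::Long ncells_local)
{
    MPI_Comm comm = amrex::ParallelDescriptor::Communicator();
    const int ioproc = amrex::ParallelDescriptor::IOProcessorNumber();

    amrex::ParallelReduce::Sum(ncells_local, ioproc, comm); // only ioproc holds the total afterwards
    if (amrex::ParallelDescriptor::IOProcessor()) {
        amrex::Print() << "total cells: " << ncells_local << "\n";
    }
}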