Block-Structured AMR Software Framework
AMReX_ParallelReduce.H
Go to the documentation of this file.
1 #ifndef AMREX_PARALLEL_REDUCE_H_
2 #define AMREX_PARALLEL_REDUCE_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX.H>
6 #include <AMReX_Functional.H>
8 #include <AMReX_Print.H>
9 #include <AMReX_Vector.H>
10 #include <type_traits>
11 
12 namespace amrex {
13 
14 namespace detail {
15 
16  enum ReduceOp : int {
17  max = 0,
18  min,
19  sum,
20  lor,
21  land
22  };
23 
24 #ifdef BL_USE_MPI
25 
26  // NOTE: the order of these needs to match the order in the ReduceOp enum above
27  const std::array<MPI_Op, 5> mpi_ops = {{
28  MPI_MAX,
29  MPI_MIN,
30  MPI_SUM,
31  MPI_LOR,
32  MPI_LAND
33  }};
34 
35  template<typename T>
36  inline void Reduce (ReduceOp op, T* v, int cnt, int root, MPI_Comm comm)
37  {
38  auto mpi_op = mpi_ops[static_cast<int>(op)]; // NOLINT
39  if (root == -1) {
40  // TODO: add BL_COMM_PROFILE commands
41  MPI_Allreduce(MPI_IN_PLACE, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
42  mpi_op, comm);
43  } else {
44  // TODO: add BL_COMM_PROFILE commands
45  const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
46  (void const*)(MPI_IN_PLACE) : (void const*) v;
47  MPI_Reduce(sendbuf, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
48  mpi_op, root, comm);
49  }
50  }
51 
52  template<typename T>
53  inline void Reduce (ReduceOp op, T& v, int root, MPI_Comm comm) {
54  Reduce(op, &v, 1, root, comm);
55  }
56 
57  template<typename T>
58  inline void Reduce (ReduceOp op, Vector<std::reference_wrapper<T> > const & v,
59  int root, MPI_Comm comm)
60  {
61  Vector<T> sndrcv(std::begin(v), std::end(v));
62  Reduce(op, sndrcv.data(), v.size(), root, comm);
63  for (int i = 0; i < v.size(); ++i) {
64  v[i].get() = sndrcv[i];
65  }
66  }
67 
68  template<typename T>
69  inline void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm)
70  {
72  if (root == -1) {
73  // TODO: check these BL_COMM_PROFILE commands
74  BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::BeforeCall(),
75  BLProfiler::NoTag());
76  // const_cast for MPI-2
77  MPI_Allgather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, comm);
78  BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::AfterCall(),
79  BLProfiler::NoTag());
80  } else {
81  // TODO: add BL_COMM_PROFILE commands
82  MPI_Gather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, root, comm);
83  }
84  }
85 
86  template<typename T>
87  inline void Gather (const T& v, T * vs, int root, MPI_Comm comm) {
88  Gather(&v, 1, vs, root, comm);
89  }
90 
91 #else
92  template<typename T> void Reduce (ReduceOp /*op*/, T* /*v*/, int /*cnt*/, int /*root*/, MPI_Comm /*comm*/) {}
93  template<typename T> void Reduce (ReduceOp /*op*/, T& /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
94  template<typename T> void Reduce (ReduceOp /*op*/, Vector<std::reference_wrapper<T> > const & /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
95 
96  template<typename T> void Gather (const T* /*v*/, int /*cnt*/, T* /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
97  template<typename T> void Gather (const T& /*v*/, T * /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
98 #endif
99 }
100 
101 namespace ParallelAllGather {
102  template<typename T>
103  void AllGather (const T* v, int cnt, T* vs, MPI_Comm comm) {
104  detail::Gather(v, cnt, vs, -1, comm);
105  }
106  template<typename T>
107  void AllGather (const T& v, T* vs, MPI_Comm comm) {
108  detail::Gather(v, vs, -1, comm);
109  }
110 }
111 
112 namespace ParallelGather {
113  template<typename T>
114  void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm) {
115  detail::Gather(v, cnt, vs, root, comm);
116  }
117  template<typename T>
118  void Gather (const T& v, T* vs, int root, MPI_Comm comm) {
119  detail::Gather(v, vs, root, comm);
120  }
121 }
122 
123 namespace ParallelAllReduce {
124 
125  template<typename K, typename V>
126  void Max (KeyValuePair<K,V>& vi, MPI_Comm comm) {
127 #ifdef AMREX_USE_MPI
128  using T = KeyValuePair<K,V>;
129  MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
131  // () needed to work around PETSc macro
133 #else
134  amrex::ignore_unused(vi, comm);
135 #endif
136  }
137 
138  template<typename K, typename V>
139  void Max (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
140 #ifdef AMREX_USE_MPI
141  using T = KeyValuePair<K,V>;
142  MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
144  // () needed to work around PETSc macro
146 #else
147  amrex::ignore_unused(vi, cnt, comm);
148 #endif
149  }
150 
151  template<typename K, typename V>
152  void Min (KeyValuePair<K,V>& vi, MPI_Comm comm) {
153 #ifdef AMREX_USE_MPI
154  using T = KeyValuePair<K,V>;
155  MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
157  // () needed to work around PETSc macro
159 #else
160  amrex::ignore_unused(vi, comm);
161 #endif
162  }
163 
164  template<typename K, typename V>
165  void Min (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
166 #ifdef AMREX_USE_MPI
167  using T = KeyValuePair<K,V>;
168  MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
170  // () needed to work around PETSc macro
172 #else
173  amrex::ignore_unused(vi, cnt, comm);
174 #endif
175  }
176 
177  template<typename T>
178  void Max (T& v, MPI_Comm comm) {
180  }
181  template<typename T>
182  void Max (T* v, int cnt, MPI_Comm comm) {
183  detail::Reduce(detail::ReduceOp::max, v, cnt, -1, comm);
184  }
185  template<typename T>
186  void Max (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
187  detail::Reduce<T>(detail::ReduceOp::max, v, -1, comm);
188  }
189 
190  template<typename T>
191  void Min (T& v, MPI_Comm comm) {
193  }
194  template<typename T>
195  void Min (T* v, int cnt, MPI_Comm comm) {
196  detail::Reduce(detail::ReduceOp::min, v, cnt, -1, comm);
197  }
198  template<typename T>
199  void Min (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
200  detail::Reduce<T>(detail::ReduceOp::min, v, -1, comm);
201  }
202 
203  template<typename T>
204  void Sum (T& v, MPI_Comm comm) {
206  }
207  template<typename T>
208  void Sum (T* v, int cnt, MPI_Comm comm) {
209  detail::Reduce(detail::ReduceOp::sum, v, cnt, -1, comm);
210  }
211  template<typename T>
212  void Sum (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
213  detail::Reduce<T>(detail::ReduceOp::sum, v, -1, comm);
214  }
215 
216  inline void Or (bool & v, MPI_Comm comm) {
217  auto iv = static_cast<int>(v);
218  detail::Reduce(detail::ReduceOp::lor, iv, -1, comm);
219  v = static_cast<bool>(iv);
220  }
221 
222  inline void And (bool & v, MPI_Comm comm) {
223  auto iv = static_cast<int>(v);
224  detail::Reduce(detail::ReduceOp::land, iv, -1, comm);
225  v = static_cast<bool>(iv);
226  }
227 }
228 
229 namespace ParallelReduce {
230 
231  template<typename K, typename V>
232  void Max (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
233 #ifdef AMREX_USE_MPI
234  auto tmp = vi;
235  using T = KeyValuePair<K,V>;
236  MPI_Reduce(&tmp, &vi, 1,
238  // () needed to work around PETSc macro
240  root, comm);
241 #else
242  amrex::ignore_unused(vi, root, comm);
243 #endif
244  }
245 
246  template<typename K, typename V>
247  void Max (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
248 #ifdef AMREX_USE_MPI
249  const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
250  (void const*)(MPI_IN_PLACE) : (void const*)vi;
251  using T = KeyValuePair<K,V>;
252  MPI_Reduce(sendbuf, vi, cnt,
254  // () needed to work around PETSc macro
256  root, comm);
257 #else
258  amrex::ignore_unused(vi, cnt, root, comm);
259 #endif
260  }
261 
262  template<typename K, typename V>
263  void Min (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
264 #ifdef AMREX_USE_MPI
265  auto tmp = vi;
266  using T = KeyValuePair<K,V>;
267  MPI_Reduce(&tmp, &vi, 1,
269  // () needed to work around PETSc macro
271  root, comm);
272 #else
273  amrex::ignore_unused(vi, root, comm);
274 #endif
275  }
276 
277  template<typename K, typename V>
278  void Min (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
279 #ifdef AMREX_USE_MPI
280  const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
281  (void const*)(MPI_IN_PLACE) : (void const*)vi;
282  using T = KeyValuePair<K,V>;
283  MPI_Reduce(sendbuf, vi, cnt,
285  // () needed to work around PETSc macro
287  root, comm);
288 #else
289  amrex::ignore_unused(vi, cnt, root, comm);
290 #endif
291  }
292 
293  template<typename T>
294  void Max (T& v, int root, MPI_Comm comm) {
295  detail::Reduce(detail::ReduceOp::max, v, root, comm);
296  }
297  template<typename T>
298  void Max (T* v, int cnt, int root, MPI_Comm comm) {
299  detail::Reduce(detail::ReduceOp::max, v, cnt, root, comm);
300  }
301  template<typename T>
302  void Max (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
303  detail::Reduce<T>(detail::ReduceOp::max, v, root, comm);
304  }
305 
306  template<typename T>
307  void Min (T& v, int root, MPI_Comm comm) {
308  detail::Reduce(detail::ReduceOp::min, v, root, comm);
309  }
310  template<typename T>
311  void Min (T* v, int cnt, int root, MPI_Comm comm) {
312  detail::Reduce(detail::ReduceOp::min, v, cnt, root, comm);
313  }
314  template<typename T>
315  void Min (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
316  detail::Reduce<T>(detail::ReduceOp::min, v, root, comm);
317  }
318 
319  template<typename T>
320  void Sum (T& v, int root, MPI_Comm comm) {
321  detail::Reduce(detail::ReduceOp::sum, v, root, comm);
322  }
323  template<typename T>
324  void Sum (T* v, int cnt, int root, MPI_Comm comm) {
325  detail::Reduce(detail::ReduceOp::sum, v, cnt, root, comm);
326  }
327  template<typename T>
328  void Sum (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
329  detail::Reduce<T>(detail::ReduceOp::sum, v, root, comm);
330  }
331 
332  inline void Or (bool & v, int root, MPI_Comm comm) {
333  auto iv = static_cast<int>(v);
334  detail::Reduce(detail::ReduceOp::lor, iv, root, comm);
335  v = static_cast<bool>(iv);
336  }
337 
338  inline void And (bool & v, int root, MPI_Comm comm) {
339  auto iv = static_cast<int>(v);
340  detail::Reduce(detail::ReduceOp::land, iv, root, comm);
341  v = static_cast<bool>(iv);
342  }
343 }
344 
345 }
346 
347 #endif
#define BL_COMM_PROFILE(cft, size, pid, tag)
Definition: AMReX_BLProfiler.H:587
int MPI_Comm
Definition: AMReX_ccse-mpi.H:47
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
void AllGather(const T *v, int cnt, T *vs, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:103
void Or(bool &v, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:216
void And(bool &v, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:222
void Min(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:152
void Sum(T &v, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:204
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:126
int MyProc() noexcept
return the rank number local to the current Parallel Context
Definition: AMReX_ParallelDescriptor.H:125
MPI_Op Mpi_op()
Definition: AMReX_ParallelDescriptor.H:1566
void Gather(const T *v, int cnt, T *vs, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:114
void Min(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:263
void Or(bool &v, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:332
void Max(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:232
void And(bool &v, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:338
void Sum(T &v, int root, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:320
ReduceOp
Definition: AMReX_ParallelReduce.H:16
@ land
Definition: AMReX_ParallelReduce.H:21
@ min
Definition: AMReX_ParallelReduce.H:18
@ max
Definition: AMReX_ParallelReduce.H:17
@ lor
Definition: AMReX_ParallelReduce.H:20
@ sum
Definition: AMReX_ParallelReduce.H:19
void Reduce(ReduceOp, T *, int, int, MPI_Comm)
Definition: AMReX_ParallelReduce.H:92
void Gather(const T *, int, T *, int, MPI_Comm)
Definition: AMReX_ParallelReduce.H:96
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1890
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1881
Definition: AMReX_FabArrayCommI.H:896
Definition: AMReX_Functional.H:41
Definition: AMReX_Functional.H:32
Communication datatype (note: this structure also works without MPI)
Definition: AMReX_ccse-mpi.H:68
Definition: AMReX_ValLocPair.H:10