AMReX — Block-Structured AMR Software Framework
AMReX_ParallelReduce.H
Go to the documentation of this file.
1#ifndef AMREX_PARALLEL_REDUCE_H_
2#define AMREX_PARALLEL_REDUCE_H_
3#include <AMReX_Config.H>
4
5#include <AMReX.H>
6#include <AMReX_Functional.H>
8#include <AMReX_Print.H>
9#include <AMReX_Vector.H>
10#include <type_traits>
11
12namespace amrex {
13
15namespace detail {
16
17 enum ReduceOp : int {
18 max = 0,
19 min,
20 sum,
21 lor,
22 land
23 };
24
25#ifdef BL_USE_MPI
26
27 // NOTE: the order of these needs to match the order in the ReduceOp enum above
28 const std::array<MPI_Op, 5> mpi_ops = {{
29 MPI_MAX,
30 MPI_MIN,
31 MPI_SUM,
32 MPI_LOR,
33 MPI_LAND
34 }};
35
36 template<typename T>
37 inline void Reduce (ReduceOp op, T* v, int cnt, int root, MPI_Comm comm)
38 {
39 auto mpi_op = mpi_ops[static_cast<int>(op)]; // NOLINT
40 if (root == -1) {
41 // TODO: add BL_COMM_PROFILE commands
42 MPI_Allreduce(MPI_IN_PLACE, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
43 mpi_op, comm);
44 } else {
45 // TODO: add BL_COMM_PROFILE commands
46 const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
47 (void const*)(MPI_IN_PLACE) : (void const*) v;
48 MPI_Reduce(sendbuf, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
49 mpi_op, root, comm);
50 }
51 }
52
53 template<typename T>
54 inline void Reduce (ReduceOp op, T& v, int root, MPI_Comm comm) {
55 Reduce(op, &v, 1, root, comm);
56 }
57
58 template<typename T>
59 inline void Reduce (ReduceOp op, Vector<std::reference_wrapper<T> > const & v,
60 int root, MPI_Comm comm)
61 {
62 Vector<T> sndrcv(std::begin(v), std::end(v));
63 Reduce(op, sndrcv.data(), v.size(), root, comm);
64 for (int i = 0; i < v.size(); ++i) {
65 v[i].get() = sndrcv[i];
66 }
67 }
68
69 template<typename T>
70 inline void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm)
71 {
72 auto mpi_type = ParallelDescriptor::Mpi_typemap<T>::type();
73 if (root == -1) {
74 // TODO: check these BL_COMM_PROFILE commands
75 BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::BeforeCall(),
76 BLProfiler::NoTag());
77 // const_cast for MPI-2
78 MPI_Allgather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, comm);
79 BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::AfterCall(),
80 BLProfiler::NoTag());
81 } else {
82 // TODO: add BL_COMM_PROFILE commands
83 MPI_Gather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, root, comm);
84 }
85 }
86
87 template<typename T>
88 inline void Gather (const T& v, T * vs, int root, MPI_Comm comm) {
89 Gather(&v, 1, vs, root, comm);
90 }
91
92#else
93 template<typename T> void Reduce (ReduceOp /*op*/, T* /*v*/, int /*cnt*/, int /*root*/, MPI_Comm /*comm*/) {}
94 template<typename T> void Reduce (ReduceOp /*op*/, T& /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
95 template<typename T> void Reduce (ReduceOp /*op*/, Vector<std::reference_wrapper<T> > const & /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
96
97 template<typename T> void Gather (const T* /*v*/, int /*cnt*/, T* /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
98 template<typename T> void Gather (const T& /*v*/, T * /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
99#endif
100}
102
103namespace ParallelAllGather {
105 template<typename T>
106 void AllGather (const T* v, int cnt, T* vs, MPI_Comm comm) {
107 detail::Gather(v, cnt, vs, -1, comm);
108 }
110 template<typename T>
111 void AllGather (const T& v, T* vs, MPI_Comm comm) {
112 detail::Gather(v, vs, -1, comm);
113 }
114}
115
116namespace ParallelGather {
118 template<typename T>
119 void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm) {
120 detail::Gather(v, cnt, vs, root, comm);
121 }
123 template<typename T>
124 void Gather (const T& v, T* vs, int root, MPI_Comm comm) {
125 detail::Gather(v, vs, root, comm);
126 }
127}
128
129namespace ParallelAllReduce {
130
132 template<typename K, typename V>
133 void Max (KeyValuePair<K,V>& vi, MPI_Comm comm) {
134#ifdef AMREX_USE_MPI
135 using T = KeyValuePair<K,V>;
136 MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
138 // () needed to work around PETSc macro
140#else
141 amrex::ignore_unused(vi, comm);
142#endif
143 }
144
146 template<typename K, typename V>
147 void Max (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
148#ifdef AMREX_USE_MPI
149 using T = KeyValuePair<K,V>;
150 MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
152 // () needed to work around PETSc macro
154#else
155 amrex::ignore_unused(vi, cnt, comm);
156#endif
157 }
158
160 template<typename K, typename V>
161 void Min (KeyValuePair<K,V>& vi, MPI_Comm comm) {
162#ifdef AMREX_USE_MPI
163 using T = KeyValuePair<K,V>;
164 MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
166 // () needed to work around PETSc macro
168#else
169 amrex::ignore_unused(vi, comm);
170#endif
171 }
172
174 template<typename K, typename V>
175 void Min (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
176#ifdef AMREX_USE_MPI
177 using T = KeyValuePair<K,V>;
178 MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
180 // () needed to work around PETSc macro
182#else
183 amrex::ignore_unused(vi, cnt, comm);
184#endif
185 }
186
188 template<typename T>
189 void Max (T& v, MPI_Comm comm) {
190 detail::Reduce(detail::ReduceOp::max, v, -1, comm);
191 }
193 template<typename T>
194 void Max (T* v, int cnt, MPI_Comm comm) {
195 detail::Reduce(detail::ReduceOp::max, v, cnt, -1, comm);
196 }
198 template<typename T>
199 void Max (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
200 detail::Reduce<T>(detail::ReduceOp::max, v, -1, comm);
201 }
202
204 template<typename T>
205 void Min (T& v, MPI_Comm comm) {
206 detail::Reduce(detail::ReduceOp::min, v, -1, comm);
207 }
209 template<typename T>
210 void Min (T* v, int cnt, MPI_Comm comm) {
211 detail::Reduce(detail::ReduceOp::min, v, cnt, -1, comm);
212 }
214 template<typename T>
215 void Min (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
216 detail::Reduce<T>(detail::ReduceOp::min, v, -1, comm);
217 }
218
220 template<typename T>
221 void Sum (T& v, MPI_Comm comm) {
222 detail::Reduce(detail::ReduceOp::sum, v, -1, comm);
223 }
225 template<typename T>
226 void Sum (T* v, int cnt, MPI_Comm comm) {
227 detail::Reduce(detail::ReduceOp::sum, v, cnt, -1, comm);
228 }
230 template<typename T>
231 void Sum (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
232 detail::Reduce<T>(detail::ReduceOp::sum, v, -1, comm);
233 }
234
236 inline void Or (bool & v, MPI_Comm comm) {
237 auto iv = static_cast<int>(v);
238 detail::Reduce(detail::ReduceOp::lor, iv, -1, comm);
239 v = static_cast<bool>(iv);
240 }
241
243 inline void And (bool & v, MPI_Comm comm) {
244 auto iv = static_cast<int>(v);
245 detail::Reduce(detail::ReduceOp::land, iv, -1, comm);
246 v = static_cast<bool>(iv);
247 }
248}
249
250namespace ParallelReduce {
251
253 template<typename K, typename V>
254 void Max (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
255#ifdef AMREX_USE_MPI
256 auto tmp = vi;
257 using T = KeyValuePair<K,V>;
258 MPI_Reduce(&tmp, &vi, 1,
260 // () needed to work around PETSc macro
262 root, comm);
263#else
264 amrex::ignore_unused(vi, root, comm);
265#endif
266 }
267
269 template<typename K, typename V>
270 void Max (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
271#ifdef AMREX_USE_MPI
272 const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
273 (void const*)(MPI_IN_PLACE) : (void const*)vi;
274 using T = KeyValuePair<K,V>;
275 MPI_Reduce(sendbuf, vi, cnt,
277 // () needed to work around PETSc macro
279 root, comm);
280#else
281 amrex::ignore_unused(vi, cnt, root, comm);
282#endif
283 }
284
286 template<typename K, typename V>
287 void Min (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
288#ifdef AMREX_USE_MPI
289 auto tmp = vi;
290 using T = KeyValuePair<K,V>;
291 MPI_Reduce(&tmp, &vi, 1,
293 // () needed to work around PETSc macro
295 root, comm);
296#else
297 amrex::ignore_unused(vi, root, comm);
298#endif
299 }
300
302 template<typename K, typename V>
303 void Min (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
304#ifdef AMREX_USE_MPI
305 const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
306 (void const*)(MPI_IN_PLACE) : (void const*)vi;
307 using T = KeyValuePair<K,V>;
308 MPI_Reduce(sendbuf, vi, cnt,
310 // () needed to work around PETSc macro
312 root, comm);
313#else
314 amrex::ignore_unused(vi, cnt, root, comm);
315#endif
316 }
317
319 template<typename T>
320 void Max (T& v, int root, MPI_Comm comm) {
321 detail::Reduce(detail::ReduceOp::max, v, root, comm);
322 }
324 template<typename T>
325 void Max (T* v, int cnt, int root, MPI_Comm comm) {
326 detail::Reduce(detail::ReduceOp::max, v, cnt, root, comm);
327 }
329 template<typename T>
330 void Max (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
331 detail::Reduce<T>(detail::ReduceOp::max, v, root, comm);
332 }
333
335 template<typename T>
336 void Min (T& v, int root, MPI_Comm comm) {
337 detail::Reduce(detail::ReduceOp::min, v, root, comm);
338 }
340 template<typename T>
341 void Min (T* v, int cnt, int root, MPI_Comm comm) {
342 detail::Reduce(detail::ReduceOp::min, v, cnt, root, comm);
343 }
345 template<typename T>
346 void Min (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
347 detail::Reduce<T>(detail::ReduceOp::min, v, root, comm);
348 }
349
351 template<typename T>
352 void Sum (T& v, int root, MPI_Comm comm) {
353 detail::Reduce(detail::ReduceOp::sum, v, root, comm);
354 }
356 template<typename T>
357 void Sum (T* v, int cnt, int root, MPI_Comm comm) {
358 detail::Reduce(detail::ReduceOp::sum, v, cnt, root, comm);
359 }
361 template<typename T>
362 void Sum (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
363 detail::Reduce<T>(detail::ReduceOp::sum, v, root, comm);
364 }
365
367 inline void Or (bool & v, int root, MPI_Comm comm) {
368 auto iv = static_cast<int>(v);
369 detail::Reduce(detail::ReduceOp::lor, iv, root, comm);
370 v = static_cast<bool>(iv);
371 }
372
374 inline void And (bool & v, int root, MPI_Comm comm) {
375 auto iv = static_cast<int>(v);
376 detail::Reduce(detail::ReduceOp::land, iv, root, comm);
377 v = static_cast<bool>(iv);
378 }
379}
380
381}
382
383#endif
#define BL_COMM_PROFILE(cft, size, pid, tag)
Definition AMReX_BLProfiler.H:587
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
void Or(bool &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:236
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
void Gather(Real const *sendbuf, int nsend, Real *recvbuf, int root)
Definition AMReX_ParallelDescriptor.cpp:1173
void Min(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:287
void Or(bool &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:367
void Max(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:254
void And(bool &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:243
void Min(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:161
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:221
void And(bool &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:374
void AllGather(const T *v, int cnt, T *vs, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:106
void Sum(T &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:352
void Gather(const T *v, int cnt, T *vs, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:119
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:133
MPI_Op Mpi_op()
Definition AMReX_ParallelDescriptor.H:1652
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:138
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:21
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:35
Definition AMReX_Functional.H:41
Definition AMReX_Functional.H:32
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78
Definition AMReX_ValLocPair.H:10