Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_ParallelReduce.H
Go to the documentation of this file.
1#ifndef AMREX_PARALLEL_REDUCE_H_
2#define AMREX_PARALLEL_REDUCE_H_
#include <AMReX_Config.H>

#include <AMReX.H>
#include <AMReX_Functional.H>
#include <AMReX_ParallelDescriptor.H>
#include <AMReX_Print.H>
#include <AMReX_Vector.H>

#include <array>
#include <functional>
#include <type_traits>
11
12namespace amrex {
13
14namespace detail {
15
16 enum ReduceOp : int {
17 max = 0,
21 land
22 };
23
24#ifdef BL_USE_MPI
25
26 // NOTE: the order of these needs to match the order in the ReduceOp enum above
27 const std::array<MPI_Op, 5> mpi_ops = {{
28 MPI_MAX,
29 MPI_MIN,
30 MPI_SUM,
31 MPI_LOR,
32 MPI_LAND
33 }};
34
35 template<typename T>
36 inline void Reduce (ReduceOp op, T* v, int cnt, int root, MPI_Comm comm)
37 {
38 auto mpi_op = mpi_ops[static_cast<int>(op)]; // NOLINT
39 if (root == -1) {
40 // TODO: add BL_COMM_PROFILE commands
41 MPI_Allreduce(MPI_IN_PLACE, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
42 mpi_op, comm);
43 } else {
44 // TODO: add BL_COMM_PROFILE commands
45 const auto* sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
46 (void const*)(MPI_IN_PLACE) : (void const*) v;
47 MPI_Reduce(sendbuf, v, cnt, ParallelDescriptor::Mpi_typemap<T>::type(),
48 mpi_op, root, comm);
49 }
50 }
51
52 template<typename T>
53 inline void Reduce (ReduceOp op, T& v, int root, MPI_Comm comm) {
54 Reduce(op, &v, 1, root, comm);
55 }
56
57 template<typename T>
58 inline void Reduce (ReduceOp op, Vector<std::reference_wrapper<T> > const & v,
59 int root, MPI_Comm comm)
60 {
61 Vector<T> sndrcv(std::begin(v), std::end(v));
62 Reduce(op, sndrcv.data(), v.size(), root, comm);
63 for (int i = 0; i < v.size(); ++i) {
64 v[i].get() = sndrcv[i];
65 }
66 }
67
68 template<typename T>
69 inline void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm)
70 {
72 if (root == -1) {
73 // TODO: check these BL_COMM_PROFILE commands
74 BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::BeforeCall(),
75 BLProfiler::NoTag());
76 // const_cast for MPI-2
77 MPI_Allgather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, comm);
78 BL_COMM_PROFILE(BLProfiler::Allgather, sizeof(T), BLProfiler::AfterCall(),
79 BLProfiler::NoTag());
80 } else {
81 // TODO: add BL_COMM_PROFILE commands
82 MPI_Gather(const_cast<T*>(v), cnt, mpi_type, vs, cnt, mpi_type, root, comm);
83 }
84 }
85
86 template<typename T>
87 inline void Gather (const T& v, T * vs, int root, MPI_Comm comm) {
88 Gather(&v, 1, vs, root, comm);
89 }
90
91#else
92 template<typename T> void Reduce (ReduceOp /*op*/, T* /*v*/, int /*cnt*/, int /*root*/, MPI_Comm /*comm*/) {}
93 template<typename T> void Reduce (ReduceOp /*op*/, T& /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
94 template<typename T> void Reduce (ReduceOp /*op*/, Vector<std::reference_wrapper<T> > const & /*v*/, int /*root*/, MPI_Comm /*comm*/) {}
95
96 template<typename T> void Gather (const T* /*v*/, int /*cnt*/, T* /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
97 template<typename T> void Gather (const T& /*v*/, T * /*vs*/, int /*root*/, MPI_Comm /*comm*/) {}
98#endif
99}
100
101namespace ParallelAllGather {
102 template<typename T>
103 void AllGather (const T* v, int cnt, T* vs, MPI_Comm comm) {
104 detail::Gather(v, cnt, vs, -1, comm);
105 }
106 template<typename T>
107 void AllGather (const T& v, T* vs, MPI_Comm comm) {
108 detail::Gather(v, vs, -1, comm);
109 }
110}
111
112namespace ParallelGather {
113 template<typename T>
114 void Gather (const T* v, int cnt, T* vs, int root, MPI_Comm comm) {
115 detail::Gather(v, cnt, vs, root, comm);
116 }
117 template<typename T>
118 void Gather (const T& v, T* vs, int root, MPI_Comm comm) {
119 detail::Gather(v, vs, root, comm);
120 }
121}
122
123namespace ParallelAllReduce {
124
125 template<typename K, typename V>
126 void Max (KeyValuePair<K,V>& vi, MPI_Comm comm) {
127#ifdef AMREX_USE_MPI
128 using T = KeyValuePair<K,V>;
129 MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
131 // () needed to work around PETSc macro
133#else
134 amrex::ignore_unused(vi, comm);
135#endif
136 }
137
138 template<typename K, typename V>
139 void Max (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
140#ifdef AMREX_USE_MPI
141 using T = KeyValuePair<K,V>;
142 MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
144 // () needed to work around PETSc macro
146#else
147 amrex::ignore_unused(vi, cnt, comm);
148#endif
149 }
150
151 template<typename K, typename V>
152 void Min (KeyValuePair<K,V>& vi, MPI_Comm comm) {
153#ifdef AMREX_USE_MPI
154 using T = KeyValuePair<K,V>;
155 MPI_Allreduce(MPI_IN_PLACE, &vi, 1,
157 // () needed to work around PETSc macro
159#else
160 amrex::ignore_unused(vi, comm);
161#endif
162 }
163
164 template<typename K, typename V>
165 void Min (KeyValuePair<K,V>* vi, int cnt, MPI_Comm comm) {
166#ifdef AMREX_USE_MPI
167 using T = KeyValuePair<K,V>;
168 MPI_Allreduce(MPI_IN_PLACE, vi, cnt,
170 // () needed to work around PETSc macro
172#else
173 amrex::ignore_unused(vi, cnt, comm);
174#endif
175 }
176
177 template<typename T>
178 void Max (T& v, MPI_Comm comm) {
180 }
181 template<typename T>
182 void Max (T* v, int cnt, MPI_Comm comm) {
183 detail::Reduce(detail::ReduceOp::max, v, cnt, -1, comm);
184 }
185 template<typename T>
186 void Max (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
187 detail::Reduce<T>(detail::ReduceOp::max, v, -1, comm);
188 }
189
190 template<typename T>
191 void Min (T& v, MPI_Comm comm) {
193 }
194 template<typename T>
195 void Min (T* v, int cnt, MPI_Comm comm) {
196 detail::Reduce(detail::ReduceOp::min, v, cnt, -1, comm);
197 }
198 template<typename T>
199 void Min (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
200 detail::Reduce<T>(detail::ReduceOp::min, v, -1, comm);
201 }
202
203 template<typename T>
204 void Sum (T& v, MPI_Comm comm) {
206 }
207 template<typename T>
208 void Sum (T* v, int cnt, MPI_Comm comm) {
209 detail::Reduce(detail::ReduceOp::sum, v, cnt, -1, comm);
210 }
211 template<typename T>
212 void Sum (Vector<std::reference_wrapper<T> > v, MPI_Comm comm) {
213 detail::Reduce<T>(detail::ReduceOp::sum, v, -1, comm);
214 }
215
216 inline void Or (bool & v, MPI_Comm comm) {
217 auto iv = static_cast<int>(v);
219 v = static_cast<bool>(iv);
220 }
221
222 inline void And (bool & v, MPI_Comm comm) {
223 auto iv = static_cast<int>(v);
225 v = static_cast<bool>(iv);
226 }
227}
228
229namespace ParallelReduce {
230
231 template<typename K, typename V>
232 void Max (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
233#ifdef AMREX_USE_MPI
234 auto tmp = vi;
235 using T = KeyValuePair<K,V>;
236 MPI_Reduce(&tmp, &vi, 1,
238 // () needed to work around PETSc macro
240 root, comm);
241#else
242 amrex::ignore_unused(vi, root, comm);
243#endif
244 }
245
246 template<typename K, typename V>
247 void Max (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
248#ifdef AMREX_USE_MPI
249 const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
250 (void const*)(MPI_IN_PLACE) : (void const*)vi;
251 using T = KeyValuePair<K,V>;
252 MPI_Reduce(sendbuf, vi, cnt,
254 // () needed to work around PETSc macro
256 root, comm);
257#else
258 amrex::ignore_unused(vi, cnt, root, comm);
259#endif
260 }
261
262 template<typename K, typename V>
263 void Min (KeyValuePair<K,V>& vi, int root, MPI_Comm comm) {
264#ifdef AMREX_USE_MPI
265 auto tmp = vi;
266 using T = KeyValuePair<K,V>;
267 MPI_Reduce(&tmp, &vi, 1,
269 // () needed to work around PETSc macro
271 root, comm);
272#else
273 amrex::ignore_unused(vi, root, comm);
274#endif
275 }
276
277 template<typename K, typename V>
278 void Min (KeyValuePair<K,V>* vi, int cnt, int root, MPI_Comm comm) {
279#ifdef AMREX_USE_MPI
280 const auto *sendbuf = (ParallelDescriptor::MyProc(comm) == root) ?
281 (void const*)(MPI_IN_PLACE) : (void const*)vi;
282 using T = KeyValuePair<K,V>;
283 MPI_Reduce(sendbuf, vi, cnt,
285 // () needed to work around PETSc macro
287 root, comm);
288#else
289 amrex::ignore_unused(vi, cnt, root, comm);
290#endif
291 }
292
293 template<typename T>
294 void Max (T& v, int root, MPI_Comm comm) {
296 }
297 template<typename T>
298 void Max (T* v, int cnt, int root, MPI_Comm comm) {
299 detail::Reduce(detail::ReduceOp::max, v, cnt, root, comm);
300 }
301 template<typename T>
302 void Max (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
303 detail::Reduce<T>(detail::ReduceOp::max, v, root, comm);
304 }
305
306 template<typename T>
307 void Min (T& v, int root, MPI_Comm comm) {
309 }
310 template<typename T>
311 void Min (T* v, int cnt, int root, MPI_Comm comm) {
312 detail::Reduce(detail::ReduceOp::min, v, cnt, root, comm);
313 }
314 template<typename T>
315 void Min (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
316 detail::Reduce<T>(detail::ReduceOp::min, v, root, comm);
317 }
318
319 template<typename T>
320 void Sum (T& v, int root, MPI_Comm comm) {
322 }
323 template<typename T>
324 void Sum (T* v, int cnt, int root, MPI_Comm comm) {
325 detail::Reduce(detail::ReduceOp::sum, v, cnt, root, comm);
326 }
327 template<typename T>
328 void Sum (Vector<std::reference_wrapper<T> > v, int root, MPI_Comm comm) {
329 detail::Reduce<T>(detail::ReduceOp::sum, v, root, comm);
330 }
331
332 inline void Or (bool & v, int root, MPI_Comm comm) {
333 auto iv = static_cast<int>(v);
334 detail::Reduce(detail::ReduceOp::lor, iv, root, comm);
335 v = static_cast<bool>(iv);
336 }
337
338 inline void And (bool & v, int root, MPI_Comm comm) {
339 auto iv = static_cast<int>(v);
340 detail::Reduce(detail::ReduceOp::land, iv, root, comm);
341 v = static_cast<bool>(iv);
342 }
343}
344
345}
346
347#endif
#define BL_COMM_PROFILE(cft, size, pid, tag)
Definition AMReX_BLProfiler.H:587
int MPI_Comm
Definition AMReX_ccse-mpi.H:47
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
void AllGather(const T *v, int cnt, T *vs, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:103
void Or(bool &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:216
void And(bool &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:222
void Min(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:152
void Sum(T &v, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:204
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:126
int MyProc() noexcept
return the rank number local to the current Parallel Context
Definition AMReX_ParallelDescriptor.H:125
MPI_Op Mpi_op()
Definition AMReX_ParallelDescriptor.H:1566
void Gather(const T *v, int cnt, T *vs, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:114
void Min(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:263
void Or(bool &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:332
void Max(KeyValuePair< K, V > &vi, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:232
void And(bool &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:338
void Sum(T &v, int root, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:320
ReduceOp
Definition AMReX_ParallelReduce.H:16
@ land
Definition AMReX_ParallelReduce.H:21
@ min
Definition AMReX_ParallelReduce.H:18
@ max
Definition AMReX_ParallelReduce.H:17
@ lor
Definition AMReX_ParallelReduce.H:20
@ sum
Definition AMReX_ParallelReduce.H:19
void Reduce(ReduceOp, T *, int, int, MPI_Comm)
Definition AMReX_ParallelReduce.H:92
void Gather(const T *, int, T *, int, MPI_Comm)
Definition AMReX_ParallelReduce.H:96
Definition AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_Functional.H:41
Definition AMReX_Functional.H:32
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:68
Definition AMReX_ValLocPair.H:10