Block-Structured AMR Software Framework
AMReX_FabArrayUtility.H
1 #ifndef AMREX_FABARRAY_UTILITY_H_
2 #define AMREX_FABARRAY_UTILITY_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_FabArray.H>
6 #include <AMReX_LayoutData.H>
7 #include <AMReX_Print.H>
8 #include <AMReX_ParReduce.H>
9 #include <limits>
10 
11 namespace amrex {
12 
13 template <class FAB, class F,
14  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
15 typename FAB::value_type
16 ReduceSum (FabArray<FAB> const& fa, int nghost, F&& f) {
17  return ReduceSum(fa, IntVect(nghost), std::forward<F>(f));
18 }
19 
20 namespace fudetail {
21 template <class FAB, class F,
22  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
23 typename FAB::value_type
24 ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
25 {
26  using value_type = typename FAB::value_type;
27  value_type sm = 0;
28 
29 #ifdef AMREX_USE_OMP
30 #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
31 #endif
32  for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
33  {
34  const Box& bx = mfi.growntilebox(nghost);
35  auto const& arr = fa.const_array(mfi);
36  sm += f(bx, arr);
37  }
38 
39  return sm;
40 }
41 }
42 
43 #ifdef AMREX_USE_GPU
44 namespace fudetail {
45 template <class OP, class FAB, class F>
46 std::enable_if_t<IsBaseFab<FAB>::value,
47  std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
48  std::is_same<OP,ReduceOpLogicalOr>::value,
49  int, typename FAB::value_type> >
50 ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
51 {
52  using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
53  std::is_same<OP,ReduceOpLogicalOr>::value,
54  int, typename FAB::value_type>;
55  auto typ = fa.ixType();
56  auto const& ma = fa.const_arrays();
57  return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa, nghost,
58  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
59  {
60  return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ), ma[box_no])) };
61  });
62 }
63 
64 template <class OP, class FAB1, class FAB2, class F>
65 std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value,
66  std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
67  std::is_same<OP,ReduceOpLogicalOr>::value,
68  int, typename FAB1::value_type> >
69 ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F const& f)
70 {
71  using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
72  std::is_same<OP,ReduceOpLogicalOr>::value,
73  int, typename FAB1::value_type>;
74  auto typ = fa1.ixType();
75  auto const& ma1 = fa1.const_arrays();
76  auto const& ma2 = fa2.const_arrays();
77  return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
78  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
79  {
80  return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
81  ma1[box_no], ma2[box_no])) };
82  });
83 }
84 
85 template <class OP, class FAB1, class FAB2, class FAB3, class F>
86 std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value && IsBaseFab<FAB3>::value,
87  std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
88  std::is_same<OP,ReduceOpLogicalOr>::value,
89  int, typename FAB1::value_type> >
90 ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
91  FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
92 {
93  using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
94  std::is_same<OP,ReduceOpLogicalOr>::value,
95  int, typename FAB1::value_type>;
96  auto typ = fa1.ixType();
97  auto const& ma1 = fa1.const_arrays();
98  auto const& ma2 = fa2.const_arrays();
99  auto const& ma3 = fa3.const_arrays();
100  return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
101  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
102  {
103  return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
104  ma1[box_no], ma2[box_no], ma3[box_no])) };
105  });
106 }
107 
108 template <class FAB, class F>
109 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
110 ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
111 {
112  return ReduceSum_host(fa,nghost,std::forward<F>(f));
113 }
114 
115 template <class FAB, class F>
116 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
117 ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
118 {
119  amrex::ignore_unused(fa,nghost,f);
120  amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
121  return 0;
122 }
123 }
124 
125 template <class FAB, class F,
126  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
127 typename FAB::value_type
128 ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
129 {
130  if (Gpu::inLaunchRegion()) {
131  return fudetail::ReduceMF<ReduceOpSum>(fa, nghost, std::forward<F>(f));
132  } else {
133  return fudetail::ReduceSum_host_wrapper(fa, nghost, std::forward<F>(f));
134  }
135 }
136 #else
137 template <class FAB, class F,
138  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
139 typename FAB::value_type
140 ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
141 {
142  return fudetail::ReduceSum_host(fa, nghost, std::forward<F>(f));
143 }
144 #endif
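A minimal usage sketch (not part of this header), assuming `using namespace amrex;` and a MultiFab `mf`: the callable must be usable on both host and device, because in the GPU path it is invoked once per cell with a single-cell Box.

    Real total = ReduceSum(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
        {
            Real s = 0.;  // per-box (or per-cell, on GPU) partial sum
            Loop(bx, [&] (int i, int j, int k) { s += a(i,j,k); });
            return s;
        });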
145 
146 template <class FAB1, class FAB2, class F,
147  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
148 typename FAB1::value_type
149 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
150  int nghost, F&& f) {
151  return ReduceSum(fa1, fa2, IntVect(nghost), std::forward<F>(f));
152 }
153 
154 namespace fudetail {
155 template <class FAB1, class FAB2, class F,
156  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
157 typename FAB1::value_type
158 ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
159  IntVect const& nghost, F const& f)
160 {
161  using value_type = typename FAB1::value_type;
162  value_type sm = 0;
163 
164 #ifdef AMREX_USE_OMP
165 #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
166 #endif
167  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
168  {
169  const Box& bx = mfi.growntilebox(nghost);
170  const auto& arr1 = fa1.const_array(mfi);
171  const auto& arr2 = fa2.const_array(mfi);
172  sm += f(bx, arr1, arr2);
173  }
174 
175  return sm;
176 }
177 }
178 
179 #ifdef AMREX_USE_GPU
180 namespace fudetail {
181 template <class FAB1, class FAB2, class F>
182 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
183 ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
184  IntVect const& nghost, F&& f)
185 {
186  return ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
187 }
188 
189 template <class FAB1, class FAB2, class F>
190 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
191 ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
192  IntVect const& nghost, F&& f)
193 {
194  amrex::ignore_unused(fa1,fa2,nghost,f);
195  amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
196  return 0;
197 }
198 }
199 
200 template <class FAB1, class FAB2, class F,
201  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
202 typename FAB1::value_type
203 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
204  IntVect const& nghost, F&& f)
205 {
206  if (Gpu::inLaunchRegion()) {
207  return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,nghost,std::forward<F>(f));
208  } else {
209  return fudetail::ReduceSum_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
210  }
211 }
212 #else
213 template <class FAB1, class FAB2, class F,
214  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
215 typename FAB1::value_type
216 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
217  IntVect const& nghost, F&& f)
218 {
219  return fudetail::ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
220 }
221 #endif
222 
223 template <class FAB1, class FAB2, class FAB3, class F,
224  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
225 typename FAB1::value_type
226 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
227  int nghost, F&& f)
228 {
229  return ReduceSum(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
230 }
231 
232 namespace fudetail {
233 template <class FAB1, class FAB2, class FAB3, class F,
234  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
235 typename FAB1::value_type
236 ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
237  FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
238 {
239  using value_type = typename FAB1::value_type;
240  value_type sm = 0;
241 
242 #ifdef AMREX_USE_OMP
243 #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
244 #endif
245  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
246  {
247  const Box& bx = mfi.growntilebox(nghost);
248  const auto& arr1 = fa1.const_array(mfi);
249  const auto& arr2 = fa2.const_array(mfi);
250  const auto& arr3 = fa3.const_array(mfi);
251  sm += f(bx, arr1, arr2, arr3);
252  }
253 
254  return sm;
255 }
256 }
257 
258 #ifdef AMREX_USE_GPU
259 namespace fudetail {
260 template <class FAB1, class FAB2, class FAB3, class F>
261 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
262 ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
263  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
264 {
265  return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
266 }
267 
268 template <class FAB1, class FAB2, class FAB3, class F>
269 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
270 ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
271  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
272 {
273  amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
274  amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
275  return 0;
276 }
277 }
278 
279 template <class FAB1, class FAB2, class FAB3, class F,
280  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
281 typename FAB1::value_type
282 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
283  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
284 {
285  if (Gpu::inLaunchRegion()) {
286  return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,fa3,nghost,std::forward<F>(f));
287  } else {
288  return fudetail::ReduceSum_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
289  }
290 }
291 #else
292 template <class FAB1, class FAB2, class FAB3, class F,
293  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
294 typename FAB1::value_type
295 ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
296  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
297 {
298  return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
299 }
300 #endif
301 
302 template <class FAB, class F,
303  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
304 typename FAB::value_type
305 ReduceMin (FabArray<FAB> const& fa, int nghost, F&& f)
306 {
307  return ReduceMin(fa, IntVect(nghost), std::forward<F>(f));
308 }
309 
310 namespace fudetail {
311 template <class FAB, class F,
312  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
313 typename FAB::value_type
314 ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
315 {
316  using value_type = typename FAB::value_type;
317  value_type r = std::numeric_limits<value_type>::max();
318 
319 #ifdef AMREX_USE_OMP
320 #pragma omp parallel reduction(min:r)
321 #endif
322  for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
323  {
324  const Box& bx = mfi.growntilebox(nghost);
325  const auto& arr = fa.const_array(mfi);
326  r = std::min(r, f(bx, arr));
327  }
328  return r;
329 }
330 }
331 
332 #ifdef AMREX_USE_GPU
333 namespace fudetail {
334 template <class FAB, class F>
335 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
336 ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
337 {
338  return ReduceMin_host(fa,nghost,std::forward<F>(f));
339 }
340 
341 template <class FAB, class F>
342 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
343 ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
344 {
345  amrex::ignore_unused(fa,nghost,f);
346  amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
347  return 0;
348 }
349 }
350 
351 template <class FAB, class F,
352  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
353 typename FAB::value_type
354 ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
355 {
356  if (Gpu::inLaunchRegion()) {
357  return fudetail::ReduceMF<ReduceOpMin>(fa, nghost, std::forward<F>(f));
358  } else {
359  return fudetail::ReduceMin_host_wrapper(fa, nghost, std::forward<F>(f));
360  }
361 }
362 #else
363 template <class FAB, class F,
364  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
365 typename FAB::value_type
366 ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
367 {
368  return fudetail::ReduceMin_host(fa, nghost, std::forward<F>(f));
369 }
370 #endif
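The same call pattern works for ReduceMin; a sketch, again assuming `using namespace amrex;` and a MultiFab `mf`. The per-box seed should be numeric_limits::max() so the partial results combine correctly:

    Real mn = ReduceMin(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
        {
            Real r = std::numeric_limits<Real>::max();
            Loop(bx, [&] (int i, int j, int k) { r = amrex::min(r, a(i,j,k)); });
            return r;
        });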
371 
372 template <class FAB1, class FAB2, class F,
373  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
374 typename FAB1::value_type
375 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
376 {
377  return ReduceMin(fa1, fa2, IntVect(nghost), std::forward<F>(f));
378 }
379 
380 namespace fudetail {
381 template <class FAB1, class FAB2, class F,
382  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
383 typename FAB1::value_type
384 ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
385  IntVect const& nghost, F const& f)
386 {
387  using value_type = typename FAB1::value_type;
388  value_type r = std::numeric_limits<value_type>::max();
389 
390 #ifdef AMREX_USE_OMP
391 #pragma omp parallel reduction(min:r)
392 #endif
393  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
394  {
395  const Box& bx = mfi.growntilebox(nghost);
396  const auto& arr1 = fa1.const_array(mfi);
397  const auto& arr2 = fa2.const_array(mfi);
398  r = std::min(r, f(bx, arr1, arr2));
399  }
400 
401  return r;
402 }
403 }
404 
405 #ifdef AMREX_USE_GPU
406 namespace fudetail {
407 template <class FAB1, class FAB2, class F>
408 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
409 ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
410  IntVect const& nghost, F&& f)
411 {
412  return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
413 }
414 
415 template <class FAB1, class FAB2, class F>
416 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
417 ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
418  IntVect const& nghost, F&& f)
419 {
420  amrex::ignore_unused(fa1,fa2,nghost,f);
421  amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
422  return 0;
423 }
424 }
425 
426 template <class FAB1, class FAB2, class F,
427  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
428 typename FAB1::value_type
429 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
430  IntVect const& nghost, F&& f)
431 {
432  if (Gpu::inLaunchRegion()) {
433  return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,nghost,std::forward<F>(f));
434  } else {
435  return fudetail::ReduceMin_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
436  }
437 }
438 #else
439 template <class FAB1, class FAB2, class F,
440  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
441 typename FAB1::value_type
442 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
443  IntVect const& nghost, F&& f)
444 {
445  return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
446 }
447 #endif
448 
449 template <class FAB1, class FAB2, class FAB3, class F,
450  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
451 typename FAB1::value_type
452 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
453  int nghost, F&& f)
454 {
455  return ReduceMin(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
456 }
457 
458 namespace fudetail {
459 template <class FAB1, class FAB2, class FAB3, class F,
460  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
461 typename FAB1::value_type
462 ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
463  FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
464 {
465  using value_type = typename FAB1::value_type;
466  value_type r = std::numeric_limits<value_type>::max();
467 
468 #ifdef AMREX_USE_OMP
469 #pragma omp parallel reduction(min:r)
470 #endif
471  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
472  {
473  const Box& bx = mfi.growntilebox(nghost);
474  const auto& arr1 = fa1.const_array(mfi);
475  const auto& arr2 = fa2.const_array(mfi);
476  const auto& arr3 = fa3.const_array(mfi);
477  r = std::min(r, f(bx, arr1, arr2, arr3));
478  }
479 
480  return r;
481 }
482 }
483 
484 #ifdef AMREX_USE_GPU
485 namespace fudetail {
486 template <class FAB1, class FAB2, class FAB3, class F>
487 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
488 ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
489  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
490 {
491  return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
492 }
493 
494 template <class FAB1, class FAB2, class FAB3, class F>
495 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
496 ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
497  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
498 {
499  amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
500  amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
501  return 0;
502 }
503 }
504 
505 template <class FAB1, class FAB2, class FAB3, class F,
506  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
507 typename FAB1::value_type
508 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
509  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
510 {
511  if (Gpu::inLaunchRegion()) {
512  return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,fa3,nghost,std::forward<F>(f));
513  } else {
514  return fudetail::ReduceMin_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
515  }
516 }
517 #else
518 template <class FAB1, class FAB2, class FAB3, class F,
519  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
520 typename FAB1::value_type
521 ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
522  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
523 {
524  return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
525 }
526 #endif
527 
528 template <class FAB, class F,
529  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
530 typename FAB::value_type
531 ReduceMax (FabArray<FAB> const& fa, int nghost, F&& f)
532 {
533  return ReduceMax(fa, IntVect(nghost), std::forward<F>(f));
534 }
535 
536 namespace fudetail {
537 template <class FAB, class F,
538  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
539 typename FAB::value_type
540 ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
541 {
542  using value_type = typename FAB::value_type;
543  value_type r = std::numeric_limits<value_type>::lowest();
544 
545 #ifdef AMREX_USE_OMP
546 #pragma omp parallel reduction(max:r)
547 #endif
548  for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
549  {
550  const Box& bx = mfi.growntilebox(nghost);
551  const auto& arr = fa.const_array(mfi);
552  r = std::max(r, f(bx, arr));
553  }
554 
555  return r;
556 }
557 }
558 
559 #ifdef AMREX_USE_GPU
560 namespace fudetail {
561 template <class FAB, class F>
562 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
563 ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
564 {
565  return ReduceMax_host(fa,nghost,std::forward<F>(f));
566 }
567 
568 template <class FAB, class F>
569 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
570 ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
571 {
572  amrex::ignore_unused(fa,nghost,f);
573  amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
574  return 0;
575 }
576 }
577 
578 template <class FAB, class F,
579  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
580 typename FAB::value_type
581 ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
582 {
583  if (Gpu::inLaunchRegion()) {
584  return fudetail::ReduceMF<ReduceOpMax>(fa,nghost,std::forward<F>(f));
585  } else {
586  return fudetail::ReduceMax_host_wrapper(fa,nghost,std::forward<F>(f));
587  }
588 }
589 #else
590 template <class FAB, class F,
591  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
592 typename FAB::value_type
593 ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
594 {
595  return fudetail::ReduceMax_host(fa,nghost,std::forward<F>(f));
596 }
597 #endif
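ReduceMax mirrors ReduceMin with the seed at numeric_limits::lowest(); a sketch under the same assumptions:

    Real mx = ReduceMax(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
        {
            Real r = std::numeric_limits<Real>::lowest();
            Loop(bx, [&] (int i, int j, int k) { r = amrex::max(r, a(i,j,k)); });
            return r;
        });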
598 
599 template <class FAB1, class FAB2, class F,
600  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
601 typename FAB1::value_type
602 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
603 {
604  return ReduceMax(fa1, fa2, IntVect(nghost), std::forward<F>(f));
605 }
606 
607 namespace fudetail {
608 template <class FAB1, class FAB2, class F,
609  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
610 typename FAB1::value_type
611 ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
612  IntVect const& nghost, F const& f)
613 {
614  using value_type = typename FAB1::value_type;
615  value_type r = std::numeric_limits<value_type>::lowest();
616 
617 #ifdef AMREX_USE_OMP
618 #pragma omp parallel reduction(max:r)
619 #endif
620  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
621  {
622  const Box& bx = mfi.growntilebox(nghost);
623  const auto& arr1 = fa1.const_array(mfi);
624  const auto& arr2 = fa2.const_array(mfi);
625  r = std::max(r, f(bx, arr1, arr2));
626  }
627 
628  return r;
629 }
630 }
631 
632 #ifdef AMREX_USE_GPU
633 namespace fudetail {
634 template <class FAB1, class FAB2, class F>
635 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
636 ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
637  IntVect const& nghost, F&& f)
638 {
639  return ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
640 }
641 
642 template <class FAB1, class FAB2, class F>
643 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
644 ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
645  IntVect const& nghost, F&& f)
646 {
647  amrex::ignore_unused(fa1,fa2,nghost,f);
648  amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
649  return 0;
650 }
651 }
652 
653 template <class FAB1, class FAB2, class F,
654  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
655 typename FAB1::value_type
656 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
657  IntVect const& nghost, F&& f)
658 {
659  if (Gpu::inLaunchRegion()) {
660  return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,nghost,std::forward<F>(f));
661  } else {
662  return fudetail::ReduceMax_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
663  }
664 }
665 #else
666 template <class FAB1, class FAB2, class F,
667  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
668 typename FAB1::value_type
669 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
670  IntVect const& nghost, F&& f)
671 {
672  return fudetail::ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
673 }
674 #endif
675 
676 template <class FAB1, class FAB2, class FAB3, class F,
677  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
678 typename FAB1::value_type
679 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
680  int nghost, F&& f)
681 {
682  return ReduceMax(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
683 }
684 
685 namespace fudetail {
686 template <class FAB1, class FAB2, class FAB3, class F,
687  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
688 typename FAB1::value_type
689 ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
690  FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
691 {
692  using value_type = typename FAB1::value_type;
693  value_type r = std::numeric_limits<value_type>::lowest();
694 
695 #ifdef AMREX_USE_OMP
696 #pragma omp parallel reduction(max:r)
697 #endif
698  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
699  {
700  const Box& bx = mfi.growntilebox(nghost);
701  const auto& arr1 = fa1.const_array(mfi);
702  const auto& arr2 = fa2.const_array(mfi);
703  const auto& arr3 = fa3.const_array(mfi);
704  r = std::max(r, f(bx, arr1, arr2, arr3));
705  }
706 
707  return r;
708 }
709 }
710 
711 #ifdef AMREX_USE_GPU
712 namespace fudetail {
713 template <class FAB1, class FAB2, class FAB3, class F>
714 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
715 ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
716  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
717 {
718  return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
719 }
720 
721 template <class FAB1, class FAB2, class FAB3, class F>
722 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
723 ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
724  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
725 {
726  amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
727  amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
728  return 0;
729 }
730 }
731 
732 template <class FAB1, class FAB2, class FAB3, class F,
733  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
734 typename FAB1::value_type
735 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
736  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
737 {
738  if (Gpu::inLaunchRegion()) {
739  return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,fa3,nghost,std::forward<F>(f));
740  } else {
741  return fudetail::ReduceMax_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
742  }
743 }
744 #else
745 template <class FAB1, class FAB2, class FAB3, class F,
746  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
747 typename FAB1::value_type
748 ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
749  FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
750 {
751  return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
752 }
753 #endif
754 
755 template <class FAB, class F,
756  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
757 bool
758 ReduceLogicalAnd (FabArray<FAB> const& fa, int nghost, F&& f)
759 {
760  return ReduceLogicalAnd(fa, IntVect(nghost), std::forward<F>(f));
761 }
762 
763 namespace fudetail {
764 template <class FAB, class F,
765  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
766 bool
767 ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
768 {
769  int r = true;
770 
771 #ifdef AMREX_USE_OMP
772 #pragma omp parallel reduction(&&:r)
773 #endif
774  for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
775  {
776  const Box& bx = mfi.growntilebox(nghost);
777  const auto& arr = fa.const_array(mfi);
778  r = r && f(bx, arr);
779  }
780 
781  return r;
782 }
783 }
784 
785 #ifdef AMREX_USE_GPU
786 namespace fudetail {
787 template <class FAB, class F>
788 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
789 ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
790 {
791  return ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
792 }
793 
794 template <class FAB, class F>
795 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
796 ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
797 {
798  amrex::ignore_unused(fa,nghost,f);
799  amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
800  return false;
801 }
802 }
803 
804 template <class FAB, class F,
805  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
806 bool
807 ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
808 {
809  if (Gpu::inLaunchRegion()) {
810  return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa,nghost,std::forward<F>(f));
811  } else {
812  return fudetail::ReduceLogicalAnd_host_wrapper(fa,nghost,std::forward<F>(f));
813  }
814 }
815 #else
816 template <class FAB, class F,
817  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
818 bool
819 ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
820 {
821  return fudetail::ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
822 }
823 #endif
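A sketch of ReduceLogicalAnd, assuming `using namespace amrex;` and a MultiFab `mf`; note the return type is bool and the GPU path reduces ints:

    bool all_positive = ReduceLogicalAnd(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> bool
        {
            bool r = true;
            Loop(bx, [&] (int i, int j, int k) { r = r && (a(i,j,k) > Real(0.)); });
            return r;
        });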
824 
825 template <class FAB1, class FAB2, class F,
826  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
827 bool
828 ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
829  int nghost, F&& f)
830 {
831  return ReduceLogicalAnd(fa1, fa2, IntVect(nghost), std::forward<F>(f));
832 }
833 
834 namespace fudetail {
835 template <class FAB1, class FAB2, class F,
836  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
837 bool
838 ReduceLogicalAnd_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
839  IntVect const& nghost, F const& f)
840 {
841  int r = true;
842 
843 #ifdef AMREX_USE_OMP
844 #pragma omp parallel reduction(&&:r)
845 #endif
846  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
847  {
848  const Box& bx = mfi.growntilebox(nghost);
849  const auto& arr1 = fa1.const_array(mfi);
850  const auto& arr2 = fa2.const_array(mfi);
851  r = r && f(bx, arr1, arr2);
852  }
853 
854  return r;
855 }
856 }
857 
858 #ifdef AMREX_USE_GPU
859 namespace fudetail {
860 template <class FAB1, class FAB2, class F>
861 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
862 ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
863  IntVect const& nghost, F&& f)
864 {
865  return ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
866 }
867 
868 template <class FAB1, class FAB2, class F>
869 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
870 ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
871  IntVect const& nghost, F&& f)
872 {
873  amrex::ignore_unused(fa1,fa2,nghost,f);
874  amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
875  return false;
876 }
877 }
878 
879 template <class FAB1, class FAB2, class F,
880  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
881 bool
882 ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
883  IntVect const& nghost, F&& f)
884 {
885  if (Gpu::inLaunchRegion()) {
886  return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa1,fa2,nghost,std::forward<F>(f));
887  } else {
888  return fudetail::ReduceLogicalAnd_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
889  }
890 }
891 #else
892 template <class FAB1, class FAB2, class F,
893  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
894 bool
895 ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
896  IntVect const& nghost, F&& f)
897 {
898  return fudetail::ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
899 }
900 #endif
901 
902 template <class FAB, class F,
903  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
904 bool
905 ReduceLogicalOr (FabArray<FAB> const& fa, int nghost, F&& f)
906 {
907  return ReduceLogicalOr(fa, IntVect(nghost), std::forward<F>(f));
908 }
909 
910 namespace fudetail {
911 template <class FAB, class F,
912  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
913 bool
914 ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
915 {
916  int r = false;
917 
918 #ifdef AMREX_USE_OMP
919 #pragma omp parallel reduction(||:r)
920 #endif
921  for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
922  {
923  const Box& bx = mfi.growntilebox(nghost);
924  const auto& arr = fa.const_array(mfi);
925  r = r || f(bx, arr);
926  }
927 
928  return r;
929 }
930 }
931 
932 #ifdef AMREX_USE_GPU
933 namespace fudetail {
934 template <class FAB, class F>
935 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
936 ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
937 {
938  return ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
939 }
940 
941 template <class FAB, class F>
942 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
943 ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
944 {
945  amrex::ignore_unused(fa,nghost,f);
946  amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
947  return false;
948 }
949 }
950 
951 template <class FAB, class F,
952  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
953 bool
954 ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
955 {
956  if (Gpu::inLaunchRegion()) {
957  return fudetail::ReduceMF<ReduceOpLogicalOr>(fa,nghost,std::forward<F>(f));
958  } else {
959  return fudetail::ReduceLogicalOr_host_wrapper(fa,nghost,std::forward<F>(f));
960  }
961 }
962 #else
963 template <class FAB, class F,
964  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
965 bool
966 ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
967 {
968  return fudetail::ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
969 }
970 #endif
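A sketch of ReduceLogicalOr, assuming `using namespace amrex;` and a MultiFab `mf`; amrex::isnan is the usual GPU-safe NaN check:

    bool any_nan = ReduceLogicalOr(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> bool
        {
            bool r = false;
            Loop(bx, [&] (int i, int j, int k) { r = r || amrex::isnan(a(i,j,k)); });
            return r;
        });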
971 
972 template <class FAB1, class FAB2, class F,
973  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
974 bool
975 ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
976  int nghost, F&& f)
977 {
978  return ReduceLogicalOr(fa1, fa2, IntVect(nghost), std::forward<F>(f));
979 }
980 
981 namespace fudetail {
982 template <class FAB1, class FAB2, class F,
983  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
984 bool
985 ReduceLogicalOr_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
986  IntVect const& nghost, F const& f)
987 {
988  int r = false;
989 
990 #ifdef AMREX_USE_OMP
991 #pragma omp parallel reduction(||:r)
992 #endif
993  for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
994  {
995  const Box& bx = mfi.growntilebox(nghost);
996  const auto& arr1 = fa1.const_array(mfi);
997  const auto& arr2 = fa2.const_array(mfi);
998  r = r || f(bx, arr1, arr2);
999  }
1000 
1001  return r;
1002 }
1003 }
1004 
1005 #ifdef AMREX_USE_GPU
1006 namespace fudetail {
1007 template <class FAB1, class FAB2, class F>
1008 std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
1009 ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1010  IntVect const& nghost, F&& f)
1011 {
1012  return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
1013 }
1014 
1015 template <class FAB1, class FAB2, class F>
1016 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
1017 ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1018  IntVect const& nghost, F&& f)
1019 {
1020  amrex::ignore_unused(fa1,fa2,nghost,f);
1021  amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
1022  return false;
1023 }
1024 }
1025 
1026 template <class FAB1, class FAB2, class F,
1027  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
1028 bool
1029 ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1030  IntVect const& nghost, F&& f)
1031 {
1032  if (Gpu::inLaunchRegion()) {
1033  return fudetail::ReduceMF<ReduceOpLogicalOr>(fa1,fa2,nghost,std::forward<F>(f));
1034  } else {
1035  return fudetail::ReduceLogicalOr_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
1036  }
1037 }
1038 #else
1039 template <class FAB1, class FAB2, class F,
1040  class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
1041 bool
1042 ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1043  IntVect const& nghost, F&& f)
1044 {
1045  return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
1046 }
1047 #endif
1048 
1049 template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1050 void
1051 printCell (FabArray<FAB> const& mf, const IntVect& cell, int comp = -1,
1052  const IntVect& ng = IntVect::TheZeroVector())
1053 {
1054  for (MFIter mfi(mf); mfi.isValid(); ++mfi)
1055  {
1056  const Box& bx = amrex::grow(mfi.validbox(), ng);
1057  if (bx.contains(cell)) {
1058  int n = (comp >= 0) ? 1 : mf.nComp();
1059  auto const& fab = mf.const_array(mfi);
1060  Gpu::PinnedVector<typename FAB::value_type> pv(n);
1061  auto* dp = pv.data();
1062  auto f = [=] AMREX_GPU_HOST_DEVICE ()
1063  {
1064  if (comp >= 0) {
1065  *dp = fab(cell, comp);
1066  } else {
1067  for (int i = 0; i < n; ++i) {
1068  dp[i] = fab(cell,i);
1069  }
1070  }
1071  };
1072 
1073 #ifdef AMREX_USE_GPU
1074  if (mf.arena()->isManaged() || mf.arena()->isDevice()) {
1075  single_task(f);
1076  Gpu::streamSynchronize();
1077  } else
1078 #endif
1079  {
1080  f();
1081  }
1082 
1083  if (comp >= 0) {
1084  amrex::AllPrint().SetPrecision(17) << " At cell " << cell << " in Box " << bx
1085  << ": " << *dp << '\n';
1086  } else {
1087  std::ostringstream ss;
1088  ss.precision(17);
1089  for (int i = 0; i < n-1; ++i)
1090  {
1091  ss << dp[i] << ", ";
1092  }
1093  ss << dp[n-1];
1094  amrex::AllPrint() << " At cell " << cell << " in Box " << bx
1095  << ": " << ss.str() << '\n';
1096  }
1097  }
1098  }
1099 }
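For example (a sketch, assuming a MultiFab `mf`), printing all components, or only component 0, at one cell:

    printCell(mf, IntVect(AMREX_D_DECL(8,8,8)));     // all components at cell (8,8,8)
    printCell(mf, IntVect(AMREX_D_DECL(8,8,8)), 0);  // component 0 only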
1100 
1101 template <class FAB,
1102  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1103 void
1104 Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1105 {
1106  Subtract(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1107 }
1108 
1109 template <class FAB,
1110  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1111 void
1112 Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1113 {
1114 #ifdef AMREX_USE_GPU
1115  if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1116  auto const& dstfa = dst.arrays();
1117  auto const& srcfa = src.const_arrays();
1118  ParallelFor(dst, nghost, numcomp,
1119  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1120  {
1121  dstfa[box_no](i,j,k,n+dstcomp) -= srcfa[box_no](i,j,k,n+srccomp);
1122  });
1123  if (!Gpu::inNoSyncRegion()) {
1124  Gpu::streamSynchronize();
1125  }
1126  } else
1127 #endif
1128  {
1129 #ifdef AMREX_USE_OMP
1130 #pragma omp parallel if (Gpu::notInLaunchRegion())
1131 #endif
1132  for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1133  {
1134  const Box& bx = mfi.growntilebox(nghost);
1135  if (bx.ok())
1136  {
1137  auto const srcFab = src.array(mfi);
1138  auto dstFab = dst.array(mfi);
1139  AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1140  {
1141  dstFab(i,j,k,n+dstcomp) -= srcFab(i,j,k,n+srccomp);
1142  });
1143  }
1144  }
1145  }
1146 }
1147 
1148 
1149 template <class FAB,
1150  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1151 void
1152 Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1153 {
1154  Multiply(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1155 }
1156 
1157 template <class FAB,
1158  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1159 void
1160 Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1161 {
1162 #ifdef AMREX_USE_GPU
1163  if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1164  auto const& dstfa = dst.arrays();
1165  auto const& srcfa = src.const_arrays();
1166  ParallelFor(dst, nghost, numcomp,
1167  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1168  {
1169  dstfa[box_no](i,j,k,n+dstcomp) *= srcfa[box_no](i,j,k,n+srccomp);
1170  });
1171  if (!Gpu::inNoSyncRegion()) {
1172  Gpu::streamSynchronize();
1173  }
1174  } else
1175 #endif
1176  {
1177 #ifdef AMREX_USE_OMP
1178 #pragma omp parallel if (Gpu::notInLaunchRegion())
1179 #endif
1180  for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1181  {
1182  const Box& bx = mfi.growntilebox(nghost);
1183  if (bx.ok())
1184  {
1185  auto const srcFab = src.array(mfi);
1186  auto dstFab = dst.array(mfi);
1187  AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1188  {
1189  dstFab(i,j,k,n+dstcomp) *= srcFab(i,j,k,n+srccomp);
1190  });
1191  }
1192  }
1193  }
1194 }
1195 
1196 
1197 template <class FAB,
1198  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1199 void
1200 Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1201 {
1202  Divide(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1203 }
1204 
1205 template <class FAB,
1206  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1207 void
1208 Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1209 {
1210 #ifdef AMREX_USE_GPU
1211  if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1212  auto const& dstfa = dst.arrays();
1213  auto const& srcfa = src.const_arrays();
1214  ParallelFor(dst, nghost, numcomp,
1215  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1216  {
1217  dstfa[box_no](i,j,k,n+dstcomp) /= srcfa[box_no](i,j,k,n+srccomp);
1218  });
1219  if (!Gpu::inNoSyncRegion()) {
1220  Gpu::streamSynchronize();
1221  }
1222  } else
1223 #endif
1224  {
1225 #ifdef AMREX_USE_OMP
1226 #pragma omp parallel if (Gpu::notInLaunchRegion())
1227 #endif
1228  for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1229  {
1230  const Box& bx = mfi.growntilebox(nghost);
1231  if (bx.ok())
1232  {
1233  auto const srcFab = src.array(mfi);
1234  auto dstFab = dst.array(mfi);
1235  AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1236  {
1237  dstFab(i,j,k,n+dstcomp) /= srcFab(i,j,k,n+srccomp);
1238  });
1239  }
1240  }
1241  }
1242 }
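Subtract, Multiply, and Divide all follow the same pattern: a fused multi-box ParallelFor when the GPU launch region is active and the destination is a fusing candidate, otherwise a tiled (optionally OpenMP) MFIter loop. A usage sketch, assuming compatible MultiFabs `dst` and `src`:

    Subtract(dst, src, 0, 0, dst.nComp(), 0); // dst -= src on valid cells
    Multiply(dst, src, 0, 0, 1, 0);           // component 0: dst *= src
    Divide(dst, src, 0, 0, 1, 0);             // component 0: dst /= src (no zero guard)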
1243 
1244 template <class FAB,
1245  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1246 void
1247 Abs (FabArray<FAB>& fa, int icomp, int numcomp, int nghost)
1248 {
1249  Abs(fa,icomp,numcomp,IntVect(nghost));
1250 }
1251 
1252 template <class FAB,
1253  class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1254 void
1255 Abs (FabArray<FAB>& fa, int icomp, int numcomp, const IntVect& nghost)
1256 {
1257 #ifdef AMREX_USE_GPU
1258  if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
1259  auto const& fabarr = fa.arrays();
1260  ParallelFor(fa, nghost, numcomp,
1261  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1262  {
1263  fabarr[box_no](i,j,k,n+icomp) = std::abs(fabarr[box_no](i,j,k,n+icomp));
1264  });
1265  if (!Gpu::inNoSyncRegion()) {
1266  Gpu::streamSynchronize();
1267  }
1268  } else
1269 #endif
1270  {
1271 #ifdef AMREX_USE_OMP
1272 #pragma omp parallel if (Gpu::notInLaunchRegion())
1273 #endif
1274  for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1275  {
1276  const Box& bx = mfi.growntilebox(nghost);
1277  if (bx.ok())
1278  {
1279  auto const& fab = fa.array(mfi);
1280  AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1281  {
1282  fab(i,j,k,n+icomp) = std::abs(fab(i,j,k,n+icomp));
1283  });
1284  }
1285  }
1286  }
1287 }
1288 
1289 template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1290 void
1291 prefetchToHost (FabArray<FAB> const& fa, const bool synchronous = true)
1292 {
1293 #ifdef AMREX_USE_GPU
1294  if (fa.arena()->isManaged()) {
1295  for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
1296  fa.prefetchToHost(mfi);
1297  }
1298  }
1299 #else
1300  amrex::ignore_unused(fa,synchronous);
1301 #endif
1302 }
1303 
1304 template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1305 void
1306 prefetchToDevice (FabArray<FAB> const& fa, const bool synchronous = true)
1307 {
1308 #ifdef AMREX_USE_GPU
1309  if (fa.arena()->isManaged()) {
1310  for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
1311  fa.prefetchToDevice(mfi);
1312  }
1313  }
1314 #else
1315  amrex::ignore_unused(fa,synchronous);
1316 #endif
1317 }
1318 
1319 
1320 template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
1321  && IsBaseFab<IFAB>::value> >
1322 void
1323 OverrideSync (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
1324 {
1325  BL_PROFILE("OverrideSync()");
1326 
1327  OverrideSync_nowait(fa, msk, period);
1328  OverrideSync_finish(fa);
1329 }
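OverrideSync makes duplicated non-cell-centered points consistent: non-owner copies are zeroed, then owner values are summed back via a parallel copy with FabArrayBase::ADD. A sketch, assuming a nodal MultiFab `mf` and a Geometry `geom`; FabArray::OwnerMask is the usual way to obtain the mask:

    auto owner_mask = mf.OwnerMask(geom.periodicity()); // std::unique_ptr<iMultiFab>
    OverrideSync(mf, *owner_mask, geom.periodicity());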
1330 
1331 
1332 template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
1333  && IsBaseFab<IFAB>::value> >
1334 void
1335 OverrideSync_nowait (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
1336 {
1337  BL_PROFILE("OverrideSync_nowait()");
1338  AMREX_ASSERT_WITH_MESSAGE(!fa.os_temp, "OverrideSync_nowait() called when already in progress.");
1339 
1340  if (fa.ixType().cellCentered()) { return; }
1341 
1342  const int ncomp = fa.nComp();
1343 
1344 #ifdef AMREX_USE_GPU
1345  if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
1346  auto const& fabarr = fa.arrays();
1347  auto const& ifabarr = msk.const_arrays();
1348  ParallelFor(fa, IntVect(0), ncomp,
1349  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1350  {
1351  if (!ifabarr[box_no](i,j,k)) { fabarr[box_no](i,j,k,n) = 0; }
1352  });
1353  if (!Gpu::inNoSyncRegion()) {
1354  Gpu::streamSynchronize();
1355  }
1356  } else
1357 #endif
1358  {
1359 #ifdef AMREX_USE_OMP
1360 #pragma omp parallel if (Gpu::notInLaunchRegion())
1361 #endif
1362  for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1363  {
1364  const Box& bx = mfi.tilebox();
1365  auto fab = fa.array(mfi);
1366  auto const ifab = msk.array(mfi);
1367  AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
1368  {
1369  if (!ifab(i,j,k)) { fab(i,j,k,n) = 0; }
1370  });
1371  }
1372  }
1373 
1374  fa.os_temp = std::make_unique< FabArray<FAB> > ( fa.boxArray(), fa.DistributionMap(),
1375  ncomp, 0, MFInfo(), fa.Factory() );
1376  fa.os_temp->setVal(0);
1377  fa.os_temp->ParallelCopy_nowait(fa, period, FabArrayBase::ADD);
1378 }
1379 
1380 template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1381 void
1382 OverrideSync_finish (FabArray<FAB> & fa)
1383 {
1384  BL_PROFILE("OverrideSync_finish()");
1385 
1386  if (fa.ixType().cellCentered()) { return; }
1387 
1388  fa.os_temp->ParallelCopy_finish();
1389  amrex::Copy(fa, *(fa.os_temp), 0, 0, fa.nComp(), 0);
1390 
1391  fa.os_temp.reset();
1392 }
1393 
1394 template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1395 void
1396 dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
1397  int scomp, int dcomp, int ncomp)
1398 {
1399  AMREX_ASSERT(isMFIterSafe(dst, src));
1400  AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
1401 #ifdef AMREX_USE_GPU
1402  for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
1403  void* pdst = dst[mfi].dataPtr(dcomp);
1404  void const* psrc = src[mfi].dataPtr(scomp);
1405  Gpu::dtoh_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
1406  }
1407 #else
1408  Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
1409 #endif
1410 }
1411 
1412 template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1413 void
1414 dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
1415 {
1416  dtoh_memcpy(dst, src, 0, 0, dst.nComp());
1417 }
1418 
1419 template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1420 void
1421 htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
1422  int scomp, int dcomp, int ncomp)
1423 {
1424  AMREX_ASSERT(isMFIterSafe(dst, src));
1425  AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
1426 #ifdef AMREX_USE_GPU
1427  for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
1428  void* pdst = dst[mfi].dataPtr(dcomp);
1429  void const* psrc = src[mfi].dataPtr(scomp);
1430  Gpu::htod_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
1431  }
1432 #else
1433  Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
1434 #endif
1435 }
1436 
1437 template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1438 void
1439 htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
1440 {
1441  htod_memcpy(dst, src, 0, 0, dst.nComp());
1442 }
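The GPU branches above issue asynchronous memcpys, so callers generally need a stream synchronize before reading the destination. A sketch, assuming `mf_host` and `mf_dev` are FabArrays sharing a BoxArray and DistributionMapping but built on host and device arenas:

    dtoh_memcpy(mf_host, mf_dev);  // device -> host, all components
    Gpu::streamSynchronize();      // wait for the asynchronous copies to finish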
1443 
1444 template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1445 IntVect
1446 indexFromValue (FabArray<FAB> const& mf, int comp, IntVect const& nghost,
1447  typename FAB::value_type value)
1448 {
1449  IntVect loc;
1450 
1451 #ifdef AMREX_USE_GPU
1452  if (Gpu::inLaunchRegion())
1453  {
1454  amrex::Gpu::Buffer<int> aa({0,AMREX_D_DECL(0,0,0)});
1455  int* p = aa.data();
1456  // This is a device ptr to 1+AMREX_SPACEDIM int zeros.
1457  // The first is used as an atomic bool and the others for the IntVect.
1458  if (mf.isFusingCandidate()) {
1459  auto const& ma = mf.const_arrays();
1460  ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1461  {
1462  int* flag = p;
1463  if (*flag == 0) {
1464  if (ma[box_no](i,j,k,comp) == value) {
1465  if (Gpu::Atomic::Exch(flag,1) == 0) {
1466  AMREX_D_TERM(p[1] = i;,
1467  p[2] = j;,
1468  p[3] = k;);
1469  }
1470  }
1471  }
1472  });
1473  } else {
1474  for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) {
1475  const Box& bx = amrex::grow(mfi.validbox(), nghost);
1476  auto const& arr = mf.const_array(mfi);
1477  amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
1478  {
1479  int* flag = p;
1480  if (*flag == 0) {
1481  if (arr(i,j,k,comp) == value) {
1482  if (Gpu::Atomic::Exch(flag,1) == 0) {
1483  AMREX_D_TERM(p[1] = i;,
1484  p[2] = j;,
1485  p[3] = k;);
1486  }
1487  }
1488  }
1489  });
1490  }
1491  }
1492  int const* tmp = aa.copyToHost();
1493  AMREX_D_TERM(loc[0] = tmp[1];,
1494  loc[1] = tmp[2];,
1495  loc[2] = tmp[3];);
1496  }
1497  else
1498 #endif
1499  {
1500  bool f = false;
1501 #ifdef AMREX_USE_OMP
1502 #pragma omp parallel
1503 #endif
1504  {
1505  IntVect priv_loc = IntVect::TheMinVector();
1506  for (MFIter mfi(mf,true); mfi.isValid(); ++mfi)
1507  {
1508  const Box& bx = mfi.growntilebox(nghost);
1509  auto const& fab = mf.const_array(mfi);
1510  AMREX_LOOP_3D(bx, i, j, k,
1511  {
1512  if (fab(i,j,k,comp) == value) {
1513  priv_loc = IntVect(AMREX_D_DECL(i,j,k));
1514  }
1515  });
1516  }
1517 
1518  if (priv_loc.allGT(IntVect::TheMinVector())) {
1519  bool old;
1520 // we should be able to test on _OPENMP < 201107 for capture (version 3.1)
1521 // but we must work around a bug in gcc < 4.9
1522 // And, with NVHPC 21.9 to <23.1, we saw an ICE with the atomic capture (NV bug: #3390723)
1523 #if defined(AMREX_USE_OMP) && defined(_OPENMP) && (_OPENMP < 201307 || (defined(__NVCOMPILER) && __NVCOMPILER_MAJOR__ < 23)) // OpenMP 4.0
1524 #pragma omp critical (amrex_indexfromvalue)
1525 #elif defined(AMREX_USE_OMP)
1526 #pragma omp atomic capture
1527 #endif
1528  {
1529  old = f;
1530  f = true;
1531  }
1532 
1533  if (old == false) { loc = priv_loc; }
1534  }
1535  }
1536  }
1537 
1538  return loc;
1539 }
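A sketch of locating an assumed marker value in component 0 of a MultiFab `mf`; if the value occurs in more than one cell, whichever thread wins the atomic exchange determines the cell reported:

    IntVect where = indexFromValue(mf, 0, IntVect(0), Real(1.0));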
1540 
1541 /**
1542  * \brief Compute dot products of two FabArrays
1543  *
1544  * \param x first FabArray
1545  * \param xcomp starting component of x
1546  * \param y second FabArray
1547  * \param ycomp starting component of y
1548  * \param ncomp number of components
1549  * \param nghost number of ghost cells
1550  * \param local If true, MPI communication is skipped.
1551  */
1552 template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
1553 typename FAB::value_type
1554 Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int ncomp,
1555  IntVect const& nghost, bool local = false)
1556 {
1557  BL_ASSERT(x.boxArray() == y.boxArray());
1558  BL_ASSERT(x.DistributionMap() == y.DistributionMap());
1559  BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
1560 
1561  BL_PROFILE("amrex::Dot()");
1562 
1563  using T = typename FAB::value_type;
1564  auto sm = T(0.0);
1565 #ifdef AMREX_USE_GPU
1566  if (Gpu::inLaunchRegion()) {
1567  auto const& xma = x.const_arrays();
1568  auto const& yma = y.const_arrays();
1569  sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1570  [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1571  {
1572  auto t = T(0.0);
1573  auto const& xfab = xma[box_no];
1574  auto const& yfab = yma[box_no];
1575  for (int n = 0; n < ncomp; ++n) {
1576  t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1577  }
1578  return t;
1579  });
1580  } else
1581 #endif
1582  {
1583 #ifdef AMREX_USE_OMP
1584 #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1585 #endif
1586  for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1587  {
1588  Box const& bx = mfi.growntilebox(nghost);
1589  auto const& xfab = x.const_array(mfi);
1590  auto const& yfab = y.const_array(mfi);
1591  AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1592  {
1593  sm += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1594  });
1595  }
1596  }
1597 
1598  if (!local) {
1599  ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1600  }
1601 
1602  return sm;
1603 }
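A sketch of a global inner product over the first components of compatible MultiFabs `x` and `y`; pass local = true to skip the MPI reduction and obtain this rank's partial sum:

    Real d = Dot(x, 0, y, 0, 1, IntVect(0));          // includes the MPI all-reduce
    Real dloc = Dot(x, 0, y, 0, 1, IntVect(0), true); // this rank's partial sum only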
1604 
1605 //! dst = val
1606 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1607 void setVal (MF& dst, typename MF::value_type val)
1608 {
1609  dst.setVal(val);
1610 }
1611 
1612 //! dst = val in ghost cells
1613 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1614 void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
1615 {
1616  dst.setBndry(val, scomp, ncomp);
1617 }
1618 
1619 //! dst *= val
1620 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1621 void Scale (MF& dst, typename MF::value_type val, int scomp, int ncomp, int nghost)
1622 {
1623  dst.mult(val, scomp, ncomp, nghost);
1624 }
1625 
1626 //! dst = src
1627 template <class DMF, class SMF,
1628  std::enable_if_t<IsMultiFabLike_v<DMF> &&
1629  IsMultiFabLike_v<SMF>, int> = 0>
1630 void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
1631  int ncomp, IntVect const& nghost)
1632 {
1633  amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
1634 }
1635 
1636 //! dst += src
1637 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1638 void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
1639  int ncomp, IntVect const& nghost)
1640 {
1641  amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
1642 }
1643 
1644 //! dst += a * src
1645 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1646 void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1647  int ncomp, IntVect const& nghost)
1648 {
1649  MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
1650 }
1651 
1652 //! dst = src + a * dst
1653 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1654 void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1655  int ncomp, IntVect const& nghost)
1656 {
1657  MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
1658 }
1659 
1660 //! dst = a*src_a + b*src_b
1661 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1662 void LinComb (MF& dst,
1663  typename MF::value_type a, MF const& src_a, int acomp,
1664  typename MF::value_type b, MF const& src_b, int bcomp,
1665  int dcomp, int ncomp, IntVect const& nghost)
1666 {
1667  MF::LinComb(dst, a, src_a, acomp, b, src_b, bcomp, dcomp, ncomp, nghost);
1668 }
1669 
1670 //! dst = src w/ MPI communication
1671 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1672 void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
1673  IntVect const& ng_src = IntVect(0),
1674  IntVect const& ng_dst = IntVect(0),
1675  Periodicity const& period = Periodicity::NonPeriodic())
1676 {
1677  dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
1678 }
1679 
1680 template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1681 [[nodiscard]] typename MF::value_type
1682 norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
1683  bool local = false)
1684 {
1685  return mf.norminf(scomp, ncomp, nghost, local);
1686 }
1687 
1688 //! dst = val
1689 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1690 void setVal (Array<MF,N>& dst, typename MF::value_type val)
1691 {
1692  for (auto& mf: dst) {
1693  mf.setVal(val);
1694  }
1695 }
1696 
1697 //! dst = val in ghost cells
1698 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1699 void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
1700 {
1701  for (auto& mf : dst) {
1702  mf.setBndry(val, scomp, ncomp);
1703  }
1704 }
1705 
1706 //! dst *= val
1707 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1708 void Scale (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp,
1709  int nghost)
1710 {
1711  for (auto& mf : dst) {
1712  mf.mult(val, scomp, ncomp, nghost);
1713  }
1714 }
1715 
1716 //! dst = src
1717 template <class DMF, class SMF, std::size_t N,
1718  std::enable_if_t<IsMultiFabLike_v<DMF> &&
1719  IsMultiFabLike_v<SMF>, int> = 0>
1720 void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
1721  int ncomp, IntVect const& nghost)
1722 {
1723  for (std::size_t i = 0; i < N; ++i) {
1724  amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
1725  }
1726 }
1727 
1728 //! dst += src
1729 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1730 void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
1731  int ncomp, IntVect const& nghost)
1732 {
1733  for (std::size_t i = 0; i < N; ++i) {
1734  amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
1735  }
1736 }
1737 
1738 //! dst += a * src
1739 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1740 void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
1741  Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
1742  IntVect const& nghost)
1743 {
1744  for (std::size_t i = 0; i < N; ++i) {
1745  MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
1746  }
1747 }
1748 
1749 //! dst = src + a * dst
1750 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1751 void Xpay (Array<MF,N>& dst, typename MF::value_type a,
1752  Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
1753  IntVect const& nghost)
1754 {
1755  for (std::size_t i = 0; i < N; ++i) {
1756  MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
1757  }
1758 }
1759 
1760 //! dst = a*src_a + b*src_b
1761 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1762 void LinComb (Array<MF,N>& dst,
1763  typename MF::value_type a, Array<MF,N> const& src_a, int acomp,
1764  typename MF::value_type b, Array<MF,N> const& src_b, int bcomp,
1765  int dcomp, int ncomp, IntVect const& nghost)
1766 {
1767  for (std::size_t i = 0; i < N; ++i) {
1768  MF::LinComb(dst[i], a, src_a[i], acomp, b, src_b[i], bcomp, dcomp, ncomp, nghost);
1769  }
1770 }
1771 
1772 //! dst = src w/ MPI communication
1773 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1774 void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
1775  int scomp, int dcomp, int ncomp,
1776  IntVect const& ng_src = IntVect(0),
1777  IntVect const& ng_dst = IntVect(0),
1778  Periodicity const& period = Periodicity::NonPeriodic())
1779 {
1780  for (std::size_t i = 0; i < N; ++i) {
1781  dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
1782  }
1783 }
1784 
1785 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1786 [[nodiscard]] typename MF::value_type
1787 norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
1788  bool local = false)
1789 {
1790  auto r = typename MF::value_type(0);
1791  for (std::size_t i = 0; i < N; ++i) {
1792  auto tmp = mf[i].norminf(scomp, ncomp, nghost, true);
1793  r = std::max(r,tmp);
1794  }
1795  if (!local) {
1796  ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
1797  }
1798  return r;
1799 }
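Note that each element's norm is computed with local=true and only the final maximum is reduced across MPI ranks, so a single reduction suffices for all N MultiFabs. Sketch, again with the hypothetical umac:

// Sketch: global max-norm over all elements (no ghost cells).
auto nrm = amrex::norminf(umac, 0, amrex::nComp(umac), amrex::IntVect(0));
// Pass local=true to defer the MPI reduction, e.g. to batch several norms.
auto loc = amrex::norminf(umac, 0, amrex::nComp(umac), amrex::IntVect(0), true);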
1800 
1801 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
1802  int> = 0>
1803 [[nodiscard]] int nComp (Array<MF,N> const& mf)
1804 {
1805  return mf[0].nComp();
1806 }
1807 
1808 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
1809  int> = 0>
1810 [[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
1811 {
1812  return mf[0].nGrowVect();
1813 }
1814 
1815 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
1816  int> = 0>
1817 [[nodiscard]] BoxArray const&
1818 boxArray (Array<MF,N> const& mf)
1819 {
1820  return mf[0].boxArray();
1821 }
1822 
1823 template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
1824  int> = 0>
1825 [[nodiscard]] DistributionMapping const&
1826 DistributionMap (Array<MF,N> const& mf)
1827 {
1828  return mf[0].DistributionMap();
1829 }
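These accessors report metadata from element 0 only; for face-centered arrays each direction has its own BoxArray, so boxArray(mf) is representative rather than shared. A sketch querying the hypothetical umac:

int nc = amrex::nComp(umac);                                  // components per element
amrex::IntVect ng = amrex::nGrowVect(umac);                   // ghost width of umac[0]
const amrex::DistributionMapping& dm = amrex::DistributionMap(umac);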
1830 
1831 }
1832 
1833 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define BL_ASSERT(EX)
Definition: AMReX_BLassert.H:39
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...)
Definition: AMReX_GpuLaunch.nolint.H:55
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
Real * pdst
Definition: AMReX_HypreMLABecLap.cpp:1090
#define AMREX_LOOP_3D(bx, i, j, k, block)
Definition: AMReX_Loop.nolint.H:4
#define AMREX_LOOP_4D(bx, ncomp, i, j, k, n, block)
Definition: AMReX_Loop.nolint.H:16
#define AMREX_D_TERM(a, b, c)
Definition: AMReX_SPACE.H:129
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
Print on all processors of the default communicator.
Definition: AMReX_Print.H:117
virtual bool isManaged() const
Definition: AMReX_Arena.cpp:79
virtual bool isDevice() const
Definition: AMReX_Arena.cpp:91
AMREX_GPU_HOST_DEVICE bool ok() const noexcept
Checks if it is a proper BoxND (including a valid type).
Definition: AMReX_Box.H:200
AMREX_GPU_HOST_DEVICE bool contains(const IntVectND< dim > &p) const noexcept
Returns true if argument is contained within BoxND.
Definition: AMReX_Box.H:204
IntVect nGrowVect() const noexcept
Definition: AMReX_FabArrayBase.H:79
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition: AMReX_FabArrayBase.H:94
bool isFusingCandidate() const noexcept
Is this a good candidate for kernel fusing?
IndexType ixType() const noexcept
Return index type.
Definition: AMReX_FabArrayBase.H:85
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition: AMReX_FabArrayBase.H:130
@ ADD
Definition: AMReX_FabArrayBase.H:393
int nComp() const noexcept
Return number of variables (aka components) associated with each point.
Definition: AMReX_FabArrayBase.H:82
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:344
Array4< typename FabArray< FAB >::value_type const > const_array(const MFIter &mfi) const noexcept
Definition: AMReX_FabArray.H:1593
std::unique_ptr< FabArray< FAB > > os_temp
Definition: AMReX_FabArray.H:1412
void prefetchToDevice(const MFIter &mfi) const noexcept
Definition: AMReX_FabArray.H:1549
const FabFactory< FAB > & Factory() const noexcept
Definition: AMReX_FabArray.H:442
Array4< typename FabArray< FAB >::value_type const > array(const MFIter &mfi) const noexcept
Definition: AMReX_FabArray.H:1561
void setBndry(value_type val)
Set all values in the boundary region to val.
Definition: AMReX_FabArray.H:2298
void setVal(value_type val)
Set all components in the entire region of each FAB to val.
Definition: AMReX_FabArray.H:2497
MultiArray4< typename FabArray< FAB >::value_type > arrays() noexcept
Definition: AMReX_FabArray.H:1657
void mult(value_type val, int comp, int num_comp, int nghost=0)
Definition: AMReX_FabArray.H:2750
Arena * arena() const noexcept
Definition: AMReX_FabArray.H:445
MultiArray4< typename FabArray< FAB >::value_type const > const_arrays() const noexcept
Definition: AMReX_FabArray.H:1675
void prefetchToHost(const MFIter &mfi) const noexcept
Definition: AMReX_FabArray.H:1537
Definition: AMReX_Tuple.H:93
Definition: AMReX_GpuBuffer.H:17
T const * data() const noexcept
Definition: AMReX_GpuBuffer.H:65
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool cellCentered() const noexcept
True if the IndexTypeND is CELL based in all directions.
Definition: AMReX_IndexType.H:101
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool allGE(const IntVectND< dim > &rhs) const noexcept
Returns true if this is greater than or equal to argument for all components. NOTE: This is NOT a str...
Definition: AMReX_IntVect.H:443
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool allGT(const IntVectND< dim > &rhs) const noexcept
Returns true if this is greater than argument for all components. NOTE: This is NOT a strict weak ord...
Definition: AMReX_IntVect.H:418
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IntVectND< dim > TheMinVector() noexcept
Definition: AMReX_IntVect.H:720
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IntVectND< dim > TheZeroVector() noexcept
This static member function returns a reference to a constant IntVectND object, all of whose dim argu...
Definition: AMReX_IntVect.H:672
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
Definition: AMReX_PODVector.H:246
T * data() noexcept
Definition: AMReX_PODVector.H:593
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition: AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition: AMReX_Periodicity.cpp:52
Print & SetPrecision(int p)
Definition: AMReX_Print.H:89
@ FAB
Definition: AMReX_AmrvisConstants.H:86
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Exch(T *address, T val) noexcept
Definition: AMReX_GpuAtomic.H:485
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:265
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86
bool inNoSyncRegion() noexcept
Definition: AMReX_GpuControl.H:146
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
void Sum(T &v, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:204
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition: AMReX_ParallelReduce.H:126
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition: AMReX_ParallelContext.H:70
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
@ min
Definition: AMReX_ParallelReduce.H:18
@ max
Definition: AMReX_ParallelReduce.H:17
std::enable_if_t<!amrex::DefinitelyNotHostRunnable< F >::value, typename FAB::value_type > ReduceSum_host_wrapper(FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:110
FAB::value_type ReduceMax_host(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:540
std::enable_if_t< IsBaseFab< FAB >::value, std::conditional_t< std::is_same< OP, ReduceOpLogicalAnd >::value||std::is_same< OP, ReduceOpLogicalOr >::value, int, typename FAB::value_type > > ReduceMF(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:50
std::enable_if_t<!amrex::DefinitelyNotHostRunnable< F >::value, typename FAB::value_type > ReduceMax_host_wrapper(FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:563
std::enable_if_t<!amrex::DefinitelyNotHostRunnable< F >::value, typename FAB::value_type > ReduceMin_host_wrapper(FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:336
bool ReduceLogicalAnd_host(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:767
FAB::value_type ReduceMin_host(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:314
std::enable_if_t<!amrex::DefinitelyNotHostRunnable< F >::value, bool > ReduceLogicalOr_host_wrapper(FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:936
bool ReduceLogicalOr_host(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:914
std::enable_if_t<!amrex::DefinitelyNotHostRunnable< F >::value, bool > ReduceLogicalAnd_host_wrapper(FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:789
FAB::value_type ReduceSum_host(FabArray< FAB > const &fa, IntVect const &nghost, F const &f)
Definition: AMReX_FabArrayUtility.H:24
Definition: AMReX_Amr.cpp:49
MF::value_type norminf(MF const &mf, int scomp, int ncomp, IntVect const &nghost, bool local=false)
Definition: AMReX_FabArrayUtility.H:1682
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
int nComp(FabArrayBase const &fa)
FAB::value_type ReduceMax(FabArray< FAB > const &fa, int nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:531
void Saxpy(MF &dst, typename MF::value_type a, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst += a * src
Definition: AMReX_FabArrayUtility.H:1646
void Xpay(Array< MF, N > &dst, typename MF::value_type a, Array< MF, N > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src + a * dst
Definition: AMReX_FabArrayUtility.H:1751
IntVect nGrowVect(FabArrayBase const &fa)
void LocalCopy(DMF &dst, SMF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src
Definition: AMReX_FabArrayUtility.H:1630
void printCell(FabArray< FAB > const &mf, const IntVect &cell, int comp=-1, const IntVect &ng=IntVect::TheZeroVector())
Definition: AMReX_FabArrayUtility.H:1051
bool ReduceLogicalAnd(FabArray< FAB > const &fa, int nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:758
void Abs(FabArray< FAB > &fa, int icomp, int numcomp, int nghost)
Definition: AMReX_FabArrayUtility.H:1247
void LocalAdd(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst += src
Definition: AMReX_FabArrayUtility.H:1638
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition: AMReX_Box.H:1211
void Copy(FabArray< DFAB > &dst, FabArray< SFAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArray.H:179
void prefetchToDevice(FabArray< FAB > const &fa, const bool synchronous=true)
Definition: AMReX_FabArrayUtility.H:1306
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T abs(const GpuComplex< T > &a_z) noexcept
Return the absolute value of a complex number.
Definition: AMReX_GpuComplex.H:356
void Subtract(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArrayUtility.H:1104
void OverrideSync(FabArray< FAB > &fa, FabArray< IFAB > const &msk, const Periodicity &period)
Definition: AMReX_FabArrayUtility.H:1323
bool isMFIterSafe(const FabArrayBase &x, const FabArrayBase &y)
Definition: AMReX_MFIter.H:219
BoxArray const & boxArray(FabArrayBase const &fa)
ReduceData< Ts... >::Type ParReduce(TypeList< Ops... > operation_list, TypeList< Ts... > type_list, FabArray< FAB > const &fa, IntVect const &nghost, F &&f)
Parallel reduce for MultiFab/FabArray.
Definition: AMReX_ParReduce.H:47
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > makeSingleCellBox(int i, int j, int k, IndexTypeND< dim > typ=IndexTypeND< dim >::TheCellType())
Definition: AMReX_Box.H:2009
bool ReduceLogicalOr(FabArray< FAB > const &fa, int nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:905
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
void LinComb(MF &dst, typename MF::value_type a, MF const &src_a, int acomp, typename MF::value_type b, MF const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition: AMReX_FabArrayUtility.H:1662
FAB::value_type ReduceMin(FabArray< FAB > const &fa, int nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:305
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
void dtoh_memcpy(FabArray< FAB > &dst, FabArray< FAB > const &src, int scomp, int dcomp, int ncomp)
Definition: AMReX_FabArrayUtility.H:1396
void setBndry(MF &dst, typename MF::value_type val, int scomp, int ncomp)
dst = val in ghost cells.
Definition: AMReX_FabArrayUtility.H:1614
void single_task(L &&f) noexcept
Definition: AMReX_GpuLaunchFunctsC.H:1307
void Divide(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArrayUtility.H:1200
void Xpay(MF &dst, typename MF::value_type a, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src + a * dst
Definition: AMReX_FabArrayUtility.H:1654
void Scale(MF &dst, typename MF::value_type val, int scomp, int ncomp, int nghost)
dst *= val
Definition: AMReX_FabArrayUtility.H:1621
void prefetchToHost(FabArray< FAB > const &fa, const bool synchronous=true)
Definition: AMReX_FabArrayUtility.H:1291
bool TilingIfNotGPU() noexcept
Definition: AMReX_MFIter.H:12
void Saxpy(Array< MF, N > &dst, typename MF::value_type a, Array< MF, N > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst += a * src
Definition: AMReX_FabArrayUtility.H:1740
void Add(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArray.H:240
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
const int[]
Definition: AMReX_BLProfiler.cpp:1664
void OverrideSync_finish(FabArray< FAB > &fa)
Definition: AMReX_FabArrayUtility.H:1382
FAB::value_type Dot(FabArray< FAB > const &x, int xcomp, FabArray< FAB > const &y, int ycomp, int ncomp, IntVect const &nghost, bool local=false)
Compute dot products of two FabArrays.
Definition: AMReX_FabArrayUtility.H:1554
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition: AMReX_FabArrayUtility.H:1672
void htod_memcpy(FabArray< FAB > &dst, FabArray< FAB > const &src, int scomp, int dcomp, int ncomp)
Definition: AMReX_FabArrayUtility.H:1421
void setVal(MF &dst, typename MF::value_type val)
dst = val
Definition: AMReX_FabArrayUtility.H:1607
void Multiply(FabArray< FAB > &dst, FabArray< FAB > const &src, int srccomp, int dstcomp, int numcomp, int nghost)
Definition: AMReX_FabArrayUtility.H:1152
FAB::value_type ReduceSum(FabArray< FAB > const &fa, int nghost, F &&f)
Definition: AMReX_FabArrayUtility.H:16
IntVect indexFromValue(FabArray< FAB > const &mf, int comp, IntVect const &nghost, typename FAB::value_type value)
Definition: AMReX_FabArrayUtility.H:1446
void LinComb(Array< MF, N > &dst, typename MF::value_type a, Array< MF, N > const &src_a, int acomp, typename MF::value_type b, Array< MF, N > const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition: AMReX_FabArrayUtility.H:1762
void OverrideSync_nowait(FabArray< FAB > &fa, FabArray< IFAB > const &msk, const Periodicity &period)
Definition: AMReX_FabArrayUtility.H:1335
std::array< T, N > Array
Definition: AMReX_Array.H:24
integer, parameter dp
Definition: AMReX_SDCquadrature.F90:8
Definition: AMReX_TypeTraits.H:18
FabArray memory allocation information.
Definition: AMReX_FabArray.H:66
Definition: AMReX_MFIter.H:20
Struct for holding types.
Definition: AMReX_TypeList.H:12