Block-Structured AMR Software Framework
AMReX_FabArrayUtility.H
#ifndef AMREX_FABARRAY_UTILITY_H_
#define AMREX_FABARRAY_UTILITY_H_
#include <AMReX_Config.H>

#include <AMReX_FabArray.H>
#include <AMReX_LayoutData.H>
#include <AMReX_Print.H>
#include <AMReX_ParReduce.H>
#include <limits>

namespace amrex {

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, int nghost, F&& f) {
    return ReduceSum(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        auto const& arr = fa.const_array(mfi);
        sm += f(bx, arr);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class OP, class FAB, class F>
std::enable_if_t<IsBaseFab<FAB>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB::value_type> >
ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB::value_type>;
    auto typ = fa.ixType();
    auto const& ma = fa.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ), ma[box_no])) };
    });
}

template <class OP, class FAB1, class FAB2, class F>
std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB1::value_type> >
ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB1::value_type>;
    auto typ = fa1.ixType();
    auto const& ma1 = fa1.const_arrays();
    auto const& ma2 = fa2.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
                                  ma1[box_no], ma2[box_no])) };
    });
}

template <class OP, class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value && IsBaseFab<FAB3>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB1::value_type> >
ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
          FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB1::value_type>;
    auto typ = fa1.ixType();
    auto const& ma1 = fa1.const_arrays();
    auto const& ma2 = fa2.const_arrays();
    auto const& ma3 = fa3.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
                                  ma1[box_no], ma2[box_no], ma3[box_no])) };
    });
}

template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceSum_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa, nghost, std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa, nghost, std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa, nghost, std::forward<F>(f));
}
#endif
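
// A usage sketch (illustrative, with a hypothetical MultiFab `mf`): sum
// component 0 over all valid cells. The callable receives a Box and an Array4
// and returns the partial sum for that box; combining partial sums across MPI
// ranks is left to the caller.
//
//     amrex::Real total = amrex::ReduceSum(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (amrex::Box const& bx,
//                                    amrex::Array4<amrex::Real const> const& a) -> amrex::Real
//         {
//             amrex::Real s = 0.;
//             AMREX_LOOP_3D(bx, i, j, k, { s += a(i,j,k); });
//             return s;
//         });
//     amrex::ParallelDescriptor::ReduceRealSum(total); // global sum over ranks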

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           int nghost, F&& f) {
    return ReduceSum(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        sm += f(bx, arr1, arr2);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceSum(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        sm += f(bx, arr1, arr2, arr3);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceMin(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = std::min(r, f(bx, arr));
    }
    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceMin_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa, nghost, std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa, nghost, std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa, nghost, std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
{
    return ReduceMin(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = std::min(r, f(bx, arr1, arr2));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceMin(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        r = std::min(r, f(bx, arr1, arr2, arr3));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif
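
// A usage sketch (illustrative): the smallest value of component 0, including
// one ghost layer. Note the identity element: the host path starts from
// std::numeric_limits<Real>::max(), so the callable only needs to reduce its
// own box.
//
//     amrex::Real vmin = amrex::ReduceMin(mf, 1,
//         [=] AMREX_GPU_HOST_DEVICE (amrex::Box const& bx,
//                                    amrex::Array4<amrex::Real const> const& a) -> amrex::Real
//         {
//             amrex::Real r = std::numeric_limits<amrex::Real>::max();
//             AMREX_LOOP_3D(bx, i, j, k, { r = amrex::min(r, a(i,j,k)); });
//             return r;
//         });
//     amrex::ParallelDescriptor::ReduceRealMin(vmin);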

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceMax(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = std::max(r, f(bx, arr));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceMax_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
{
    return ReduceMax(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = std::max(r, f(bx, arr1, arr2));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceMax(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        r = std::max(r, f(bx, arr1, arr2, arr3));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif
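
// A usage sketch (illustrative) of the two-FabArray overload: the largest
// pointwise difference |a-b| of component 0 over valid cells, for two
// hypothetical FabArrays `fa` and `fb` on the same BoxArray.
//
//     amrex::Real dmax = amrex::ReduceMax(fa, fb, 0,
//         [=] AMREX_GPU_HOST_DEVICE (amrex::Box const& bx,
//                                    amrex::Array4<amrex::Real const> const& a,
//                                    amrex::Array4<amrex::Real const> const& b) -> amrex::Real
//         {
//             amrex::Real r = std::numeric_limits<amrex::Real>::lowest();
//             AMREX_LOOP_3D(bx, i, j, k, { r = amrex::max(r, std::abs(a(i,j,k)-b(i,j,k))); });
//             return r;
//         });
//     amrex::ParallelDescriptor::ReduceRealMax(dmax);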

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceLogicalAnd(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    int r = true;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(&&:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = r && f(bx, arr);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalAnd_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  int nghost, F&& f)
{
    return ReduceLogicalAnd(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                       IntVect const& nghost, F const& f)
{
    int r = true;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(&&:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = r && f(bx, arr1, arr2);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                               IntVect const& nghost, F&& f)
{
    return ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                               IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalAnd_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif
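
// A usage sketch (illustrative): check that component 0 is positive everywhere.
// The callable returns an int treated as a boolean; results are combined with
// logical AND across boxes (and, on GPU, across cells).
//
//     bool all_positive = amrex::ReduceLogicalAnd(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (amrex::Box const& bx,
//                                    amrex::Array4<amrex::Real const> const& a) -> int
//         {
//             int r = 1;
//             AMREX_LOOP_3D(bx, i, j, k, { r = r && (a(i,j,k) > 0.0); });
//             return r;
//         });
//     amrex::ParallelDescriptor::ReduceBoolAnd(all_positive);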

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceLogicalOr(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    int r = false;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(||:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = r || f(bx, arr);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& /*f*/)
{
    amrex::ignore_unused(fa,nghost);
    amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalOr>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalOr_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 int nghost, F&& f)
{
    return ReduceLogicalOr(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                      IntVect const& nghost, F const& f)
{
    int r = false;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(||:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = r || f(bx, arr1, arr2);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                              IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                              IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalOr>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalOr_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif
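
// A usage sketch (illustrative): detect whether any cell holds a NaN.
//
//     bool has_nan = amrex::ReduceLogicalOr(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (amrex::Box const& bx,
//                                    amrex::Array4<amrex::Real const> const& a) -> int
//         {
//             int r = 0;
//             AMREX_LOOP_3D(bx, i, j, k, { r = r || amrex::isnan(a(i,j,k)); });
//             return r;
//         });
//     amrex::ParallelDescriptor::ReduceBoolOr(has_nan);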

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
printCell (FabArray<FAB> const& mf, const IntVect& cell, int comp = -1,
           const IntVect& ng = IntVect::TheZeroVector())
{
    for (MFIter mfi(mf); mfi.isValid(); ++mfi)
    {
        const Box& bx = amrex::grow(mfi.validbox(), ng);
        if (bx.contains(cell)) {
            int n = (comp >= 0) ? 1 : mf.nComp();
            auto const& fab = mf.const_array(mfi);
            Gpu::PinnedVector<typename FAB::value_type> pv(n);
            auto* dp = pv.data();
            auto f = [=] AMREX_GPU_HOST_DEVICE ()
            {
                if (comp >= 0) {
                    *dp = fab(cell, comp);
                } else {
                    for (int i = 0; i < n; ++i) {
                        dp[i] = fab(cell,i);
                    }
                }
            };

#ifdef AMREX_USE_GPU
            if (mf.arena()->isManaged() || mf.arena()->isDevice()) {
                Gpu::single_task(f);
                Gpu::streamSynchronize();
            } else
#endif
            {
                f();
            }

            if (comp >= 0) {
                amrex::AllPrint().SetPrecision(17) << " At cell " << cell << " in Box " << bx
                                                   << ": " << *dp << '\n';
            } else {
                std::ostringstream ss;
                ss.precision(17);
                for (int i = 0; i < n-1; ++i)
                {
                    ss << dp[i] << ", ";
                }
                ss << dp[n-1];
                amrex::AllPrint() << " At cell " << cell << " in Box " << bx
                                  << ": " << ss.str() << '\n';
            }
        }
    }
}
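
// A usage sketch (illustrative): print every component of a hypothetical
// MultiFab at cell (8,8,8), on whichever ranks own that cell. Useful for
// debugging; the implementation stages the data through host-accessible
// memory, so device-resident FabArrays work too.
//
//     amrex::printCell(mf, amrex::IntVect(AMREX_D_DECL(8,8,8)));
//     amrex::printCell(mf, amrex::IntVect(AMREX_D_DECL(8,8,8)), 2); // component 2 only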

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Subtract(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) -= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) -= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}


template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Multiply(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) *= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) *= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}


template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Divide(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) /= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) /= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Abs (FabArray<FAB>& fa, int icomp, int numcomp, int nghost)
{
    Abs(fa,icomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Abs (FabArray<FAB>& fa, int icomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
        auto const& fabarr = fa.arrays();
        ParallelFor(fa, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            fabarr[box_no](i,j,k,n+icomp) = std::abs(fabarr[box_no](i,j,k,n+icomp));
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const& fab = fa.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    fab(i,j,k,n+icomp) = std::abs(fab(i,j,k,n+icomp));
                });
            }
        }
    }
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
prefetchToHost (FabArray<FAB> const& fa, const bool synchronous = true)
{
#ifdef AMREX_USE_GPU
    if (fa.arena()->isManaged()) {
        for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
            fa.prefetchToHost(mfi);
        }
    }
#else
    amrex::ignore_unused(fa,synchronous);
#endif
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
prefetchToDevice (FabArray<FAB> const& fa, const bool synchronous = true)
{
#ifdef AMREX_USE_GPU
    if (fa.arena()->isManaged()) {
        for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
            fa.prefetchToDevice(mfi);
        }
    }
#else
    amrex::ignore_unused(fa,synchronous);
#endif
}
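
// A usage sketch (illustrative): these calls are hints only, and no-ops unless
// the FabArray lives in a managed-memory arena.
//
//     amrex::prefetchToDevice(mf); // before a burst of GPU kernels
//     amrex::prefetchToHost(mf);   // before host-side post-processing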


template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
                                                              && IsBaseFab<IFAB>::value> >
void
OverrideSync (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
{
    BL_PROFILE("OverrideSync()");

    OverrideSync_nowait(fa, msk, period);
    OverrideSync_finish(fa);
}


template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
                                                              && IsBaseFab<IFAB>::value> >
void
OverrideSync_nowait (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
{
    BL_PROFILE("OverrideSync_nowait()");
    AMREX_ASSERT_WITH_MESSAGE(!fa.os_temp, "OverrideSync_nowait() called when already in progress.");

    if (fa.ixType().cellCentered()) { return; }

    const int ncomp = fa.nComp();

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
        auto const& fabarr = fa.arrays();
        auto const& ifabarr = msk.const_arrays();
        ParallelFor(fa, IntVect(0), ncomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            if (!ifabarr[box_no](i,j,k)) { fabarr[box_no](i,j,k,n) = 0; }
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.tilebox();
            auto fab = fa.array(mfi);
            auto const ifab = msk.array(mfi);
            AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
            {
                if (!ifab(i,j,k)) { fab(i,j,k,n) = 0; }
            });
        }
    }

    fa.os_temp = std::make_unique< FabArray<FAB> > ( fa.boxArray(), fa.DistributionMap(),
                                                     ncomp, 0, MFInfo(), fa.Factory() );
    fa.os_temp->setVal(0);
    fa.os_temp->ParallelCopy_nowait(fa, period, FabArrayBase::ADD);
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
OverrideSync_finish (FabArray<FAB> & fa)
{
    BL_PROFILE("OverrideSync_finish()");

    if (fa.ixType().cellCentered()) { return; }

    fa.os_temp->ParallelCopy_finish();
    amrex::Copy(fa, *(fa.os_temp), 0, 0, fa.nComp(), 0);

    fa.os_temp.reset();
}
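
// A usage sketch (illustrative): make nodal data consistent on shared faces,
// edges, or nodes by letting the "owner" value win. For a MultiFab, OwnerMask
// supplies a suitable mask (assuming a Geometry `geom` for the periodicity).
//
//     const amrex::Periodicity& period = geom.periodicity();
//     auto owner = mf.OwnerMask(period); // std::unique_ptr<iMultiFab>
//     amrex::OverrideSync(mf, *owner, period);
//
// The _nowait/_finish pair exposes the same operation in two phases so the
// communication can be overlapped with other work.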

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
             int scomp, int dcomp, int ncomp)
{
    AMREX_ASSERT(isMFIterSafe(dst, src));
    AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
#ifdef AMREX_USE_GPU
    for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
        void* pdst = dst[mfi].dataPtr(dcomp);
        void const* psrc = src[mfi].dataPtr(scomp);
        Gpu::dtoh_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
    }
#else
    Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
#endif
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
{
    dtoh_memcpy(dst, src, 0, 0, dst.nComp());
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
             int scomp, int dcomp, int ncomp)
{
    AMREX_ASSERT(isMFIterSafe(dst, src));
    AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
#ifdef AMREX_USE_GPU
    for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
        void* pdst = dst[mfi].dataPtr(dcomp);
        void const* psrc = src[mfi].dataPtr(scomp);
        Gpu::htod_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
    }
#else
    Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
#endif
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
{
    htod_memcpy(dst, src, 0, 0, dst.nComp());
}
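
// A usage sketch (illustrative): mirror a device-resident FabArray into a host
// copy with identical BoxArray, DistributionMapping, and ghost cells. The
// per-fab copies are asynchronous, so synchronize before touching the result.
//
//     amrex::dtoh_memcpy(host_mf, device_mf);
//     amrex::Gpu::streamSynchronize();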

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
IntVect
indexFromValue (FabArray<FAB> const& mf, int comp, IntVect const& nghost,
                typename FAB::value_type value)
{
    IntVect loc;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        Gpu::Buffer<int> aa({0,AMREX_D_DECL(0,0,0)});
        int* p = aa.data();
        // This is a device ptr to 1+AMREX_SPACEDIM int zeros.
        // The first is used as an atomic bool and the others for intvect.
        if (mf.isFusingCandidate()) {
            auto const& ma = mf.const_arrays();
            ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
            {
                int* flag = p;
                if (*flag == 0) {
                    if (ma[box_no](i,j,k,comp) == value) {
                        if (Gpu::Atomic::Exch(flag,1) == 0) {
                            AMREX_D_TERM(p[1] = i;,
                                         p[2] = j;,
                                         p[3] = k;);
                        }
                    }
                }
            });
        } else {
            for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) {
                const Box& bx = amrex::grow(mfi.validbox(), nghost);
                auto const& arr = mf.const_array(mfi);
                amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                {
                    int* flag = p;
                    if (*flag == 0) {
                        if (arr(i,j,k,comp) == value) {
                            if (Gpu::Atomic::Exch(flag,1) == 0) {
                                AMREX_D_TERM(p[1] = i;,
                                             p[2] = j;,
                                             p[3] = k;);
                            }
                        }
                    }
                });
            }
        }
        int const* tmp = aa.copyToHost();
        AMREX_D_TERM(loc[0] = tmp[1];,
                     loc[1] = tmp[2];,
                     loc[2] = tmp[3];);
    }
    else
#endif
    {
        bool f = false;
#ifdef AMREX_USE_OMP
#pragma omp parallel
#endif
        {
            IntVect priv_loc = IntVect::TheMinVector();
            for (MFIter mfi(mf,true); mfi.isValid(); ++mfi)
            {
                const Box& bx = mfi.growntilebox(nghost);
                auto const& fab = mf.const_array(mfi);
                AMREX_LOOP_3D(bx, i, j, k,
                {
                    if (fab(i,j,k,comp) == value) {
                        priv_loc = IntVect(AMREX_D_DECL(i,j,k));
                    }
                });
            }

            if (priv_loc.allGT(IntVect::TheMinVector())) {
                bool old;
// we should be able to test on _OPENMP < 201107 for capture (version 3.1)
// but we must work around a bug in gcc < 4.9
// And, with NVHPC 21.9 to <23.1, we saw an ICE with the atomic capture (NV bug: #3390723)
#if defined(AMREX_USE_OMP) && defined(_OPENMP) && (_OPENMP < 201307 || (defined(__NVCOMPILER) && __NVCOMPILER_MAJOR__ < 23)) // OpenMP 4.0
#pragma omp critical (amrex_indexfromvalue)
#elif defined(AMREX_USE_OMP)
#pragma omp atomic capture
#endif
                {
                    old = f;
                    f = true;
                }

                if (old == false) { loc = priv_loc; }
            }
        }
    }

    return loc;
}
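
// A usage sketch (illustrative): recover the cell index at which a known value
// occurs, e.g. the location of a previously computed maximum. If the value
// occurs more than once, which matching cell is reported is unspecified.
//
//     amrex::Real vmax = mf.max(0);
//     amrex::IntVect where = amrex::indexFromValue(mf, 0, amrex::IntVect(0), vmax);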

/**
 * \brief Dot product of two FabArrays.
 *
 * Computes the sum over all cells (including nghost ghost cells) of
 * x(i,j,k,xcomp+n) * y(i,j,k,ycomp+n) for n in [0,ncomp). If local is false,
 * the result is summed over MPI processes.
 */
template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int ncomp,
     IntVect const& nghost, bool local = false)
{
    BL_ASSERT(x.boxArray() == y.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto const& xfab = xma[box_no];
            auto const& yfab = yma[box_no];
            for (int n = 0; n < ncomp; ++n) {
                t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& xfab = x.const_array(mfi);
            auto const& yfab = y.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                sm += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

/**
 * \brief Dot product of a FabArray with itself.
 *
 * Computes the sum over all cells (including nghost ghost cells) of
 * x(i,j,k,xcomp+n)^2 for n in [0,ncomp). If local is false, the result is
 * summed over MPI processes.
 */
template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<FAB> const& x, int xcomp, int ncomp, IntVect const& nghost, bool local = false)
{
    BL_ASSERT(x.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto const& xfab = xma[box_no];
            for (int n = 0; n < ncomp; ++n) {
                auto v = xfab(i,j,k,xcomp+n);
                t += v*v;
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& xfab = x.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                auto v = xfab(i,j,k,xcomp+n);
                sm += v*v;
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

/**
 * \brief Masked dot product of two FabArrays.
 *
 * Same as the two-FabArray Dot, except that cells where the mask is zero are
 * excluded. If local is false, the result is summed over MPI processes.
 */
template <typename IFAB, typename FAB,
          std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp,
     FabArray<FAB> const& y, int ycomp, int ncomp, IntVect const& nghost,
     bool local = false)
{
    BL_ASSERT(x.boxArray() == y.boxArray() && x.boxArray() == mask.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap() && x.DistributionMap() == mask.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost) &&
              mask.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& mma = mask.const_arrays();
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto m = T(mma[box_no](i,j,k));
            if (m != 0) {
                auto const& xfab = xma[box_no];
                auto const& yfab = yma[box_no];
                for (int n = 0; n < ncomp; ++n) {
                    t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
                }
            }
            return t*m;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& mfab = mask.const_array(mfi);
            auto const& xfab = x.const_array(mfi);
            auto const& yfab = y.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                auto m = T(mfab(i,j,k));
                sm += m * xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

/**
 * \brief Masked dot product of a FabArray with itself.
 *
 * Same as the self Dot, except that cells where the mask is zero are excluded.
 * If local is false, the result is summed over MPI processes.
 */
template <typename IFAB, typename FAB,
          std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp, int ncomp,
     IntVect const& nghost, bool local = false)
{
    BL_ASSERT(x.boxArray() == mask.boxArray());
    BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && mask.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& mma = mask.const_arrays();
        auto const& xma = x.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto m = T(mma[box_no](i,j,k));
            if (m != 0) {
                auto const& xfab = xma[box_no];
                for (int n = 0; n < ncomp; ++n) {
                    auto v = xfab(i,j,k,xcomp+n);
                    t += v*v;
                }
            }
            return t*m;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& mfab = mask.const_array(mfi);
            auto const& xfab = x.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                auto m = T(mfab(i,j,k));
                auto v = xfab(i,j,k,xcomp+n);
                sm += m*v*v;
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}
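
// A usage sketch (illustrative): a global L2 norm from the self-dot overload,
// and a masked dot product that only counts cells where an iMultiFab mask is
// nonzero.
//
//     amrex::Real nrm2 = std::sqrt(amrex::Dot(x, 0, 1, amrex::IntVect(0)));
//     amrex::Real d    = amrex::Dot(mask, x, 0, y, 0, 1, amrex::IntVect(0));
//
// Pass local=true to skip the MPI reduction, e.g. to combine several dot
// products into a single ParallelAllReduce later.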
1805
1807template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1808void setVal (MF& dst, typename MF::value_type val)
1809{
1810 dst.setVal(val);
1811}
1812
1814template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1815void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
1816{
1817 dst.setBndry(val, scomp, ncomp);
1818}
1819
1821template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1822void Scale (MF& dst, typename MF::value_type val, int scomp, int ncomp, int nghost)
1823{
1824 dst.mult(val, scomp, ncomp, nghost);
1825}
1826
1828template <class DMF, class SMF,
1829 std::enable_if_t<IsMultiFabLike_v<DMF> &&
1830 IsMultiFabLike_v<SMF>, int> = 0>
1831void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
1832 int ncomp, IntVect const& nghost)
1833{
1834 amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
1835}
1836
1838template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1839void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
1840 int ncomp, IntVect const& nghost)
1841{
1842 amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
1843}
1844
1846template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1847void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1848 int ncomp, IntVect const& nghost)
1849{
1850 MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
1851}
1852
1854template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1855void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1856 int ncomp, IntVect const& nghost)
1857{
1858 MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
1859}
1860
1862template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1863void LinComb (MF& dst,
1864 typename MF::value_type a, MF const& src_a, int acomp,
1865 typename MF::value_type b, MF const& src_b, int bcomp,
1866 int dcomp, int ncomp, IntVect const& nghost)
1867{
1868 MF::LinComb(dst, a, src_a, acomp, b, src_b, bcomp, dcomp, ncomp, nghost);
1869}
1870
1872template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1873void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
1874 IntVect const& ng_src = IntVect(0),
1875 IntVect const& ng_dst = IntVect(0),
1876 Periodicity const& period = Periodicity::NonPeriodic())
1877{
1878 dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
1879}
1880
1881template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1882[[nodiscard]] typename MF::value_type
1883norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
1884 bool local = false)
1885{
1886 return mf.norminf(scomp, ncomp, nghost, local);
1887}
1888
1890template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1891void setVal (Array<MF,N>& dst, typename MF::value_type val)
1892{
1893 for (auto& mf: dst) {
1894 mf.setVal(val);
1895 }
1896}
1897
1899template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1900void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
1901{
1902 for (auto& mf : dst) {
1903 mf.setBndry(val, scomp, ncomp);
1904 }
1905}
1906
1908template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1909void Scale (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp,
1910 int nghost)
1911{
1912 for (auto& mf : dst) {
1913 mf.mult(val, scomp, ncomp, nghost);
1914 }
1915}
1916
1917//! dst = src
1918template <class DMF, class SMF, std::size_t N,
1919 std::enable_if_t<IsMultiFabLike_v<DMF> &&
1920 IsMultiFabLike_v<SMF>, int> = 0>
1921void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
1922 int ncomp, IntVect const& nghost)
1923{
1924 for (std::size_t i = 0; i < N; ++i) {
1925 amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
1926 }
1927}
1928
1929//! dst += src
1930template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1931void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
1932 int ncomp, IntVect const& nghost)
1933{
1934 for (std::size_t i = 0; i < N; ++i) {
1935 amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
1936 }
1937}
1938
1939//! dst += a * src
1940template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1941void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
1942 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
1943 IntVect const& nghost)
1944{
1945 for (std::size_t i = 0; i < N; ++i) {
1946 MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
1947 }
1948}
1949
1950//! dst = src + a * dst
1951template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1952void Xpay (Array<MF,N>& dst, typename MF::value_type a,
1953 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
1954 IntVect const& nghost)
1955{
1956 for (std::size_t i = 0; i < N; ++i) {
1957 MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
1958 }
1959}
1960
1961//! dst = a*src_a + b*src_b
1962template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1963void LinComb (Array<MF,N>& dst,
1964 typename MF::value_type a, Array<MF,N> const& src_a, int acomp,
1965 typename MF::value_type b, Array<MF,N> const& src_b, int bcomp,
1966 int dcomp, int ncomp, IntVect const& nghost)
1967{
1968 for (std::size_t i = 0; i < N; ++i) {
1969 MF::LinComb(dst[i], a, src_a[i], acomp, b, src_b[i], bcomp, dcomp, ncomp, nghost);
1970 }
1971}
1972
1973//! dst = src w/ MPI communication
1974template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1975void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
1976 int scomp, int dcomp, int ncomp,
1977 IntVect const& ng_src = IntVect(0),
1978 IntVect const& ng_dst = IntVect(0),
1979 Periodicity const& period = Periodicity::NonPeriodic())
1980{
1981 for (std::size_t i = 0; i < N; ++i) {
1982 dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
1983 }
1984}
1985
1986template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1987[[nodiscard]] typename MF::value_type
1988norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
1989 bool local = false)
1990{
1991 auto r = typename MF::value_type(0);
1992 for (std::size_t i = 0; i < N; ++i) {
1993 auto tmp = mf[i].norminf(scomp, ncomp, nghost, true);
1994 r = std::max(r,tmp);
1995 }
1996 if (!local) {
1997 ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
1998 }
1999 return r;
2000}
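//! Editorial sketch: the Array<MF,N> overloads above loop over the N member
//! MultiFabs; norminf takes the local max per member and performs a single
//! global reduction at the end. A hypothetical face-centered field `flux`:
//! \code
//!     Array<MultiFab,AMREX_SPACEDIM> flux;
//!     // ... define flux[0..AMREX_SPACEDIM-1] on face-centered BoxArrays ...
//!     amrex::setVal(flux, 0.0);                          // zero every member
//!     auto nrm = amrex::norminf(flux, 0, 1, IntVect(0)); // max over members
//! \endcode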
2001
2002template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2003 int> = 0>
2004[[nodiscard]] int nComp (Array<MF,N> const& mf)
2005{
2006 return mf[0].nComp();
2007}
2008
2009template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2010 int> = 0>
2011[[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
2012{
2013 return mf[0].nGrowVect();
2014}
2015
2016template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2017 int> = 0>
2018[[nodiscard]] BoxArray const&
2020{
2021 return mf[0].boxArray();
2022}
2023
2024template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2025 int> = 0>
2026[[nodiscard]] DistributionMapping const&
2028{
2029 return mf[0].DistributionMap();
2030}
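//! Editorial note: as the bodies above show, these accessors report the
//! metadata of the first member, mf[0]. For face-centered data the member
//! BoxArrays differ in index type, so boxArray() yields only the layout of
//! member 0. A sketch reusing the hypothetical `flux` from above:
//! \code
//!     int nc         = amrex::nComp(flux);
//!     IntVect ng     = amrex::nGrowVect(flux);
//!     const auto& ba = amrex::boxArray(flux);        // flux[0]'s BoxArray
//!     const auto& dm = amrex::DistributionMap(flux); // flux[0]'s mapping
//! \endcode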
2031
2032}
2033
2034#endif