Block-Structured AMR Software Framework
 
AMReX_FabArrayUtility.H
Go to the documentation of this file.
1#ifndef AMREX_FABARRAY_UTILITY_H_
2#define AMREX_FABARRAY_UTILITY_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_FabArray.H>
6#include <AMReX_LayoutData.H>
7#include <AMReX_Print.H>
8#include <AMReX_ParReduce.H>
9#include <limits>
10
11namespace amrex {
12
13template <class FAB, class F,
14 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
15typename FAB::value_type
16ReduceSum (FabArray<FAB> const& fa, int nghost, F&& f) {
17 return ReduceSum(fa, IntVect(nghost), std::forward<F>(f));
18}
19
20namespace fudetail {
21template <class FAB, class F,
22 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
23typename FAB::value_type
24ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
25{
26 using value_type = typename FAB::value_type;
27 value_type sm = 0;
28
29#ifdef AMREX_USE_OMP
30#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
31#endif
32 for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
33 {
34 const Box& bx = mfi.growntilebox(nghost);
35 auto const& arr = fa.const_array(mfi);
36 sm += f(bx, arr);
37 }
38
39 return sm;
40}
41}
42
43#ifdef AMREX_USE_GPU
44namespace fudetail {
45template <class OP, class FAB, class F>
46std::enable_if_t<IsBaseFab<FAB>::value,
47 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
48 std::is_same<OP,ReduceOpLogicalOr>::value,
49 int, typename FAB::value_type> >
50ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
51{
52 using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
53 std::is_same<OP,ReduceOpLogicalOr>::value,
54 int, typename FAB::value_type>;
55 auto typ = fa.ixType();
56 auto const& ma = fa.const_arrays();
57 return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa, nghost,
58 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
59 {
60 return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ), ma[box_no])) };
61 });
62}
63
64template <class OP, class FAB1, class FAB2, class F>
65std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value,
66 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
67 std::is_same<OP,ReduceOpLogicalOr>::value,
68 int, typename FAB1::value_type> >
69ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F const& f)
70{
71 using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
72 std::is_same<OP,ReduceOpLogicalOr>::value,
73 int, typename FAB1::value_type>;
74 auto typ = fa1.ixType();
75 auto const& ma1 = fa1.const_arrays();
76 auto const& ma2 = fa2.const_arrays();
77 return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
78 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
79 {
80 return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
81 ma1[box_no], ma2[box_no])) };
82 });
83}
84
85template <class OP, class FAB1, class FAB2, class FAB3, class F>
86std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value && IsBaseFab<FAB3>::value,
87 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
88 std::is_same<OP,ReduceOpLogicalOr>::value,
89 int, typename FAB1::value_type> >
90ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
91 FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
92{
93 using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
94 std::is_same<OP,ReduceOpLogicalOr>::value,
95 int, typename FAB1::value_type>;
96 auto typ = fa1.ixType();
97 auto const& ma1 = fa1.const_arrays();
98 auto const& ma2 = fa2.const_arrays();
99 auto const& ma3 = fa3.const_arrays();
100 return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
101 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
102 {
103 return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
104 ma1[box_no], ma2[box_no], ma3[box_no])) };
105 });
106}
107
108template <class FAB, class F>
109std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
110ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
111{
112 return ReduceSum_host(fa,nghost,std::forward<F>(f));
113}
114
115template <class FAB, class F>
116std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
117ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
118{
119 amrex::ignore_unused(fa,nghost,f);
120 amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
121 return 0;
122}
123}
124
125template <class FAB, class F,
126 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
127typename FAB::value_type
128ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
129{
130 if (Gpu::inLaunchRegion()) {
131 return fudetail::ReduceMF<ReduceOpSum>(fa, nghost, std::forward<F>(f));
132 } else {
133 return fudetail::ReduceSum_host_wrapper(fa, nghost, std::forward<F>(f));
134 }
135}
136#else
137template <class FAB, class F,
138 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
139typename FAB::value_type
140ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
141{
142 return fudetail::ReduceSum_host(fa, nghost, std::forward<F>(f));
143}
144#endif
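A minimal usage sketch (not part of the header): summing one component of a MultiFab with the single-array ReduceSum above. Assumes `using namespace amrex;` and an existing MultiFab `mf`; the lambda must be host/device callable because the GPU path feeds it single-cell boxes.

    Real total = ReduceSum(mf, 0, // 0 ghost cells
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& fab) -> Real
        {
            Real s = 0.;
            Loop(bx, [&] (int i, int j, int k) { s += fab(i,j,k); });
            return s;
        });
    ParallelDescriptor::ReduceRealSum(total); // ReduceSum is per-rank; combine across MPI ranks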
145
146template <class FAB1, class FAB2, class F,
147 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
148typename FAB1::value_type
149ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
150 int nghost, F&& f) {
151 return ReduceSum(fa1, fa2, IntVect(nghost), std::forward<F>(f));
152}
153
154namespace fudetail {
155template <class FAB1, class FAB2, class F,
156 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
157typename FAB1::value_type
158ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
159 IntVect const& nghost, F const& f)
160{
161 using value_type = typename FAB1::value_type;
162 value_type sm = 0;
163
164#ifdef AMREX_USE_OMP
165#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
166#endif
167 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
168 {
169 const Box& bx = mfi.growntilebox(nghost);
170 const auto& arr1 = fa1.const_array(mfi);
171 const auto& arr2 = fa2.const_array(mfi);
172 sm += f(bx, arr1, arr2);
173 }
174
175 return sm;
176}
177}
178
179#ifdef AMREX_USE_GPU
180namespace fudetail {
181template <class FAB1, class FAB2, class F>
182std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
183ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
184 IntVect const& nghost, F&& f)
185{
186 return ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
187}
188
189template <class FAB1, class FAB2, class F>
190std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
191ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
192 IntVect const& nghost, F&& f)
193{
194 amrex::ignore_unused(fa1,fa2,nghost,f);
195 amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
196 return 0;
197}
198}
199
200template <class FAB1, class FAB2, class F,
201 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
202typename FAB1::value_type
203ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
204 IntVect const& nghost, F&& f)
205{
206 if (Gpu::inLaunchRegion()) {
207 return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,nghost,std::forward<F>(f));
208 } else {
209 return fudetail::ReduceSum_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
210 }
211}
212#else
213template <class FAB1, class FAB2, class F,
214 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
215typename FAB1::value_type
216ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
217 IntVect const& nghost, F&& f)
218{
219 return fudetail::ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
220}
221#endif
222
223template <class FAB1, class FAB2, class FAB3, class F,
224 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
225typename FAB1::value_type
226ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
227 int nghost, F&& f)
228{
229 return ReduceSum(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
230}
231
232namespace fudetail {
233template <class FAB1, class FAB2, class FAB3, class F,
234 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
235typename FAB1::value_type
236ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
237 FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
238{
239 using value_type = typename FAB1::value_type;
240 value_type sm = 0;
241
242#ifdef AMREX_USE_OMP
243#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
244#endif
245 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
246 {
247 const Box& bx = mfi.growntilebox(nghost);
248 const auto& arr1 = fa1.const_array(mfi);
249 const auto& arr2 = fa2.const_array(mfi);
250 const auto& arr3 = fa3.const_array(mfi);
251 sm += f(bx, arr1, arr2, arr3);
252 }
253
254 return sm;
255}
256}
257
258#ifdef AMREX_USE_GPU
259namespace fudetail {
260template <class FAB1, class FAB2, class FAB3, class F>
261std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
262ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
263 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
264{
265 return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
266}
267
268template <class FAB1, class FAB2, class FAB3, class F>
269std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
270ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
271 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
272{
273 amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
274 amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
275 return 0;
276}
277}
278
279template <class FAB1, class FAB2, class FAB3, class F,
280 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
281typename FAB1::value_type
282ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
283 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
284{
285 if (Gpu::inLaunchRegion()) {
286 return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,fa3,nghost,std::forward<F>(f));
287 } else {
288 return fudetail::ReduceSum_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
289 }
290}
291#else
292template <class FAB1, class FAB2, class FAB3, class F,
293 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
294typename FAB1::value_type
295ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
296 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
297{
298 return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
299}
300#endif
301
302template <class FAB, class F,
303 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
304typename FAB::value_type
305ReduceMin (FabArray<FAB> const& fa, int nghost, F&& f)
306{
307 return ReduceMin(fa, IntVect(nghost), std::forward<F>(f));
308}
309
310namespace fudetail {
311template <class FAB, class F,
312 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
313typename FAB::value_type
314ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
315{
316 using value_type = typename FAB::value_type;
317 value_type r = std::numeric_limits<value_type>::max();
318
319#ifdef AMREX_USE_OMP
320#pragma omp parallel reduction(min:r)
321#endif
322 for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
323 {
324 const Box& bx = mfi.growntilebox(nghost);
325 const auto& arr = fa.const_array(mfi);
326 r = std::min(r, f(bx, arr));
327 }
328 return r;
329}
330}
331
332#ifdef AMREX_USE_GPU
333namespace fudetail {
334template <class FAB, class F>
335std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
336ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
337{
338 return ReduceMin_host(fa,nghost,std::forward<F>(f));
339}
340
341template <class FAB, class F>
342std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
343ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
344{
345 amrex::ignore_unused(fa,nghost,f);
346 amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
347 return 0;
348}
349}
350
351template <class FAB, class F,
352 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
353typename FAB::value_type
354ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
355{
356 if (Gpu::inLaunchRegion()) {
357 return fudetail::ReduceMF<ReduceOpMin>(fa, nghost, std::forward<F>(f));
358 } else {
359 return fudetail::ReduceMin_host_wrapper(fa, nghost, std::forward<F>(f));
360 }
361}
362#else
363template <class FAB, class F,
364 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
365typename FAB::value_type
366ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
367{
368 return fudetail::ReduceMin_host(fa, nghost, std::forward<F>(f));
369}
370#endif
371
372template <class FAB1, class FAB2, class F,
373 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
374typename FAB1::value_type
375ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
376{
377 return ReduceMin(fa1, fa2, IntVect(nghost), std::forward<F>(f));
378}
379
380namespace fudetail {
381template <class FAB1, class FAB2, class F,
382 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
383typename FAB1::value_type
384ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
385 IntVect const& nghost, F const& f)
386{
387 using value_type = typename FAB1::value_type;
388 value_type r = std::numeric_limits<value_type>::max();
389
390#ifdef AMREX_USE_OMP
391#pragma omp parallel reduction(min:r)
392#endif
393 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
394 {
395 const Box& bx = mfi.growntilebox(nghost);
396 const auto& arr1 = fa1.const_array(mfi);
397 const auto& arr2 = fa2.const_array(mfi);
398 r = std::min(r, f(bx, arr1, arr2));
399 }
400
401 return r;
402}
403}
404
405#ifdef AMREX_USE_GPU
406namespace fudetail {
407template <class FAB1, class FAB2, class F>
408std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
409ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
410 IntVect const& nghost, F&& f)
411{
412 return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
413}
414
415template <class FAB1, class FAB2, class F>
416std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
417ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
418 IntVect const& nghost, F&& f)
419{
420 amrex::ignore_unused(fa1,fa2,nghost,f);
421 amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
422 return 0;
423}
424}
425
426template <class FAB1, class FAB2, class F,
427 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
428typename FAB1::value_type
429ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
430 IntVect const& nghost, F&& f)
431{
432 if (Gpu::inLaunchRegion()) {
433 return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,nghost,std::forward<F>(f));
434 } else {
435 return fudetail::ReduceMin_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
436 }
437}
438#else
439template <class FAB1, class FAB2, class F,
440 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
441typename FAB1::value_type
442ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
443 IntVect const& nghost, F&& f)
444{
445 return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
446}
447#endif
448
449template <class FAB1, class FAB2, class FAB3, class F,
450 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
451typename FAB1::value_type
452ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
453 int nghost, F&& f)
454{
455 return ReduceMin(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
456}
457
458namespace fudetail {
459template <class FAB1, class FAB2, class FAB3, class F,
460 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
461typename FAB1::value_type
462ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
463 FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
464{
465 using value_type = typename FAB1::value_type;
466 value_type r = std::numeric_limits<value_type>::max();
467
468#ifdef AMREX_USE_OMP
469#pragma omp parallel reduction(min:r)
470#endif
471 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
472 {
473 const Box& bx = mfi.growntilebox(nghost);
474 const auto& arr1 = fa1.const_array(mfi);
475 const auto& arr2 = fa2.const_array(mfi);
476 const auto& arr3 = fa3.const_array(mfi);
477 r = std::min(r, f(bx, arr1, arr2, arr3));
478 }
479
480 return r;
481}
482}
483
484#ifdef AMREX_USE_GPU
485namespace fudetail {
486template <class FAB1, class FAB2, class FAB3, class F>
487std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
488ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
489 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
490{
491 return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
492}
493
494template <class FAB1, class FAB2, class FAB3, class F>
495std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
496ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
497 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
498{
499 amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
500 amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
501 return 0;
502}
503}
504
505template <class FAB1, class FAB2, class FAB3, class F,
506 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
507typename FAB1::value_type
508ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
509 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
510{
511 if (Gpu::inLaunchRegion()) {
512 return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,fa3,nghost,std::forward<F>(f));
513 } else {
514 return fudetail::ReduceMin_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
515 }
516}
517#else
518template <class FAB1, class FAB2, class FAB3, class F,
519 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
520typename FAB1::value_type
521ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
522 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
523{
524 return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
525}
526#endif
527
528template <class FAB, class F,
529 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
530typename FAB::value_type
531ReduceMax (FabArray<FAB> const& fa, int nghost, F&& f)
532{
533 return ReduceMax(fa, IntVect(nghost), std::forward<F>(f));
534}
535
536namespace fudetail {
537template <class FAB, class F,
538 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
539typename FAB::value_type
540ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
541{
542 using value_type = typename FAB::value_type;
543 value_type r = std::numeric_limits<value_type>::lowest();
544
545#ifdef AMREX_USE_OMP
546#pragma omp parallel reduction(max:r)
547#endif
548 for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
549 {
550 const Box& bx = mfi.growntilebox(nghost);
551 const auto& arr = fa.const_array(mfi);
552 r = std::max(r, f(bx, arr));
553 }
554
555 return r;
556}
557}
558
559#ifdef AMREX_USE_GPU
560namespace fudetail {
561template <class FAB, class F>
562std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
563ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
564{
565 return ReduceMax_host(fa,nghost,std::forward<F>(f));
566}
567
568template <class FAB, class F>
569std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
570ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
571{
572 amrex::ignore_unused(fa,nghost,f);
573 amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
574 return 0;
575}
576}
577
578template <class FAB, class F,
579 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
580typename FAB::value_type
581ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
582{
583 if (Gpu::inLaunchRegion()) {
584 return fudetail::ReduceMF<ReduceOpMax>(fa,nghost,std::forward<F>(f));
585 } else {
586 return fudetail::ReduceMax_host_wrapper(fa,nghost,std::forward<F>(f));
587 }
588}
589#else
590template <class FAB, class F,
591 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
592typename FAB::value_type
593ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
594{
595 return fudetail::ReduceMax_host(fa,nghost,std::forward<F>(f));
596}
597#endif
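A usage sketch for the min/max reductions (ReduceMin is analogous). Assumes `using namespace amrex;` and an existing MultiFab `mf`:

    Real mx = ReduceMax(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& fab) -> Real
        {
            Real r = std::numeric_limits<Real>::lowest();
            Loop(bx, [&] (int i, int j, int k) { r = amrex::max(r, fab(i,j,k)); });
            return r;
        });
    ParallelDescriptor::ReduceRealMax(mx); // combine per-rank results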
598
599template <class FAB1, class FAB2, class F,
600 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
601typename FAB1::value_type
602ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
603{
604 return ReduceMax(fa1, fa2, IntVect(nghost), std::forward<F>(f));
605}
606
607namespace fudetail {
608template <class FAB1, class FAB2, class F,
609 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
610typename FAB1::value_type
611ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
612 IntVect const& nghost, F const& f)
613{
614 using value_type = typename FAB1::value_type;
615 value_type r = std::numeric_limits<value_type>::lowest();
616
617#ifdef AMREX_USE_OMP
618#pragma omp parallel reduction(max:r)
619#endif
620 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
621 {
622 const Box& bx = mfi.growntilebox(nghost);
623 const auto& arr1 = fa1.const_array(mfi);
624 const auto& arr2 = fa2.const_array(mfi);
625 r = std::max(r, f(bx, arr1, arr2));
626 }
627
628 return r;
629}
630}
631
632#ifdef AMREX_USE_GPU
633namespace fudetail {
634template <class FAB1, class FAB2, class F>
635std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
636ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
637 IntVect const& nghost, F&& f)
638{
639 return ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
640}
641
642template <class FAB1, class FAB2, class F>
643std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
644ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
645 IntVect const& nghost, F&& f)
646{
647 amrex::ignore_unused(fa1,fa2,nghost,f);
648 amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
649 return 0;
650}
651}
652
653template <class FAB1, class FAB2, class F,
654 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
655typename FAB1::value_type
656ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
657 IntVect const& nghost, F&& f)
658{
659 if (Gpu::inLaunchRegion()) {
660 return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,nghost,std::forward<F>(f));
661 } else {
662 return fudetail::ReduceMax_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
663 }
664}
665#else
666template <class FAB1, class FAB2, class F,
667 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
668typename FAB1::value_type
669ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
670 IntVect const& nghost, F&& f)
671{
672 return fudetail::ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
673}
674#endif
675
676template <class FAB1, class FAB2, class FAB3, class F,
677 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
678typename FAB1::value_type
679ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
680 int nghost, F&& f)
681{
682 return ReduceMax(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
683}
684
685namespace fudetail {
686template <class FAB1, class FAB2, class FAB3, class F,
687 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
688typename FAB1::value_type
689ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
690 FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
691{
692 using value_type = typename FAB1::value_type;
693 value_type r = std::numeric_limits<value_type>::lowest();
694
695#ifdef AMREX_USE_OMP
696#pragma omp parallel reduction(max:r)
697#endif
698 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
699 {
700 const Box& bx = mfi.growntilebox(nghost);
701 const auto& arr1 = fa1.const_array(mfi);
702 const auto& arr2 = fa2.const_array(mfi);
703 const auto& arr3 = fa3.const_array(mfi);
704 r = std::max(r, f(bx, arr1, arr2, arr3));
705 }
706
707 return r;
708}
709}
710
711#ifdef AMREX_USE_GPU
712namespace fudetail {
713template <class FAB1, class FAB2, class FAB3, class F>
714std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
715ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
716 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
717{
718 return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
719}
720
721template <class FAB1, class FAB2, class FAB3, class F>
722std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
723ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
724 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
725{
726 amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
727 amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
728 return 0;
729}
730}
731
732template <class FAB1, class FAB2, class FAB3, class F,
733 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
734typename FAB1::value_type
735ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
736 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
737{
738 if (Gpu::inLaunchRegion()) {
739 return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,fa3,nghost,std::forward<F>(f));
740 } else {
741 return fudetail::ReduceMax_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
742 }
743}
744#else
745template <class FAB1, class FAB2, class FAB3, class F,
746 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
747typename FAB1::value_type
748ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
749 FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
750{
751 return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
752}
753#endif
754
755template <class FAB, class F,
756 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
757bool
758ReduceLogicalAnd (FabArray<FAB> const& fa, int nghost, F&& f)
759{
760 return ReduceLogicalAnd(fa, IntVect(nghost), std::forward<F>(f));
761}
762
763namespace fudetail {
764template <class FAB, class F,
765 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
766bool
767ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
768{
769 int r = true;
770
771#ifdef AMREX_USE_OMP
772#pragma omp parallel reduction(&&:r)
773#endif
774 for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
775 {
776 const Box& bx = mfi.growntilebox(nghost);
777 const auto& arr = fa.const_array(mfi);
778 r = r && f(bx, arr);
779 }
780
781 return r;
782}
783}
784
785#ifdef AMREX_USE_GPU
786namespace fudetail {
787template <class FAB, class F>
788std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
789ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
790{
791 return ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
792}
793
794template <class FAB, class F>
795std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
796ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
797{
798 amrex::ignore_unused(fa,nghost,f);
799 amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
800 return false;
801}
802}
803
804template <class FAB, class F,
805 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
806bool
807ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
808{
809 if (Gpu::inLaunchRegion()) {
810 return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa,nghost,std::forward<F>(f));
811 } else {
812 return fudetail::ReduceLogicalAnd_host_wrapper(fa,nghost,std::forward<F>(f));
813 }
814}
815#else
816template <class FAB, class F,
817 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
818bool
819ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
820{
821 return fudetail::ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
822}
823#endif
824
825template <class FAB1, class FAB2, class F,
826 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
827bool
828ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
829 int nghost, F&& f)
830{
831 return ReduceLogicalAnd(fa1, fa2, IntVect(nghost), std::forward<F>(f));
832}
833
834namespace fudetail {
835template <class FAB1, class FAB2, class F,
836 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
837bool
838ReduceLogicalAnd_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
839 IntVect const& nghost, F const& f)
840{
841 int r = true;
842
843#ifdef AMREX_USE_OMP
844#pragma omp parallel reduction(&&:r)
845#endif
846 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
847 {
848 const Box& bx = mfi.growntilebox(nghost);
849 const auto& arr1 = fa1.const_array(mfi);
850 const auto& arr2 = fa2.const_array(mfi);
851 r = r && f(bx, arr1, arr2);
852 }
853
854 return r;
855}
856}
857
858#ifdef AMREX_USE_GPU
859namespace fudetail {
860template <class FAB1, class FAB2, class F>
861std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
862ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
863 IntVect const& nghost, F&& f)
864{
865 return ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
866}
867
868template <class FAB1, class FAB2, class F>
869std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
870ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
871 IntVect const& nghost, F&& f)
872{
873 amrex::ignore_unused(fa1,fa2,nghost,f);
874 amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
875 return false;
876}
877}
878
879template <class FAB1, class FAB2, class F,
880 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
881bool
882ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
883 IntVect const& nghost, F&& f)
884{
885 if (Gpu::inLaunchRegion()) {
886 return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa1,fa2,nghost,std::forward<F>(f));
887 } else {
888 return fudetail::ReduceLogicalAnd_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
889 }
890}
891#else
892template <class FAB1, class FAB2, class F,
893 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
894bool
895ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
896 IntVect const& nghost, F&& f)
897{
898 return fudetail::ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
899}
900#endif
901
902template <class FAB, class F,
903 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
904bool
905ReduceLogicalOr (FabArray<FAB> const& fa, int nghost, F&& f)
906{
907 return ReduceLogicalOr(fa, IntVect(nghost), std::forward<F>(f));
908}
909
910namespace fudetail {
911template <class FAB, class F,
912 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
913bool
914ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
915{
916 int r = false;
917
918#ifdef AMREX_USE_OMP
919#pragma omp parallel reduction(||:r)
920#endif
921 for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
922 {
923 const Box& bx = mfi.growntilebox(nghost);
924 const auto& arr = fa.const_array(mfi);
925 r = r || f(bx, arr);
926 }
927
928 return r;
929}
930}
931
932#ifdef AMREX_USE_GPU
933namespace fudetail {
934template <class FAB, class F>
935std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
936ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
937{
938 return ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
939}
940
941template <class FAB, class F>
942std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
943ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
944{
945 amrex::ignore_unused(fa,nghost,f);
946 amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
947 return false;
948}
949}
950
951template <class FAB, class F,
952 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
953bool
954ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
955{
956 if (Gpu::inLaunchRegion()) {
957 return fudetail::ReduceMF<ReduceOpLogicalOr>(fa,nghost,std::forward<F>(f));
958 } else {
959 return fudetail::ReduceLogicalOr_host_wrapper(fa,nghost,std::forward<F>(f));
960 }
961}
962#else
963template <class FAB, class F,
964 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
965bool
966ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
967{
968 return fudetail::ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
969}
970#endif
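A usage sketch for the logical reductions: checking that a field is everywhere non-negative (ReduceLogicalOr is analogous). Assumes `using namespace amrex;` and an existing MultiFab `mf`:

    bool nonneg = ReduceLogicalAnd(mf, 0,
        [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& fab) -> bool
        {
            bool ok = true;
            Loop(bx, [&] (int i, int j, int k) { ok = ok && (fab(i,j,k) >= 0.0); });
            return ok;
        });
    ParallelDescriptor::ReduceBoolAnd(nonneg); // combine per-rank results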
971
972template <class FAB1, class FAB2, class F,
973 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
974bool
975ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
976 int nghost, F&& f)
977{
978 return ReduceLogicalOr(fa1, fa2, IntVect(nghost), std::forward<F>(f));
979}
980
981namespace fudetail {
982template <class FAB1, class FAB2, class F,
983 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
984bool
985ReduceLogicalOr_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
986 IntVect const& nghost, F const& f)
987{
988 int r = false;
989
990#ifdef AMREX_USE_OMP
991#pragma omp parallel reduction(||:r)
992#endif
993 for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
994 {
995 const Box& bx = mfi.growntilebox(nghost);
996 const auto& arr1 = fa1.const_array(mfi);
997 const auto& arr2 = fa2.const_array(mfi);
998 r = r || f(bx, arr1, arr2);
999 }
1000
1001 return r;
1002}
1003}
1004
1005#ifdef AMREX_USE_GPU
1006namespace fudetail {
1007template <class FAB1, class FAB2, class F>
1008std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
1009ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1010 IntVect const& nghost, F&& f)
1011{
1012 return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
1013}
1014
1015template <class FAB1, class FAB2, class F>
1016std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
1017ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1018 IntVect const& nghost, F&& f)
1019{
1020 amrex::ignore_unused(fa1,fa2,nghost,f);
1021 amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
1022 return false;
1023}
1024}
1025
1026template <class FAB1, class FAB2, class F,
1027 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
1028bool
1029ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1030 IntVect const& nghost, F&& f)
1031{
1032 if (Gpu::inLaunchRegion()) {
1033 return fudetail::ReduceMF<ReduceOpLogicalOr>(fa1,fa2,nghost,std::forward<F>(f));
1034 } else {
1035 return fudetail::ReduceLogicalOr_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
1036 }
1037}
1038#else
1039template <class FAB1, class FAB2, class F,
1040 class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
1041bool
1042ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
1043 IntVect const& nghost, F&& f)
1044{
1045 return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
1046}
1047#endif
1048
1049template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1050void
1051printCell (FabArray<FAB> const& mf, const IntVect& cell, int comp = -1,
1052 const IntVect& ng = IntVect::TheZeroVector())
1053{
1054 for (MFIter mfi(mf); mfi.isValid(); ++mfi)
1055 {
1056 const Box& bx = amrex::grow(mfi.validbox(), ng);
1057 if (bx.contains(cell)) {
1058 int n = (comp >= 0) ? 1 : mf.nComp();
1059 auto const& fab = mf.const_array(mfi);
1060 Gpu::PinnedVector<typename FAB::value_type> pv(n);
1061 auto* dp = pv.data();
1062 auto f = [=] AMREX_GPU_HOST_DEVICE ()
1063 {
1064 if (comp >= 0) {
1065 *dp = fab(cell, comp);
1066 } else {
1067 for (int i = 0; i < n; ++i) {
1068 dp[i] = fab(cell,i);
1069 }
1070 }
1071 };
1072
1073#ifdef AMREX_USE_GPU
1074 if (mf.arena()->isManaged() || mf.arena()->isDevice()) {
1075 amrex::single_task(f);
1076 Gpu::streamSynchronize();
1077 } else
1078#endif
1079 {
1080 f();
1081 }
1082
1083 if (comp >= 0) {
1084 amrex::AllPrint().SetPrecision(17) << " At cell " << cell << " in Box " << bx
1085 << ": " << *dp << '\n';
1086 } else {
1087 std::ostringstream ss;
1088 ss.precision(17);
1089 for (int i = 0; i < n-1; ++i)
1090 {
1091 ss << dp[i] << ", ";
1092 }
1093 ss << dp[n-1];
1094 amrex::AllPrint() << " At cell " << cell << " in Box " << bx
1095 << ": " << ss.str() << '\n';
1096 }
1097 }
1098 }
1099}
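A usage sketch for printCell (the cell index here is hypothetical). Each rank whose grids contain the cell prints its data, so the call is safe to make from all ranks:

    printCell(mf, IntVect(AMREX_D_DECL(8,16,4)));    // print all components
    printCell(mf, IntVect(AMREX_D_DECL(8,16,4)), 2); // print component 2 only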
1100
1101template <class FAB,
1102 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1103void
1104Swap (FabArray<FAB>& dst, FabArray<FAB>& src, int srccomp, int dstcomp, int numcomp, int nghost)
1105{
1106 Swap(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1107}
1108
1109template <class FAB,
1110 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1111void
1112Swap (FabArray<FAB>& dst, FabArray<FAB>& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1113{
1114 // We can take a shortcut and do a std::swap if we're swapping all of the data
1115 // and they are allocated in the same Arena.
1116
1117 bool explicit_swap = true;
1118
1119 if (srccomp == dstcomp && dstcomp == 0 && src.nComp() == dst.nComp() &&
1120 src.nGrowVect() == nghost && src.nGrowVect() == dst.nGrowVect() &&
1121 src.arena() == dst.arena() && src.hasEBFabFactory() == dst.hasEBFabFactory()) {
1122 explicit_swap = false;
1123 }
1124
1125 if (!explicit_swap) {
1126
1127 std::swap(dst, src);
1128
1129 } else {
1130#ifdef AMREX_USE_GPU
1131 if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1132 auto const& dstma = dst.arrays();
1133 auto const& srcma = src.arrays();
1134 ParallelFor(dst, nghost, numcomp,
1135 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1136 {
1137 const auto tmp = dstma[box_no](i,j,k,n+dstcomp);
1138 dstma[box_no](i,j,k,n+dstcomp) = srcma[box_no](i,j,k,n+srccomp);
1139 srcma[box_no](i,j,k,n+srccomp) = tmp;
1140 });
1141 if (!Gpu::inNoSyncRegion()) {
1142 Gpu::streamSynchronize();
1143 }
1144 } else
1145#endif
1146 {
1147#ifdef AMREX_USE_OMP
1148#pragma omp parallel if (Gpu::notInLaunchRegion())
1149#endif
1150 for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1151 {
1152 const Box& bx = mfi.growntilebox(nghost);
1153 if (bx.ok()) {
1154 auto sfab = src.array(mfi);
1155 auto dfab = dst.array(mfi);
1156 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1157 {
1158 const auto tmp = dfab(i,j,k,n+dstcomp);
1159 dfab(i,j,k,n+dstcomp) = sfab(i,j,k,n+srccomp);
1160 sfab(i,j,k,n+srccomp) = tmp;
1161 });
1162 }
1163 }
1164 }
1165 }
1166}
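A usage sketch for Swap, exchanging all components of two states including one ghost cell. When the component ranges, ghost regions, and arenas match exactly, this degenerates to an O(1) std::swap of the containers; otherwise the data are exchanged element by element:

    Swap(snew, sold, 0, 0, snew.nComp(), 1);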
1167
1168template <class FAB,
1169 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1170void
1171Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1172{
1173 Subtract(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1174}
1175
1176template <class FAB,
1177 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1178void
1179Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1180{
1181#ifdef AMREX_USE_GPU
1182 if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1183 auto const& dstfa = dst.arrays();
1184 auto const& srcfa = src.const_arrays();
1185 ParallelFor(dst, nghost, numcomp,
1186 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1187 {
1188 dstfa[box_no](i,j,k,n+dstcomp) -= srcfa[box_no](i,j,k,n+srccomp);
1189 });
1190 if (!Gpu::inNoSyncRegion()) {
1191 Gpu::streamSynchronize();
1192 }
1193 } else
1194#endif
1195 {
1196#ifdef AMREX_USE_OMP
1197#pragma omp parallel if (Gpu::notInLaunchRegion())
1198#endif
1199 for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1200 {
1201 const Box& bx = mfi.growntilebox(nghost);
1202 if (bx.ok())
1203 {
1204 auto const srcFab = src.array(mfi);
1205 auto dstFab = dst.array(mfi);
1206 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1207 {
1208 dstFab(i,j,k,n+dstcomp) -= srcFab(i,j,k,n+srccomp);
1209 });
1210 }
1211 }
1212 }
1213}
1214
1215
1216template <class FAB,
1217 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1218void
1219Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1220{
1221 Multiply(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1222}
1223
1224template <class FAB,
1225 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1226void
1227Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1228{
1229#ifdef AMREX_USE_GPU
1230 if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1231 auto const& dstfa = dst.arrays();
1232 auto const& srcfa = src.const_arrays();
1233 ParallelFor(dst, nghost, numcomp,
1234 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1235 {
1236 dstfa[box_no](i,j,k,n+dstcomp) *= srcfa[box_no](i,j,k,n+srccomp);
1237 });
1238 if (!Gpu::inNoSyncRegion()) {
1239 Gpu::streamSynchronize();
1240 }
1241 } else
1242#endif
1243 {
1244#ifdef AMREX_USE_OMP
1245#pragma omp parallel if (Gpu::notInLaunchRegion())
1246#endif
1247 for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1248 {
1249 const Box& bx = mfi.growntilebox(nghost);
1250 if (bx.ok())
1251 {
1252 auto const srcFab = src.array(mfi);
1253 auto dstFab = dst.array(mfi);
1254 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1255 {
1256 dstFab(i,j,k,n+dstcomp) *= srcFab(i,j,k,n+srccomp);
1257 });
1258 }
1259 }
1260 }
1261}
1262
1263
1264template <class FAB,
1265 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1266void
1267Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
1268{
1269 Divide(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
1270}
1271
1272template <class FAB,
1273 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1274void
1275Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
1276{
1277#ifdef AMREX_USE_GPU
1278 if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
1279 auto const& dstfa = dst.arrays();
1280 auto const& srcfa = src.const_arrays();
1281 ParallelFor(dst, nghost, numcomp,
1282 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1283 {
1284 dstfa[box_no](i,j,k,n+dstcomp) /= srcfa[box_no](i,j,k,n+srccomp);
1285 });
1286 if (!Gpu::inNoSyncRegion()) {
1287 Gpu::streamSynchronize();
1288 }
1289 } else
1290#endif
1291 {
1292#ifdef AMREX_USE_OMP
1293#pragma omp parallel if (Gpu::notInLaunchRegion())
1294#endif
1295 for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1296 {
1297 const Box& bx = mfi.growntilebox(nghost);
1298 if (bx.ok())
1299 {
1300 auto const srcFab = src.array(mfi);
1301 auto dstFab = dst.array(mfi);
1302 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1303 {
1304 dstFab(i,j,k,n+dstcomp) /= srcFab(i,j,k,n+srccomp);
1305 });
1306 }
1307 }
1308 }
1309}
1310
1311template <class FAB,
1312 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1313void
1314Abs (FabArray<FAB>& fa, int icomp, int numcomp, int nghost)
1315{
1316 Abs(fa,icomp,numcomp,IntVect(nghost));
1317}
1318
1319template <class FAB,
1320 class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1321void
1322Abs (FabArray<FAB>& fa, int icomp, int numcomp, const IntVect& nghost)
1323{
1324#ifdef AMREX_USE_GPU
1325 if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
1326 auto const& fabarr = fa.arrays();
1327 ParallelFor(fa, nghost, numcomp,
1328 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1329 {
1330 fabarr[box_no](i,j,k,n+icomp) = std::abs(fabarr[box_no](i,j,k,n+icomp));
1331 });
1332 if (!Gpu::inNoSyncRegion()) {
1333 Gpu::streamSynchronize();
1334 }
1335 } else
1336#endif
1337 {
1338#ifdef AMREX_USE_OMP
1339#pragma omp parallel if (Gpu::notInLaunchRegion())
1340#endif
1341 for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1342 {
1343 const Box& bx = mfi.growntilebox(nghost);
1344 if (bx.ok())
1345 {
1346 auto const& fab = fa.array(mfi);
1347 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
1348 {
1349 fab(i,j,k,n+icomp) = std::abs(fab(i,j,k,n+icomp));
1350 });
1351 }
1352 }
1353 }
1354}
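Usage sketches for the element-wise helpers above (`dst` and `src` are MultiFabs on the same BoxArray and DistributionMapping; 0 ghost cells):

    Subtract(dst, src, 0, 0, dst.nComp(), 0); // dst -= src
    Multiply(dst, src, 0, 0, dst.nComp(), 0); // dst *= src
    Divide  (dst, src, 0, 0, dst.nComp(), 0); // dst /= src (no divide-by-zero guard)
    Abs     (dst, 0, dst.nComp(), 0);         // dst = |dst|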
1355
1356template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1357void
1358prefetchToHost (FabArray<FAB> const& fa, const bool synchronous = true)
1359{
1360#ifdef AMREX_USE_GPU
1361 if (fa.arena()->isManaged()) {
1362 for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
1363 fa.prefetchToHost(mfi);
1364 }
1365 }
1366#else
1367 amrex::ignore_unused(fa,synchronous);
1368#endif
1369}
1370
1371template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1372void
1373prefetchToDevice (FabArray<FAB> const& fa, const bool synchronous = true)
1374{
1375#ifdef AMREX_USE_GPU
1376 if (fa.arena()->isManaged()) {
1377 for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
1378 fa.prefetchToDevice(mfi);
1379 }
1380 }
1381#else
1382 amrex::ignore_unused(fa,synchronous);
1383#endif
1384}
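A sketch of the prefetch helpers, which only have an effect when the FabArray lives in managed memory:

    prefetchToHost(mf);   // page data to the host before host-side work
    // ... host-only operations on mf ...
    prefetchToDevice(mf); // page it back before the next GPU launch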
1385
1386
1387template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
1388 && IsBaseFab<IFAB>::value> >
1389void
1390OverrideSync (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
1391{
1392 BL_PROFILE("OverrideSync()");
1393
1394 OverrideSync_nowait(fa, msk, period);
1395 OverrideSync_finish(fa);
1396}
1397
1398
1399template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
1400 && IsBaseFab<IFAB>::value> >
1401void
1402OverrideSync_nowait (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
1403{
1404 BL_PROFILE("OverrideSync_nowait()");
1405 AMREX_ASSERT_WITH_MESSAGE(!fa.os_temp, "OverrideSync_nowait() called when already in progress.");
1406
1407 if (fa.ixType().cellCentered()) { return; }
1408
1409 const int ncomp = fa.nComp();
1410
1411#ifdef AMREX_USE_GPU
1412 if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
1413 auto const& fabarr = fa.arrays();
1414 auto const& ifabarr = msk.const_arrays();
1415 ParallelFor(fa, IntVect(0), ncomp,
1416 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
1417 {
1418 if (!ifabarr[box_no](i,j,k)) { fabarr[box_no](i,j,k,n) = 0; }
1419 });
1420 if (!Gpu::inNoSyncRegion()) {
1421 Gpu::streamSynchronize();
1422 }
1423 } else
1424#endif
1425 {
1426#ifdef AMREX_USE_OMP
1427#pragma omp parallel if (Gpu::notInLaunchRegion())
1428#endif
1429 for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
1430 {
1431 const Box& bx = mfi.tilebox();
1432 auto fab = fa.array(mfi);
1433 auto const ifab = msk.array(mfi);
1434 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
1435 {
1436 if (!ifab(i,j,k)) { fab(i,j,k,n) = 0; }
1437 });
1438 }
1439 }
1440
1441 fa.os_temp = std::make_unique< FabArray<FAB> > ( fa.boxArray(), fa.DistributionMap(),
1442 ncomp, 0, MFInfo(), fa.Factory() );
1443 fa.os_temp->setVal(0);
1444 fa.os_temp->ParallelCopy_nowait(fa, period, FabArrayBase::ADD);
1445}
1446
1447template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
1448void
1449OverrideSync_finish (FabArray<FAB> & fa)
1450{
1451 BL_PROFILE("OverrideSync_finish()");
1452
1453 if (fa.ixType().cellCentered()) { return; }
1454
1455 fa.os_temp->ParallelCopy_finish();
1456 amrex::Copy(fa, *(fa.os_temp), 0, 0, fa.nComp(), 0);
1457
1458 fa.os_temp.reset();
1459}
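A sketch of overriding synchronization on a nodal FabArray using an owner mask; `amrex::OwnerMask` (declared elsewhere in AMReX) is assumed here to build the mask, and `geom` is the Geometry providing periodicity:

    auto owner = OwnerMask(fa, geom.periodicity());
    OverrideSync(fa, *owner, geom.periodicity()); // shared points take the owner's value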
1460
1461template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1462void
1463dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
1464 int scomp, int dcomp, int ncomp)
1465{
1466 AMREX_ASSERT(isMFIterSafe(dst, src));
1467 AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
1468#ifdef AMREX_USE_GPU
1469 for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
1470 void* pdst = dst[mfi].dataPtr(dcomp);
1471 void const* psrc = src[mfi].dataPtr(scomp);
1472 Gpu::dtoh_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
1473 }
1474#else
1475 Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
1476#endif
1477}
1478
1479template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1480void
1481dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
1482{
1483 dtoh_memcpy(dst, src, 0, 0, dst.nComp());
1484}
1485
1486template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1487void
1488htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
1489 int scomp, int dcomp, int ncomp)
1490{
1491 AMREX_ASSERT(isMFIterSafe(dst, src));
1492 AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
1493#ifdef AMREX_USE_GPU
1494 for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
1495 void* pdst = dst[mfi].dataPtr(dcomp);
1496 void const* psrc = src[mfi].dataPtr(scomp);
1497 Gpu::htod_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
1498 }
1499#else
1500 Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
1501#endif
1502}
1503
1504template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1505void
1506htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
1507{
1508 htod_memcpy(dst, src, 0, 0, dst.nComp());
1509}
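A sketch of mirroring a device FabArray on the host with the memcpy helpers. The copies are asynchronous, so synchronize before touching the host data:

    MultiFab hmf(mf.boxArray(), mf.DistributionMap(), mf.nComp(), mf.nGrow(),
                 MFInfo().SetArena(The_Pinned_Arena()));
    dtoh_memcpy(hmf, mf);     // device -> host
    Gpu::streamSynchronize();
    // ... inspect or modify hmf on the host ...
    htod_memcpy(mf, hmf);     // host -> device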
1510
1511template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
1512IntVect
1513indexFromValue (FabArray<FAB> const& mf, int comp, IntVect const& nghost,
1514 typename FAB::value_type value)
1515{
1516 IntVect loc;
1517
1518#ifdef AMREX_USE_GPU
1519 if (Gpu::inLaunchRegion())
1520 {
1521 Gpu::Buffer<int> aa({0,AMREX_D_DECL(0,0,0)});
1522 int* p = aa.data();
1523 // This is a device ptr to 1+AMREX_SPACEDIM int zeros.
1524 // The first is used as an atomic bool and the others for intvect.
1525 if (mf.isFusingCandidate()) {
1526 auto const& ma = mf.const_arrays();
1527 ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1528 {
1529 int* flag = p;
1530 if (*flag == 0) {
1531 if (ma[box_no](i,j,k,comp) == value) {
1532 if (Gpu::Atomic::Exch(flag,1) == 0) {
1533 AMREX_D_TERM(p[1] = i;,
1534 p[2] = j;,
1535 p[3] = k;);
1536 }
1537 }
1538 }
1539 });
1540 } else {
1541 for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) {
1542 const Box& bx = amrex::grow(mfi.validbox(), nghost);
1543 auto const& arr = mf.const_array(mfi);
1544 amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
1545 {
1546 int* flag = p;
1547 if (*flag == 0) {
1548 if (arr(i,j,k,comp) == value) {
1549 if (Gpu::Atomic::Exch(flag,1) == 0) {
1550 AMREX_D_TERM(p[1] = i;,
1551 p[2] = j;,
1552 p[3] = k;);
1553 }
1554 }
1555 }
1556 });
1557 }
1558 }
1559 int const* tmp = aa.copyToHost();
1560 AMREX_D_TERM(loc[0] = tmp[1];,
1561 loc[1] = tmp[2];,
1562 loc[2] = tmp[3];);
1563 }
1564 else
1565#endif
1566 {
1567 bool f = false;
1568#ifdef AMREX_USE_OMP
1569#pragma omp parallel
1570#endif
1571 {
1572 IntVect priv_loc = IntVect::TheMinVector();
1573 for (MFIter mfi(mf,true); mfi.isValid(); ++mfi)
1574 {
1575 const Box& bx = mfi.growntilebox(nghost);
1576 auto const& fab = mf.const_array(mfi);
1577 AMREX_LOOP_3D(bx, i, j, k,
1578 {
1579 if (fab(i,j,k,comp) == value) {
1580 priv_loc = IntVect(AMREX_D_DECL(i,j,k));
1581 }
1582 });
1583 }
1584
1585 if (priv_loc.allGT(IntVect::TheMinVector())) {
1586 bool old;
1587// we should be able to test on _OPENMP < 201107 for capture (version 3.1)
1588// but we must work around a bug in gcc < 4.9
1589// And, with NVHPC 21.9 to <23.1, we saw an ICE with the atomic capture (NV bug: #3390723)
1590#if defined(AMREX_USE_OMP) && defined(_OPENMP) && (_OPENMP < 201307 || (defined(__NVCOMPILER) && __NVCOMPILER_MAJOR__ < 23)) // OpenMP 4.0
1591#pragma omp critical (amrex_indexfromvalue)
1592#elif defined(AMREX_USE_OMP)
1593#pragma omp atomic capture
1594#endif
1595 {
1596 old = f;
1597 f = true;
1598 }
1599
1600 if (old == false) { loc = priv_loc; }
1601 }
1602 }
1603 }
1604
1605 return loc;
1606}
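A sketch of locating a value with indexFromValue: find a cell holding the global maximum of component 0. The returned index is meaningful on the rank that owns such a cell:

    Real vmax = mf.max(0); // global max (includes MPI reduction)
    IntVect loc = indexFromValue(mf, 0, IntVect(0), vmax);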
1607
1619template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
1620typename FAB::value_type
1621Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int ncomp,
1622 IntVect const& nghost, bool local = false)
1623{
1624 BL_ASSERT(x.boxArray() == y.boxArray());
1625 BL_ASSERT(x.DistributionMap() == y.DistributionMap());
1626 BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
1627
1628 BL_PROFILE("amrex::Dot()");
1629
1630 using T = typename FAB::value_type;
1631 auto sm = T(0.0);
1632#ifdef AMREX_USE_GPU
1633 if (Gpu::inLaunchRegion()) {
1634 auto const& xma = x.const_arrays();
1635 auto const& yma = y.const_arrays();
1636 sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1637 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1638 {
1639 auto t = T(0.0);
1640 auto const& xfab = xma[box_no];
1641 auto const& yfab = yma[box_no];
1642 for (int n = 0; n < ncomp; ++n) {
1643 t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1644 }
1645 return t;
1646 });
1647 } else
1648#endif
1649 {
1650#ifdef AMREX_USE_OMP
1651#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1652#endif
1653 for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1654 {
1655 Box const& bx = mfi.growntilebox(nghost);
1656 auto const& xfab = x.const_array(mfi);
1657 auto const& yfab = y.const_array(mfi);
1658 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1659 {
1660 sm += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1661 });
1662 }
1663 }
1664
1665 if (!local) {
1666 ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1667 }
1668
1669 return sm;
1670}
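A usage sketch for Dot (the x·y overload above), computing a global inner product over components 0..ncomp-1 of the valid region; the self-dot overload below works the same way for squared L2 norms:

    Real d    = Dot(x, 0, y, 0, ncomp, IntVect(0));       // includes the MPI reduction
    Real dloc = Dot(x, 0, y, 0, ncomp, IntVect(0), true); // per-rank partial sum only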
1671
1681template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
1682typename FAB::value_type
1683Dot (FabArray<FAB> const& x, int xcomp, int ncomp, IntVect const& nghost, bool local = false)
1684{
1685 BL_ASSERT(x.nGrowVect().allGE(nghost));
1686
1687 BL_PROFILE("amrex::Dot()");
1688
1689 using T = typename FAB::value_type;
1690 auto sm = T(0.0);
1691#ifdef AMREX_USE_GPU
1692 if (Gpu::inLaunchRegion()) {
1693 auto const& xma = x.const_arrays();
1694 sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1695 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1696 {
1697 auto t = T(0.0);
1698 auto const& xfab = xma[box_no];
1699 for (int n = 0; n < ncomp; ++n) {
1700 auto v = xfab(i,j,k,xcomp+n);
1701 t += v*v;
1702 }
1703 return t;
1704 });
1705 } else
1706#endif
1707 {
1708#ifdef AMREX_USE_OMP
1709#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1710#endif
1711 for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1712 {
1713 Box const& bx = mfi.growntilebox(nghost);
1714 auto const& xfab = x.const_array(mfi);
1715 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1716 {
1717 auto v = xfab(i,j,k,xcomp+n);
1718 sm += v*v;
1719 });
1720 }
1721 }
1722
1723 if (!local) {
1724 ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1725 }
1726
1727 return sm;
1728}
1729
1742template <typename IFAB, typename FAB,
1743 std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
1744typename FAB::value_type
1745Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp,
1746 FabArray<FAB> const& y, int ycomp, int ncomp, IntVect const& nghost,
1747 bool local = false)
1748{
1749 BL_ASSERT(x.boxArray() == y.boxArray() && x.boxArray() == mask.boxArray());
1750 BL_ASSERT(x.DistributionMap() == y.DistributionMap() && x.DistributionMap() == mask.DistributionMap());
1751 BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost) &&
1752 mask.nGrowVect().allGE(nghost));
1753
1754 BL_PROFILE("amrex::Dot()");
1755
1756 using T = typename FAB::value_type;
1757 auto sm = T(0.0);
1758#ifdef AMREX_USE_GPU
1759 if (Gpu::inLaunchRegion()) {
1760 auto const& mma = mask.const_arrays();
1761 auto const& xma = x.const_arrays();
1762 auto const& yma = y.const_arrays();
1763 sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1764 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1765 {
1766 auto t = T(0.0);
1767 auto m = T(mma[box_no](i,j,k));
1768 if (m != 0) {
1769 auto const& xfab = xma[box_no];
1770 auto const& yfab = yma[box_no];
1771 for (int n = 0; n < ncomp; ++n) {
1772 t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1773 }
1774 }
1775 return t*m;
1776 });
1777 } else
1778#endif
1779 {
1780#ifdef AMREX_USE_OMP
1781#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1782#endif
1783 for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1784 {
1785 Box const& bx = mfi.growntilebox(nghost);
1786 auto const& mfab = mask.const_array(mfi);
1787 auto const& xfab = x.const_array(mfi);
1788 auto const& yfab = y.const_array(mfi);
1789 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1790 {
1791 auto m = T(mfab(i,j,k));
1792 sm += m * xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
1793 });
1794 }
1795 }
1796
1797 if (!local) {
1798 ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1799 }
1800
1801 return sm;
1802}
1803
1814template <typename IFAB, typename FAB,
1815 std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
1816typename FAB::value_type
1817Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp, int ncomp,
1818 IntVect const& nghost, bool local = false)
1819{
1820 BL_ASSERT(x.boxArray() == mask.boxArray());
1821 BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
1822 BL_ASSERT(x.nGrowVect().allGE(nghost) && mask.nGrowVect().allGE(nghost));
1823
1824 BL_PROFILE("amrex::Dot()");
1825
1826 using T = typename FAB::value_type;
1827 auto sm = T(0.0);
1828#ifdef AMREX_USE_GPU
1829 if (Gpu::inLaunchRegion()) {
1830 auto const& mma = mask.const_arrays();
1831 auto const& xma = x.const_arrays();
1832 sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1833 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1834 {
1835 auto t = T(0.0);
1836 auto m = T(mma[box_no](i,j,k));
1837 if (m != 0) {
1838 auto const& xfab = xma[box_no];
1839 for (int n = 0; n < ncomp; ++n) {
1840 auto v = xfab(i,j,k,xcomp+n);
1841 t += v*v;
1842 }
1843 }
1844 return t*m;
1845 });
1846 } else
1847#endif
1848 {
1849#ifdef AMREX_USE_OMP
1850#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1851#endif
1852 for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1853 {
1854 Box const& bx = mfi.growntilebox(nghost);
1855 auto const& mfab = mask.const_array(mfi);
1856 auto const& xfab = x.const_array(mfi);
1857 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1858 {
1859 auto m = T(mfab(i,j,k));
1860 auto v = xfab(i,j,k,xcomp+n);
1861 sm += m*v*v;
1862 });
1863 }
1864 }
1865
1866 if (!local) {
1867 ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1868 }
1869
1870 return sm;
1871}
1872
1874template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1875void setVal (MF& dst, typename MF::value_type val)
1876{
1877 dst.setVal(val);
1878}
1879
1881template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1882void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
1883{
1884 dst.setBndry(val, scomp, ncomp);
1885}
1886
1888template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1889void Scale (MF& dst, typename MF::value_type val, int scomp, int ncomp, int nghost)
1890{
1891 dst.mult(val, scomp, ncomp, nghost);
1892}
1893
1895template <class DMF, class SMF,
1896 std::enable_if_t<IsMultiFabLike_v<DMF> &&
1897 IsMultiFabLike_v<SMF>, int> = 0>
1898void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
1899 int ncomp, IntVect const& nghost)
1900{
1901 amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
1902}
1903
1905template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1906void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
1907 int ncomp, IntVect const& nghost)
1908{
1909 amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
1910}
1911
1913template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1914void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1915 int ncomp, IntVect const& nghost)
1916{
1917 MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
1918}
1919
1920//! dst = src + a * dst
1921template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1922void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1923 int ncomp, IntVect const& nghost)
1924{
1925 MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
1926}
1927
1928//! dst += a_saxpy * src_saxpy followed by dst = src_xpay + a_xpay * dst
1929template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1930void Saxpy_Xpay (MF& dst, typename MF::value_type a_saxpy, MF const& src_saxpy,
1931 typename MF::value_type a_xpay, MF const& src_xpay, int scomp, int dcomp,
1932 int ncomp, IntVect const& nghost)
1933{
1934 MF::Saxpy_Xpay(dst, a_saxpy, src_saxpy, a_xpay, src_xpay, scomp, dcomp, ncomp, nghost);
1935}
1936
1937//! dst1 += a1 * src1 followed by dst2 += a2 * src2
1938template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1939void Saxpy_Saxpy (MF& dst1, typename MF::value_type a1, MF const& src1,
1940 MF& dst2, typename MF::value_type a2, MF const& src2, int scomp, int dcomp,
1941 int ncomp, IntVect const& nghost)
1942{
1943 MF::Saxpy_Saxpy(dst1, a1, src1, dst2, a2, src2, scomp, dcomp, ncomp, nghost);
1944}
1945
1946//! dst1 += a1 * dst2 followed by dst2 += a2 * src
1947template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1948void Saypy_Saxpy (MF& dst1, typename MF::value_type a1,
1949 MF& dst2, typename MF::value_type a2, MF const& src, int scomp, int dcomp,
1950 int ncomp, IntVect const& nghost)
1951{
1952 MF::Saypy_Saxpy(dst1, a1, dst2, a2, src, scomp, dcomp, ncomp, nghost);
1953}
1954
1955//! dst = a*src_a + b*src_b
1956template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1957void LinComb (MF& dst,
1958 typename MF::value_type a, MF const& src_a, int acomp,
1959 typename MF::value_type b, MF const& src_b, int bcomp,
1960 int dcomp, int ncomp, IntVect const& nghost)
1961{
1962 MF::LinComb(dst, a, src_a, acomp, b, src_b, bcomp, dcomp, ncomp, nghost);
1963}
1964
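// Usage sketch (editorial addition): these wrappers forward to the static MF
// member operations so solver code can be written generically over any
// MultiFab-like type. For MF = MultiFab (value_type = Real), with hypothetical
// single-component MultiFabs `dst`, `x`, and `y`:
//
//     amrex::Saxpy  (dst, Real(0.5), x, 0, 0, 1, IntVect(0)); // dst += 0.5*x
//     amrex::Xpay   (dst, Real(0.5), x, 0, 0, 1, IntVect(0)); // dst = x + 0.5*dst
//     amrex::LinComb(dst, Real(2.0), x, 0,
//                    Real(3.0), y, 0, 0, 1, IntVect(0));      // dst = 2*x + 3*y
//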
1965//! dst = src w/ MPI communication
1966template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1967void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
1968 IntVect const& ng_src = IntVect(0),
1969 IntVect const& ng_dst = IntVect(0),
1970 Periodicity const& period = Periodicity::NonPeriodic())
1971{
1972 dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
1973}
1974
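// Note (editorial addition): in ParallelCopy above, ng_src selects how many
// ghost cells of src act as source data, ng_dst how many ghost cells of dst
// may be filled, and period enables copies across periodic boundaries.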
1975template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
1976[[nodiscard]] typename MF::value_type
1977norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
1978 bool local = false)
1979{
1980 return mf.norminf(scomp, ncomp, nghost, local);
1981}
1982
1983//! dst = val
1984template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1985void setVal (Array<MF,N>& dst, typename MF::value_type val)
1986{
1987 for (auto& mf: dst) {
1988 mf.setVal(val);
1989 }
1990}
1991
1992//! dst = val in ghost cells.
1993template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1994void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
1995{
1996 for (auto& mf : dst) {
1997 mf.setBndry(val, scomp, ncomp);
1998 }
1999}
2000
2001//! dst *= val
2002template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2003void Scale (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp,
2004 int nghost)
2005{
2006 for (auto& mf : dst) {
2007 mf.mult(val, scomp, ncomp, nghost);
2008 }
2009}
2010
2011//! dst = src
2012template <class DMF, class SMF, std::size_t N,
2013 std::enable_if_t<IsMultiFabLike_v<DMF> &&
2014 IsMultiFabLike_v<SMF>, int> = 0>
2015void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
2016 int ncomp, IntVect const& nghost)
2017{
2018 for (std::size_t i = 0; i < N; ++i) {
2019 amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
2020 }
2021}
2022
2023//! dst += src
2024template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2025void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
2026 int ncomp, IntVect const& nghost)
2027{
2028 for (std::size_t i = 0; i < N; ++i) {
2029 amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
2030 }
2031}
2032
2033//! dst += a * src
2034template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2035void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
2036 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
2037 IntVect const& nghost)
2038{
2039 for (std::size_t i = 0; i < N; ++i) {
2040 MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
2041 }
2042}
2043
2044//! dst = src + a * dst
2045template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2046void Xpay (Array<MF,N>& dst, typename MF::value_type a,
2047 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
2048 IntVect const& nghost)
2049{
2050 for (std::size_t i = 0; i < N; ++i) {
2051 MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
2052 }
2053}
2054
2055//! dst = a*src_a + b*src_b
2056template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2057void LinComb (Array<MF,N>& dst,
2058 typename MF::value_type a, Array<MF,N> const& src_a, int acomp,
2059 typename MF::value_type b, Array<MF,N> const& src_b, int bcomp,
2060 int dcomp, int ncomp, IntVect const& nghost)
2061{
2062 for (std::size_t i = 0; i < N; ++i) {
2063 MF::LinComb(dst[i], a, src_a[i], acomp, b, src_b[i], bcomp, dcomp, ncomp, nghost);
2064 }
2065}
2066
2067//! dst = src w/ MPI communication
2068template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2069void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
2070 int scomp, int dcomp, int ncomp,
2071 IntVect const& ng_src = IntVect(0),
2072 IntVect const& ng_dst = IntVect(0),
2073 Periodicity const& period = Periodicity::NonPeriodic())
2074{
2075 for (std::size_t i = 0; i < N; ++i) {
2076 dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
2077 }
2078}
2079
2080template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2081[[nodiscard]] typename MF::value_type
2082norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
2083 bool local = false)
2084{
2085 auto r = typename MF::value_type(0);
2086 for (std::size_t i = 0; i < N; ++i) {
2087 auto tmp = mf[i].norminf(scomp, ncomp, nghost, true);
2088 r = std::max(r,tmp);
2089 }
2090 if (!local) {
2091 ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
2092 }
2093 return r;
2094}
2095
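// Usage sketch (editorial addition): the Array<MF,N> overloads apply the same
// operation to every element, which is convenient for sets of face-centered
// data such as fluxes. `fluxes` and `ncomp` below are hypothetical.
//
//     Array<MultiFab,AMREX_SPACEDIM> fluxes;
//     // ... define each element on its face-centered BoxArray ...
//     amrex::setVal(fluxes, Real(0.0));
//     amrex::Scale (fluxes, Real(0.5), 0, ncomp, 0);
//     auto m = amrex::norminf(fluxes, 0, ncomp, IntVect(0));  // MPI-reduced max
//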
2096template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2097 int> = 0>
2098[[nodiscard]] int nComp (Array<MF,N> const& mf)
2099{
2100 return mf[0].nComp();
2101}
2102
2103template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2104 int> = 0>
2105[[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
2106{
2107 return mf[0].nGrowVect();
2108}
2109
2110template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2111 int> = 0>
2112[[nodiscard]] BoxArray const&
2113boxArray (Array<MF,N> const& mf)
2114{
2115 return mf[0].boxArray();
2116}
2117
2118template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2119 int> = 0>
2120[[nodiscard]] DistributionMapping const&
2121DistributionMap (Array<MF,N> const& mf)
2122{
2123 return mf[0].DistributionMap();
2124}
2125
2126/**
2127 * \brief Return a mask indicating how many duplicates there are at each point
2128 *
2129 * \param fa input FabArray
2130 * \param nghost number of ghost cells included in counting
2131 * \param period periodicity
2132 */
2133template <class FAB>
2134FabArray<BaseFab<int>>
2135OverlapMask (FabArray<FAB> const& fa, IntVect const& nghost, Periodicity const& period)
2136{
2137 BL_PROFILE("OverlapMask()");
2138
2139 const BoxArray& ba = fa.boxArray();
2140 const DistributionMapping& dm = fa.DistributionMap();
2141
2142 FabArray<BaseFab<int>> mask(ba, dm, 1, nghost);
2143 mask.setVal(1);
2144
2145 const std::vector<IntVect>& pshifts = period.shiftIntVect();
2146
2147 Vector<Array4BoxTag<int>> tags;
2148
2149 bool run_on_gpu = Gpu::inLaunchRegion();
2150 amrex::ignore_unused(run_on_gpu, tags);
2151#ifdef AMREX_USE_OMP
2152#pragma omp parallel if (!run_on_gpu)
2153#endif
2154 {
2155 std::vector< std::pair<int,Box> > isects;
2156
2157 for (MFIter mfi(mask); mfi.isValid(); ++mfi)
2158 {
2159 const Box& bx = mask[mfi].box();
2160 auto const& arr = mask.array(mfi);
2161
2162 for (const auto& iv : pshifts)
2163 {
2164 ba.intersections(bx+iv, isects, false, nghost);
2165 for (const auto& is : isects)
2166 {
2167 Box const& b = is.second-iv;
2168 if (iv == 0 && b == bx) { continue; }
2169#ifdef AMREX_USE_GPU
2170 if (run_on_gpu) {
2171 tags.push_back({arr,b});
2172 } else
2173#endif
2174 {
2175 amrex::LoopConcurrentOnCpu(b, [=] (int i, int j, int k) noexcept
2176 {
2177 arr(i,j,k) += 1;
2178 });
2179 }
2180 }
2181 }
2182 }
2183 }
2184
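// (Editorial note) On the GPU path the intersections collected above are
// queued as Array4BoxTag work items and processed in the single fused
// ParallelFor below; Gpu::Atomic::AddNoRet is used because overlapping
// regions may update the same cells concurrently.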
2185#ifdef AMREX_USE_GPU
2186 amrex::ParallelFor(tags, 1,
2187 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, Array4BoxTag<int> const& tag) noexcept
2188 {
2189 Gpu::Atomic::AddNoRet(tag.dfab.ptr(i,j,k,n), 1);
2190 });
2191#endif
2192
2193 return mask;
2194}
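// Usage sketch (editorial addition): a common use of OverlapMask is to weight
// reductions over grown boxes so that points covered by more than one FAB (or
// by periodic images) are not double-counted. `mf` and `geom` are hypothetical.
//
//     FabArray<BaseFab<int>> msk = amrex::OverlapMask(mf, IntVect(1),
//                                                     geom.periodicity());
//     // msk(i,j,k) counts the boxes containing each point; divide local
//     // contributions by it before summing.
//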
2195
2196}
2197
2198#endif