Block-Structured AMR Software Framework
AMReX_FabArrayUtility.H
#ifndef AMREX_FABARRAY_UTILITY_H_
#define AMREX_FABARRAY_UTILITY_H_
#include <AMReX_Config.H>

#include <AMReX_FabArray.H>
#include <AMReX_LayoutData.H>
#include <AMReX_Print.H>
#include <AMReX_ParReduce.H>
#include <limits>

namespace amrex {

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, int nghost, F&& f) {
    return ReduceSum(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        auto const& arr = fa.const_array(mfi);
        sm += f(bx, arr);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
// ReduceMF fuses the reduction over all boxes into a single ParReduce kernel;
// the user functor is invoked one cell at a time via makeSingleCellBox. For
// logical and/or the reduction type is int, otherwise the FAB's value_type.
template <class OP, class FAB, class F>
std::enable_if_t<IsBaseFab<FAB>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB::value_type> >
ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB::value_type>;
    auto typ = fa.ixType();
    auto const& ma = fa.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ), ma[box_no])) };
    });
}

template <class OP, class FAB1, class FAB2, class F>
std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB1::value_type> >
ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB1::value_type>;
    auto typ = fa1.ixType();
    auto const& ma1 = fa1.const_arrays();
    auto const& ma2 = fa2.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
                                  ma1[box_no], ma2[box_no])) };
    });
}

template <class OP, class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value && IsBaseFab<FAB3>::value,
                 std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                    std::is_same<OP,ReduceOpLogicalOr>::value,
                                    int, typename FAB1::value_type> >
ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
          FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                 std::is_same<OP,ReduceOpLogicalOr>::value,
                                 int, typename FAB1::value_type>;
    auto typ = fa1.ixType();
    auto const& ma1 = fa1.const_arrays();
    auto const& ma2 = fa2.const_arrays();
    auto const& ma3 = fa3.const_arrays();
    return ParReduce(TypeList<OP>{}, TypeList<T>{}, fa1, nghost,
    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
    {
        return { static_cast<T>(f(amrex::makeSingleCellBox(i,j,k,typ),
                                  ma1[box_no], ma2[box_no], ma3[box_no])) };
    });
}

template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceSum_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceSum_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa, nghost, std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa, nghost, std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceSum (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa, nghost, std::forward<F>(f));
}
#endif
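
// Usage sketch (illustrative only, not part of this header; assumes a
// MultiFab `mf` of Reals). On GPU the functor is called per cell with a
// single-cell Box; on host it is called per tile, so it must loop over `bx`:
//
//     Real total = amrex::ReduceSum(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
//         {
//             Real s = 0.;
//             amrex::Loop(bx, [&] (int i, int j, int k) { s += a(i,j,k); });
//             return s;
//         });
//     ParallelDescriptor::ReduceRealSum(total); // the result above is per-rank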

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           int nghost, F&& f) {
    return ReduceSum(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        sm += f(bx, arr1, arr2);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceSum(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type sm = 0;

#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        sm += f(bx, arr1, arr2, arr3);
    }

    return sm;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceSum_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceSum: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpSum>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceSum_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceSum (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceSum_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceMin(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = std::min(r, f(bx, arr));
    }
    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceMin_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMin_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa, nghost, std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa, nghost, std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMin (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa, nghost, std::forward<F>(f));
}
#endif
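
// Usage sketch (illustrative; assumes a MultiFab `mf`): per-rank minimum of
// component 0 over valid cells, followed by the cross-rank reduction:
//
//     Real mn = amrex::ReduceMin(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
//         {
//             Real r = std::numeric_limits<Real>::max();
//             amrex::Loop(bx, [&] (int i, int j, int k) { r = amrex::min(r, a(i,j,k)); });
//             return r;
//         });
//     ParallelDescriptor::ReduceRealMin(mn);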

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
{
    return ReduceMin(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = std::min(r, f(bx, arr1, arr2));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceMin(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::max();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(min:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        r = std::min(r, f(bx, arr1, arr2, arr3));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMin_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceMin: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMin>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMin_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMin (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMin_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceMax(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = std::max(r, f(bx, arr));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceMax_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB::value_type>
ReduceMax_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
typename FAB::value_type
ReduceMax (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa,nghost,std::forward<F>(f));
}
#endif
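
// Usage sketch (illustrative; assumes a MultiFab `mf`): per-rank maximum of
// component 0 over valid cells, mirroring the ReduceMin example above:
//
//     Real mx = amrex::ReduceMax(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> Real
//         {
//             Real r = std::numeric_limits<Real>::lowest();
//             amrex::Loop(bx, [&] (int i, int j, int k) { r = amrex::max(r, a(i,j,k)); });
//             return r;
//         });
//     ParallelDescriptor::ReduceRealMax(mx);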

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, int nghost, F&& f)
{
    return ReduceMax(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = std::max(r, f(bx, arr1, arr2));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    return ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, FabArray<FAB3> const& fa3,
           int nghost, F&& f)
{
    return ReduceMax(fa1, fa2, fa3, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
{
    using value_type = typename FAB1::value_type;
    value_type r = std::numeric_limits<value_type>::lowest();

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(max:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        const auto& arr3 = fa3.const_array(mfi);
        r = std::max(r, f(bx, arr1, arr2, arr3));
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class FAB3, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, typename FAB1::value_type>
ReduceMax_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                        FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,fa3,nghost,f);
    amrex::Abort("ReduceMax: Launch Region is off. Device lambda cannot be called by host.");
    return 0;
}
}

template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpMax>(fa1,fa2,fa3,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceMax_host_wrapper(fa1,fa2,fa3,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class FAB3, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
typename FAB1::value_type
ReduceMax (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
           FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceMax_host(fa1,fa2,fa3,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceLogicalAnd(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    int r = true; // int rather than bool for the sake of the OpenMP reduction

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(&&:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = r && f(bx, arr);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa,nghost,f);
    amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalAnd_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalAnd (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalAnd_host(fa,nghost,std::forward<F>(f));
}
#endif
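
// Usage sketch (illustrative; assumes a MultiFab `mf`): check a predicate on
// every cell; the functor returns int, nonzero meaning true:
//
//     bool ok = amrex::ReduceLogicalAnd(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> int
//         {
//             int r = 1;
//             amrex::Loop(bx, [&] (int i, int j, int k) { r = r && (a(i,j,k) > 0.0); });
//             return r;
//         });
//     ParallelDescriptor::ReduceBoolAnd(ok);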

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  int nghost, F&& f)
{
    return ReduceLogicalAnd(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                       IntVect const& nghost, F const& f)
{
    int r = true;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(&&:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = r && f(bx, arr1, arr2);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                               IntVect const& nghost, F&& f)
{
    return ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalAnd_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                               IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceLogicalAnd: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalAnd>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalAnd_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalAnd (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                  IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalAnd_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, int nghost, F&& f)
{
    return ReduceLogicalOr(fa, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
{
    int r = false;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(||:r)
#endif
    for (MFIter mfi(fa,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr = fa.const_array(mfi);
        r = r || f(bx, arr);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
}

template <class FAB, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&& /*f*/)
{
    amrex::ignore_unused(fa,nghost);
    amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalOr>(fa,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalOr_host_wrapper(fa,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
bool
ReduceLogicalOr (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa,nghost,std::forward<F>(f));
}
#endif
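
// Usage sketch (illustrative; assumes a MultiFab `mf`): detect whether any
// cell satisfies a predicate, mirroring the ReduceLogicalAnd example:
//
//     bool any_negative = amrex::ReduceLogicalOr(mf, 0,
//         [=] AMREX_GPU_HOST_DEVICE (Box const& bx, Array4<Real const> const& a) -> int
//         {
//             int r = 0;
//             amrex::Loop(bx, [&] (int i, int j, int k) { r = r || (a(i,j,k) < 0.0); });
//             return r;
//         });
//     ParallelDescriptor::ReduceBoolOr(any_negative);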

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 int nghost, F&& f)
{
    return ReduceLogicalOr(fa1, fa2, IntVect(nghost), std::forward<F>(f));
}

namespace fudetail {
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                      IntVect const& nghost, F const& f)
{
    int r = false;

#ifdef AMREX_USE_OMP
#pragma omp parallel reduction(||:r)
#endif
    for (MFIter mfi(fa1,true); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.growntilebox(nghost);
        const auto& arr1 = fa1.const_array(mfi);
        const auto& arr2 = fa2.const_array(mfi);
        r = r || f(bx, arr1, arr2);
    }

    return r;
}
}

#ifdef AMREX_USE_GPU
namespace fudetail {
template <class FAB1, class FAB2, class F>
std::enable_if_t<!amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                              IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
}

template <class FAB1, class FAB2, class F>
std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
ReduceLogicalOr_host_wrapper (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                              IntVect const& nghost, F&& f)
{
    amrex::ignore_unused(fa1,fa2,nghost,f);
    amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
    return false;
}
}

template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 IntVect const& nghost, F&& f)
{
    if (Gpu::inLaunchRegion()) {
        return fudetail::ReduceMF<ReduceOpLogicalOr>(fa1,fa2,nghost,std::forward<F>(f));
    } else {
        return fudetail::ReduceLogicalOr_host_wrapper(fa1,fa2,nghost,std::forward<F>(f));
    }
}
#else
template <class FAB1, class FAB2, class F,
          class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
bool
ReduceLogicalOr (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
                 IntVect const& nghost, F&& f)
{
    return fudetail::ReduceLogicalOr_host(fa1,fa2,nghost,std::forward<F>(f));
}
#endif

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
printCell (FabArray<FAB> const& mf, const IntVect& cell, int comp = -1,
           const IntVect& ng = IntVect::TheZeroVector())
{
    for (MFIter mfi(mf); mfi.isValid(); ++mfi)
    {
        const Box& bx = amrex::grow(mfi.validbox(), ng);
        if (bx.contains(cell)) {
            int n = (comp >= 0) ? 1 : mf.nComp();
            auto const& fab = mf.const_array(mfi);
            Gpu::PinnedVector<typename FAB::value_type> pv(n);
            auto* dp = pv.data();
            auto f = [=] AMREX_GPU_HOST_DEVICE ()
            {
                if (comp >= 0) {
                    *dp = fab(cell, comp);
                } else {
                    for (int i = 0; i < n; ++i) {
                        dp[i] = fab(cell,i);
                    }
                }
            };

#ifdef AMREX_USE_GPU
            if (mf.arena()->isManaged() || mf.arena()->isDevice()) {
                single_task(f);
                Gpu::streamSynchronize();
            } else
#endif
            {
                f();
            }

            if (comp >= 0) {
                amrex::AllPrint().SetPrecision(17) << " At cell " << cell << " in Box " << bx
                                                   << ": " << *dp << '\n';
            } else {
                std::ostringstream ss;
                ss.precision(17);
                for (int i = 0; i < n-1; ++i)
                {
                    ss << dp[i] << ", ";
                }
                ss << dp[n-1];
                amrex::AllPrint() << " At cell " << cell << " in Box " << bx
                                  << ": " << ss.str() << '\n';
            }
        }
    }
}
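
// Usage sketch (illustrative): print all components of a MultiFab `mf` at a
// cell of interest; only ranks whose boxes contain the cell produce output.
//
//     amrex::printCell(mf, IntVect(AMREX_D_DECL(8,16,0)));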

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Swap (FabArray<FAB>& dst, FabArray<FAB>& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Swap(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Swap (FabArray<FAB>& dst, FabArray<FAB>& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
    // We can take a shortcut and do a std::swap if we're swapping all of the data
    // and they are allocated in the same Arena.

    bool explicit_swap = true;

    if (srccomp == dstcomp && dstcomp == 0 && src.nComp() == dst.nComp() &&
        src.nGrowVect() == nghost && src.nGrowVect() == dst.nGrowVect() &&
        src.arena() == dst.arena() && src.hasEBFabFactory() == dst.hasEBFabFactory()) {
        explicit_swap = false;
    }

    if (!explicit_swap) {

        std::swap(dst, src);

    } else {
#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
            auto const& dstma = dst.arrays();
            auto const& srcma = src.arrays();
            ParallelFor(dst, nghost, numcomp,
            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
            {
                const auto tmp = dstma[box_no](i,j,k,n+dstcomp);
                dstma[box_no](i,j,k,n+dstcomp) = srcma[box_no](i,j,k,n+srccomp);
                srcma[box_no](i,j,k,n+srccomp) = tmp;
            });
            if (!Gpu::inNoSyncRegion()) {
                Gpu::streamSynchronize();
            }
        } else
#endif
        {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
            for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
            {
                const Box& bx = mfi.growntilebox(nghost);
                if (bx.ok()) {
                    auto sfab = src.array(mfi);
                    auto dfab = dst.array(mfi);
                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                    {
                        const auto tmp = dfab(i,j,k,n+dstcomp);
                        dfab(i,j,k,n+dstcomp) = sfab(i,j,k,n+srccomp);
                        sfab(i,j,k,n+srccomp) = tmp;
                    });
                }
            }
        }
    }
}
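
// Usage sketch (illustrative; assumes MultiFabs `a` and `b` built on the same
// BoxArray and DistributionMapping): swapping everything, ghost cells
// included, takes the pointer-swap shortcut above; partial swaps copy cell by
// cell.
//
//     amrex::Swap(a, b, 0, 0, a.nComp(), a.nGrowVect());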

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Subtract(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Subtract (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) -= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) -= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}


template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Multiply(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Multiply (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) *= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) *= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}


template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
{
    Divide(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Divide (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
        auto const& dstfa = dst.arrays();
        auto const& srcfa = src.const_arrays();
        ParallelFor(dst, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            dstfa[box_no](i,j,k,n+dstcomp) /= srcfa[box_no](i,j,k,n+srccomp);
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const srcFab = src.array(mfi);
                auto dstFab = dst.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    dstFab(i,j,k,n+dstcomp) /= srcFab(i,j,k,n+srccomp);
                });
            }
        }
    }
}
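
// Usage sketch (illustrative; `dst` and `src` are MultiFabs with matching
// layout): pointwise dst op= src on component 0, valid cells only (nghost = 0):
//
//     amrex::Subtract(dst, src, 0, 0, 1, 0);
//     amrex::Multiply(dst, src, 0, 0, 1, 0);
//     amrex::Divide  (dst, src, 0, 0, 1, 0); // note: no divide-by-zero guard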

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Abs (FabArray<FAB>& fa, int icomp, int numcomp, int nghost)
{
    Abs(fa,icomp,numcomp,IntVect(nghost));
}

template <class FAB,
          class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
Abs (FabArray<FAB>& fa, int icomp, int numcomp, const IntVect& nghost)
{
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
        auto const& fabarr = fa.arrays();
        ParallelFor(fa, nghost, numcomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            fabarr[box_no](i,j,k,n+icomp) = std::abs(fabarr[box_no](i,j,k,n+icomp));
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(nghost);
            if (bx.ok())
            {
                auto const& fab = fa.array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
                {
                    fab(i,j,k,n+icomp) = std::abs(fab(i,j,k,n+icomp));
                });
            }
        }
    }
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
prefetchToHost (FabArray<FAB> const& fa, const bool synchronous = true)
{
#ifdef AMREX_USE_GPU
    if (fa.arena()->isManaged()) {
        for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
            fa.prefetchToHost(mfi);
        }
    }
#else
    amrex::ignore_unused(fa,synchronous);
#endif
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
prefetchToDevice (FabArray<FAB> const& fa, const bool synchronous = true)
{
#ifdef AMREX_USE_GPU
    if (fa.arena()->isManaged()) {
        for (MFIter mfi(fa, MFItInfo().SetDeviceSync(synchronous)); mfi.isValid(); ++mfi) {
            fa.prefetchToDevice(mfi);
        }
    }
#else
    amrex::ignore_unused(fa,synchronous);
#endif
}
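
// Usage sketch (illustrative): these are no-ops unless `mf` was allocated in
// managed memory (e.g. MFInfo().SetArena(The_Managed_Arena())):
//
//     amrex::prefetchToDevice(mf); // before a GPU compute phase
//     amrex::prefetchToHost(mf);   // before host-side post-processing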


template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
                                                              && IsBaseFab<IFAB>::value> >
void
OverrideSync (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
{
    BL_PROFILE("OverrideSync()");

    OverrideSync_nowait(fa, msk, period);
    OverrideSync_finish(fa);
}


template <class FAB, class IFAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value
                                                              && IsBaseFab<IFAB>::value> >
void
OverrideSync_nowait (FabArray<FAB> & fa, FabArray<IFAB> const& msk, const Periodicity& period)
{
    BL_PROFILE("OverrideSync_nowait()");
    AMREX_ASSERT_WITH_MESSAGE(!fa.os_temp, "OverrideSync_nowait() called when already in progress.");

    if (fa.ixType().cellCentered()) { return; }

    const int ncomp = fa.nComp();

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && fa.isFusingCandidate()) {
        auto const& fabarr = fa.arrays();
        auto const& ifabarr = msk.const_arrays();
        ParallelFor(fa, IntVect(0), ncomp,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            if (!ifabarr[box_no](i,j,k)) { fabarr[box_no](i,j,k,n) = 0; }
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(fa,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.tilebox();
            auto fab = fa.array(mfi);
            auto const ifab = msk.array(mfi);
            AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
            {
                if (!ifab(i,j,k)) { fab(i,j,k,n) = 0; }
            });
        }
    }

    fa.os_temp = std::make_unique< FabArray<FAB> > ( fa.boxArray(), fa.DistributionMap(),
                                                     ncomp, 0, MFInfo(), fa.Factory() );
    fa.os_temp->setVal(0);
    fa.os_temp->ParallelCopy_nowait(fa, period, FabArrayBase::ADD);
}

template <class FAB, class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
void
OverrideSync_finish (FabArray<FAB> & fa)
{
    BL_PROFILE("OverrideSync_finish()");

    if (fa.ixType().cellCentered()) { return; }

    fa.os_temp->ParallelCopy_finish();
    amrex::Copy(fa, *(fa.os_temp), 0, 0, fa.nComp(), 0);

    fa.os_temp.reset();
}
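
// Usage sketch (illustrative; assumes a nodal MultiFab `phi` and a Geometry
// `geom`; MultiFab::OwnerMask is used here as the mask source). The owner
// mask picks a unique valid value for every node shared between boxes:
//
//     auto owner = phi.OwnerMask(geom.periodicity());
//     amrex::OverrideSync(phi, *owner, geom.periodicity());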

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
             int scomp, int dcomp, int ncomp)
{
    AMREX_ASSERT(isMFIterSafe(dst, src));
    AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
#ifdef AMREX_USE_GPU
    for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
        void* pdst = dst[mfi].dataPtr(dcomp);
        void const* psrc = src[mfi].dataPtr(scomp);
        Gpu::dtoh_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
    }
#else
    Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
#endif
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
dtoh_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
{
    dtoh_memcpy(dst, src, 0, 0, dst.nComp());
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src,
             int scomp, int dcomp, int ncomp)
{
    AMREX_ASSERT(isMFIterSafe(dst, src));
    AMREX_ASSERT(dst.nGrowVect() == src.nGrowVect());
#ifdef AMREX_USE_GPU
    for (MFIter mfi(dst); mfi.isValid(); ++mfi) {
        void* pdst = dst[mfi].dataPtr(dcomp);
        void const* psrc = src[mfi].dataPtr(scomp);
        Gpu::htod_memcpy_async(pdst, psrc, dst[mfi].nBytes(mfi.fabbox(), ncomp));
    }
#else
    Copy(dst, src, scomp, dcomp, ncomp, dst.nGrowVect());
#endif
}

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
void
htod_memcpy (FabArray<FAB>& dst, FabArray<FAB> const& src)
{
    htod_memcpy(dst, src, 0, 0, dst.nComp());
}
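
// Usage sketch (illustrative): mirror a device-resident MultiFab `mf` into a
// pinned-host copy with the same layout. The copies are asynchronous, so
// synchronize before reading on the host:
//
//     MultiFab hostmf(mf.boxArray(), mf.DistributionMap(), mf.nComp(),
//                     mf.nGrowVect(), MFInfo().SetArena(The_Pinned_Arena()));
//     amrex::dtoh_memcpy(hostmf, mf);
//     Gpu::streamSynchronize();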

template <class FAB, class foo = std::enable_if_t<IsBaseFab<FAB>::value> >
IntVect
indexFromValue (FabArray<FAB> const& mf, int comp, IntVect const& nghost,
                typename FAB::value_type value)
{
    IntVect loc;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        Gpu::Buffer<int> aa({0, AMREX_D_DECL(0,0,0)});
        int* p = aa.data();
        // This is a device ptr to 1+AMREX_SPACEDIM int zeros.
        // The first is used as an atomic bool and the others for intvect.
        if (mf.isFusingCandidate()) {
            auto const& ma = mf.const_arrays();
            ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
            {
                int* flag = p;
                if (*flag == 0) {
                    if (ma[box_no](i,j,k,comp) == value) {
                        if (Gpu::Atomic::Exch(flag,1) == 0) {
                            AMREX_D_TERM(p[1] = i;,
                                         p[2] = j;,
                                         p[3] = k;);
                        }
                    }
                }
            });
        } else {
            for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) {
                const Box& bx = amrex::grow(mfi.validbox(), nghost);
                auto const& arr = mf.const_array(mfi);
                amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                {
                    int* flag = p;
                    if (*flag == 0) {
                        if (arr(i,j,k,comp) == value) {
                            if (Gpu::Atomic::Exch(flag,1) == 0) {
                                AMREX_D_TERM(p[1] = i;,
                                             p[2] = j;,
                                             p[3] = k;);
                            }
                        }
                    }
                });
            }
        }
        int const* tmp = aa.copyToHost();
        AMREX_D_TERM(loc[0] = tmp[1];,
                     loc[1] = tmp[2];,
                     loc[2] = tmp[3];);
    }
    else
#endif
    {
        bool f = false;
#ifdef AMREX_USE_OMP
#pragma omp parallel
#endif
        {
            IntVect priv_loc = IntVect::TheMinVector();
            for (MFIter mfi(mf,true); mfi.isValid(); ++mfi)
            {
                const Box& bx = mfi.growntilebox(nghost);
                auto const& fab = mf.const_array(mfi);
                AMREX_LOOP_3D(bx, i, j, k,
                {
                    if (fab(i,j,k,comp) == value) {
                        priv_loc = IntVect(AMREX_D_DECL(i,j,k));
                    }
                });
            }

            if (priv_loc.allGT(IntVect::TheMinVector())) {
                bool old;
// we should be able to test on _OPENMP < 201107 for capture (version 3.1)
// but we must work around a bug in gcc < 4.9
// And, with NVHPC 21.9 to <23.1, we saw an ICE with the atomic capture (NV bug: #3390723)
#if defined(AMREX_USE_OMP) && defined(_OPENMP) && (_OPENMP < 201307 || (defined(__NVCOMPILER) && __NVCOMPILER_MAJOR__ < 23)) // OpenMP 4.0
#pragma omp critical (amrex_indexfromvalue)
#elif defined(AMREX_USE_OMP)
#pragma omp atomic capture
#endif
                {
                    old = f;
                    f = true;
                }

                if (old == false) { loc = priv_loc; }
            }
        }
    }

    return loc;
}
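
// Usage sketch (illustrative): find a cell holding a known value, e.g. a
// previously computed minimum of component 0. The search is per-rank, so the
// returned cell is only meaningful on the rank whose data contains the value:
//
//     Real mn = mf.min(0);
//     IntVect where = amrex::indexFromValue(mf, 0, IntVect(0), mn);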

//! Returns the sum over cells and components of x(...,xcomp+n) * y(...,ycomp+n).
template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int ncomp,
     IntVect const& nghost, bool local = false)
{
    BL_ASSERT(x.boxArray() == y.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto const& xfab = xma[box_no];
            auto const& yfab = yma[box_no];
            for (int n = 0; n < ncomp; ++n) {
                t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& xfab = x.const_array(mfi);
            auto const& yfab = y.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                sm += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

//! Dot product of x with itself, i.e. the sum of squares over the given components.
template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<FAB> const& x, int xcomp, int ncomp, IntVect const& nghost, bool local = false)
{
    BL_ASSERT(x.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto const& xfab = xma[box_no];
            for (int n = 0; n < ncomp; ++n) {
                auto v = xfab(i,j,k,xcomp+n);
                t += v*v;
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& xfab = x.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                auto v = xfab(i,j,k,xcomp+n);
                sm += v*v;
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

//! Mask-weighted dot product of x and y; cells where the mask is zero do not contribute.
template <typename IFAB, typename FAB,
          std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp,
     FabArray<FAB> const& y, int ycomp, int ncomp, IntVect const& nghost,
     bool local = false)
{
    BL_ASSERT(x.boxArray() == y.boxArray() && x.boxArray() == mask.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap() && x.DistributionMap() == mask.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost) &&
              mask.nGrowVect().allGE(nghost));

    BL_PROFILE("amrex::Dot()");

    using T = typename FAB::value_type;
    auto sm = T(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& mma = mask.const_arrays();
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
        {
            auto t = T(0.0);
            auto m = T(mma[box_no](i,j,k));
            if (m != 0) {
                auto const& xfab = xma[box_no];
                auto const& yfab = yma[box_no];
                for (int n = 0; n < ncomp; ++n) {
                    t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
                }
            }
            return t*m;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            auto const& mfab = mask.const_array(mfi);
            auto const& xfab = x.const_array(mfi);
            auto const& yfab = y.const_array(mfi);
            AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
            {
                auto m = T(mfab(i,j,k));
                sm += m * xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}
1855
1866template <typename IFAB, typename FAB,
1867 std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
1868typename FAB::value_type
1869Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp, int ncomp,
1870 IntVect const& nghost, bool local = false)
1871{
1872 BL_ASSERT(x.boxArray() == mask.boxArray());
1873 BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
1874 BL_ASSERT(x.nGrowVect().allGE(nghost) && mask.nGrowVect().allGE(nghost));
1875
1876 BL_PROFILE("amrex::Dot()");
1877
1878 using T = typename FAB::value_type;
1879 auto sm = T(0.0);
1880#ifdef AMREX_USE_GPU
1881 if (Gpu::inLaunchRegion()) {
1882 auto const& mma = mask.const_arrays();
1883 auto const& xma = x.const_arrays();
1884 sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
1885 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
1886 {
1887 auto t = T(0.0);
1888 auto m = T(mma[box_no](i,j,k));
1889 if (m != 0) {
1890 auto const& xfab = xma[box_no];
1891 for (int n = 0; n < ncomp; ++n) {
1892 auto v = xfab(i,j,k,xcomp+n);
1893 t += v*v;
1894 }
1895 }
1896 return t*m;
1897 });
1898 } else
1899#endif
1900 {
1901#ifdef AMREX_USE_OMP
1902#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
1903#endif
1904 for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
1905 {
1906 Box const& bx = mfi.growntilebox(nghost);
1907 auto const& mfab = mask.const_array(mfi);
1908 auto const& xfab = x.const_array(mfi);
1909 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
1910 {
1911 auto m = T(mfab(i,j,k));
1912 auto v = xfab(i,j,k,xcomp+n);
1913 sm += m*v*v;
1914 });
1915 }
1916 }
1917
1918 if (!local) {
1919 ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
1920 }
1921
1922 return sm;
1923}
1924
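// Note (editor's addition): in both masked overloads above, the component
// loop is skipped when the mask is zero and the partial sum is scaled by the
// mask value on return, so an integer mask other than 0/1 acts as a
// per-point weight.
//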
1925//! dst = val
1926template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1927void setVal (MF& dst, typename MF::value_type val)
1928{
1929 dst.setVal(val);
1930}
1931
1932//! dst = val in ghost cells.
1933template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1934void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
1935{
1936 dst.setBndry(val, scomp, ncomp);
1937}
1938
1939//! dst *= val
1940template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1941void Scale (MF& dst, typename MF::value_type val, int scomp, int ncomp, int nghost)
1942{
1943 dst.mult(val, scomp, ncomp, nghost);
1944}
1945
1946//! dst = src
1947template <class DMF, class SMF,
1948 std::enable_if_t<IsMultiFabLike_v<DMF> &&
1949 IsMultiFabLike_v<SMF>, int> = 0>
1950void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
1951 int ncomp, IntVect const& nghost)
1952{
1953 amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
1954}
1955
1956//! dst += src
1957template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1958void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
1959 int ncomp, IntVect const& nghost)
1960{
1961 amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
1962}
1963
1964//! dst += a * src
1965template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1966void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1967 int ncomp, IntVect const& nghost)
1968{
1969 MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
1970}
1971
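// Usage sketch (editor's addition): the wrappers above forward to static
// member implementations so generic solver code can work with any
// MultiFab-like type. For example, dst += 0.5 * src on component 0 of
// hypothetical MultiFabs sharing a BoxArray/DistributionMapping:
//
//   amrex::Saxpy(dst, 0.5, src, 0, 0, 1, amrex::IntVect(0));
//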
1972//! dst = src + a * dst
1973template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1974void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
1975 int ncomp, IntVect const& nghost)
1976{
1977 MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
1978}
1979
1980//! dst += a_saxpy * src_saxpy followed by dst = src_xpay + a_xpay * dst
1981template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1982void Saxpy_Xpay (MF& dst, typename MF::value_type a_saxpy, MF const& src_saxpy,
1983 typename MF::value_type a_xpay, MF const& src_xpay, int scomp, int dcomp,
1984 int ncomp, IntVect const& nghost)
1985{
1986 MF::Saxpy_Xpay(dst, a_saxpy, src_saxpy, a_xpay, src_xpay, scomp, dcomp, ncomp, nghost);
1987}
1988
1989//! dst1 += a1 * src1 followed by dst2 += a2 * src2
1990template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
1991void Saxpy_Saxpy (MF& dst1, typename MF::value_type a1, MF const& src1,
1992 MF& dst2, typename MF::value_type a2, MF const& src2, int scomp, int dcomp,
1993 int ncomp, IntVect const& nghost)
1994{
1995 MF::Saxpy_Saxpy(dst1, a1, src1, dst2, a2, src2, scomp, dcomp, ncomp, nghost);
1996}
1997
1998//! dst1 += a1 * dst2 followed by dst2 += a2 * src
1999template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2000void Saypy_Saxpy (MF& dst1, typename MF::value_type a1,
2001 MF& dst2, typename MF::value_type a2, MF const& src, int scomp, int dcomp,
2002 int ncomp, IntVect const& nghost)
2003{
2004 MF::Saypy_Saxpy(dst1, a1, dst2, a2, src, scomp, dcomp, ncomp, nghost);
2005}
2006
2007//! dst = a*src_a + b*src_b
2008template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2009void LinComb (MF& dst,
2010 typename MF::value_type a, MF const& src_a, int acomp,
2011 typename MF::value_type b, MF const& src_b, int bcomp,
2012 int dcomp, int ncomp, IntVect const& nghost)
2013{
2014 MF::LinComb(dst, a, src_a, acomp, b, src_b, bcomp, dcomp, ncomp, nghost);
2015}
2016
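// Usage sketch (editor's addition): a midpoint combination dst = 0.5*x +
// 0.5*y written into component 0, with `dst`, `x`, and `y` hypothetical
// MultiFabs on the same BoxArray/DistributionMapping.
//
//   amrex::LinComb(dst, 0.5, x, 0, 0.5, y, 0, 0, 1, amrex::IntVect(0));
//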
2017//! dst = src w/ MPI communication
2018template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2019void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
2020 IntVect const& ng_src = IntVect(0),
2021 IntVect const& ng_dst = IntVect(0),
2022 Periodicity const& period = Periodicity::NonPeriodic())
2023{
2024 dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
2025}
2026
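// Usage sketch (editor's addition): unlike LocalCopy, ParallelCopy moves
// data with MPI, so src and dst may live on different BoxArrays or
// DistributionMappings; a nonzero ng_dst also fills ghost cells of dst.
// `geom` is a hypothetical Geometry supplying the periodicity.
//
//   amrex::ParallelCopy(dst, src, 0, 0, dst.nComp(),
//                       amrex::IntVect(0), amrex::IntVect(1),
//                       geom.periodicity());
//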
2027template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2028[[nodiscard]] typename MF::value_type
2029norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
2030 bool local = false)
2031{
2032 return mf.norminf(scomp, ncomp, nghost, local);
2033}
2034
2035//! dst = val
2036template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2037void setVal (Array<MF,N>& dst, typename MF::value_type val)
2038{
2039 for (auto& mf: dst) {
2040 mf.setVal(val);
2041 }
2042}
2043
2044//! dst = val in ghost cells.
2045template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2046void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
2047{
2048 for (auto& mf : dst) {
2049 mf.setBndry(val, scomp, ncomp);
2050 }
2051}
2052
2053//! dst *= val
2054template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2055void Scale (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp,
2056 int nghost)
2057{
2058 for (auto& mf : dst) {
2059 mf.mult(val, scomp, ncomp, nghost);
2060 }
2061}
2062
2063//! dst = src
2064template <class DMF, class SMF, std::size_t N,
2065 std::enable_if_t<IsMultiFabLike_v<DMF> &&
2066 IsMultiFabLike_v<SMF>, int> = 0>
2067void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
2068 int ncomp, IntVect const& nghost)
2069{
2070 for (std::size_t i = 0; i < N; ++i) {
2071 amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
2072 }
2073}
2074
2075//! dst += src
2076template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2077void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
2078 int ncomp, IntVect const& nghost)
2079{
2080 for (std::size_t i = 0; i < N; ++i) {
2081 amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
2082 }
2083}
2084
2085//! dst += a * src
2086template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2087void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
2088 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
2089 IntVect const& nghost)
2090{
2091 for (std::size_t i = 0; i < N; ++i) {
2092 MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
2093 }
2094}
2095
2096//! dst = src + a * dst
2097template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2098void Xpay (Array<MF,N>& dst, typename MF::value_type a,
2099 Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
2100 IntVect const& nghost)
2101{
2102 for (std::size_t i = 0; i < N; ++i) {
2103 MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
2104 }
2105}
2106
2107//! dst = a*src_a + b*src_b
2108template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
2109void LinComb (Array<MF,N>& dst,
2110 typename MF::value_type a, Array<MF,N> const& src_a, int acomp,
2111 typename MF::value_type b, Array<MF,N> const& src_b, int bcomp,
2112 int dcomp, int ncomp, IntVect const& nghost)
2113{
2114 for (std::size_t i = 0; i < N; ++i) {
2115 MF::LinComb(dst[i], a, src_a[i], acomp, b, src_b[i], bcomp, dcomp, ncomp, nghost);
2116 }
2117}
2118
2119//! dst = src w/ MPI communication
2120template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2121void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
2122 int scomp, int dcomp, int ncomp,
2123 IntVect const& ng_src = IntVect(0),
2124 IntVect const& ng_dst = IntVect(0),
2125 Periodicity const& period = Periodicity::NonPeriodic())
2126{
2127 for (std::size_t i = 0; i < N; ++i) {
2128 dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
2129 }
2130}
2131
2132template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
2133[[nodiscard]] typename MF::value_type
2134norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
2135 bool local = false)
2136{
2137 auto r = typename MF::value_type(0);
2138 for (std::size_t i = 0; i < N; ++i) {
2139 auto tmp = mf[i].norminf(scomp, ncomp, nghost, true);
2140 r = std::max(r,tmp);
2141 }
2142 if (!local) {
2143 ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
2144 }
2145 return r;
2146}
2147
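// Note (editor's addition): the Array overload above takes each member's
// norm with local=true and performs a single global reduction at the end,
// rather than one reduction per member.
//
//   // faces: hypothetical Array<MultiFab,AMREX_SPACEDIM> of face data
//   auto linf = amrex::norminf(faces, 0, 1, amrex::IntVect(0));
//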
2148template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2149 int> = 0>
2150[[nodiscard]] int nComp (Array<MF,N> const& mf)
2151{
2152 return mf[0].nComp();
2153}
2154
2155template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2156 int> = 0>
2157[[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
2158{
2159 return mf[0].nGrowVect();
2160}
2161
2162template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2163 int> = 0>
2164[[nodiscard]] BoxArray const&
2165boxArray (Array<MF,N> const& mf)
2166{
2167 return mf[0].boxArray();
2168}
2169
2170template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
2171 int> = 0>
2172[[nodiscard]] DistributionMapping const&
2173DistributionMap (Array<MF,N> const& mf)
2174{
2175 return mf[0].DistributionMap();
2176}
2177
2178/*
2179 * \brief Return a mask counting, at each point, the number of boxes (including periodic images) that contain it
2180 *
2181 * \param fa input FabArray
2182 * \param nghost number of ghost cells included in the counting
2183 * \param period periodicity
2184 */
2185template <class FAB>
2186FabArray<BaseFab<int>>
2187OverlapMask (FabArray<FAB> const& fa, IntVect const& nghost, Periodicity const& period)
2188{
2189 BL_PROFILE("OverlapMask()");
2190
2191 const BoxArray& ba = fa.boxArray();
2192 const DistributionMapping& dm = fa.DistributionMap();
2193
2194 FabArray<BaseFab<int>> mask(ba, dm, 1, nghost);
2195 mask.setVal(1);
2196
2197 const std::vector<IntVect>& pshifts = period.shiftIntVect();
2198
2200
2201 bool run_on_gpu = Gpu::inLaunchRegion();
2202 amrex::ignore_unused(run_on_gpu, tags);
2203#ifdef AMREX_USE_OMP
2204#pragma omp parallel if (!run_on_gpu)
2205#endif
2206 {
2207 std::vector< std::pair<int,Box> > isects;
2208
2209 for (MFIter mfi(mask); mfi.isValid(); ++mfi)
2210 {
2211 const Box& bx = mask[mfi].box();
2212 auto const& arr = mask.array(mfi);
2213
2214 for (const auto& iv : pshifts)
2215 {
2216 ba.intersections(bx+iv, isects, false, nghost);
2217 for (const auto& is : isects)
2218 {
2219 Box const& b = is.second-iv;
2220 if (iv == 0 && b == bx) { continue; }
2221#ifdef AMREX_USE_GPU
2222 if (run_on_gpu) {
2223 tags.push_back({arr,b});
2224 } else
2225#endif
2226 {
2227 amrex::LoopConcurrentOnCpu(b, [=] (int i, int j, int k) noexcept
2228 {
2229 arr(i,j,k) += 1;
2230 });
2231 }
2232 }
2233 }
2234 }
2235 }
2236
2237#ifdef AMREX_USE_GPU
2238 amrex::ParallelFor(tags, 1,
2239 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, Array4BoxTag<int> const& tag) noexcept
2240 {
2241 Gpu::Atomic::AddNoRet(tag.dfab.ptr(i,j,k,n), 1);
2242 });
2243#endif
2244
2245 return mask;
2246}
2247
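// Usage sketch (editor's addition): a typical use of OverlapMask is to
// average data at points covered by more than one box, e.g. nodal data.
// `nd` is a hypothetical node-centered MultiFab and `geom` a hypothetical
// Geometry.
//
//   auto msk = amrex::OverlapMask(nd, amrex::IntVect(0), geom.periodicity());
//   // each entry of msk is >= 1; dividing nd point-wise by msk converts a
//   // sum over duplicated points into an average
//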
2248}
2249
2250#endif