Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_NonLocalBCImpl.H
Go to the documentation of this file.
1#ifndef AMREX_NONLOCAL_BC_H_
2#include "AMReX_NonLocalBC.H"
3#endif
4
5#ifndef AMREX_NONLOCAL_BC_IMPL_H_
6#define AMREX_NONLOCAL_BC_IMPL_H_
7#include <AMReX_Config.H>
8#include <AMReX_TypeTraits.H>
9
10namespace amrex::NonLocalBC {
11struct Rotate90ClockWise {
13 IntVect operator() (IntVect const& iv) const noexcept {
14 return IntVect{AMREX_D_DECL(iv[1], -1-iv[0], iv[2])};
15 }
16
18 Dim3 operator() (Dim3 const& a) const noexcept {
19 return Dim3{.x = a.y, .y = -1-a.x, .z = a.z};
20 }
21
22 Box operator() (Box const& box) const noexcept {
23 return Box(operator()(IntVect{AMREX_D_DECL(box.bigEnd (0),
24 box.smallEnd(1),
25 box.smallEnd(2))}),
26 operator()(IntVect{AMREX_D_DECL(box.smallEnd(0),
27 box.bigEnd (1),
28 box.bigEnd (2))}));
29 }
30};
31
32struct Rotate90CounterClockWise {
34 IntVect operator() (IntVect const& iv) const noexcept {
35 return IntVect{AMREX_D_DECL(-1-iv[1], iv[0], iv[2])};
36 }
37
39 Dim3 operator() (Dim3 const& a) const noexcept {
40 return Dim3{.x = -1-a.y, .y = a.x, .z = a.z};
41 }
42
43 Box operator() (Box const& box) const noexcept {
44 return Box(operator()(IntVect{AMREX_D_DECL(box.smallEnd(0),
45 box.bigEnd (1),
46 box.smallEnd(2))}),
47 operator()(IntVect{AMREX_D_DECL(box.bigEnd (0),
48 box.smallEnd(1),
49 box.bigEnd (2))}));
50 }
51};
52
53struct Rotate90DstToSrc
54{
56 Dim3 operator() (Dim3 const& a) const noexcept {
57 if (a.x < 0) {
58 return Rotate90ClockWise()(a);
59 } else {
60 return Rotate90CounterClockWise()(a);
61 }
62 }
63};
64
65struct Rotate180Fn {
66 int Ly;
67
69 IntVect operator() (IntVect const& iv) const noexcept {
70 return IntVect{AMREX_D_DECL(-1-iv[0], Ly-1-iv[1], iv[2])};
71 }
72
74 Dim3 operator() (Dim3 const& a) const noexcept {
75 return Dim3{.x = -1-a.x, .y = Ly-1-a.y, .z = a.z};
76 }
77
78 Box operator() (Box const& box) const noexcept {
79 return Box(operator()(IntVect{AMREX_D_DECL(box.bigEnd (0),
80 box.bigEnd (1),
81 box.smallEnd(2))}),
82 operator()(IntVect{AMREX_D_DECL(box.smallEnd(0),
83 box.smallEnd(1),
84 box.bigEnd (2))}));
85 }
86};
87
88struct PolarFn {
89 int Lx, Ly;
90
91 [[nodiscard]] AMREX_GPU_HOST_DEVICE
92 int i_index (int i) const noexcept {
93 return (i < Lx/2) ? -1-i : 2*Lx-1-i;
94 }
95
96 [[nodiscard]] AMREX_GPU_HOST_DEVICE
97 int j_index (int j) const noexcept {
98 return (j < Ly/2) ? j+Ly/2 : j-Ly/2;
99 }
100
101 [[nodiscard]] AMREX_GPU_HOST_DEVICE
102 IntVect operator() (IntVect const& iv) const noexcept {
103 return IntVect{AMREX_D_DECL(i_index(iv[0]), j_index(iv[1]), iv[2])};
104 }
105
106 [[nodiscard]] AMREX_GPU_HOST_DEVICE
107 Dim3 operator() (Dim3 const& a) const noexcept {
108 return Dim3{.x = i_index(a.x), .y = j_index(a.y), .z = a.z};
109 }
110
111 [[nodiscard]] Box operator() (Box const& box) const noexcept {
112 return Box(operator()(IntVect{AMREX_D_DECL(box.bigEnd (0),
113 box.smallEnd(1),
114 box.smallEnd(2))}),
115 operator()(IntVect{AMREX_D_DECL(box.smallEnd(0),
116 box.bigEnd (1),
117 box.bigEnd (2))}));
118 }
119};
120
121struct PolarFn2 { // for the x-y corners
122 int Lx, Ly;
123
124 [[nodiscard]] AMREX_GPU_HOST_DEVICE
125 int i_index (int i) const noexcept {
126 return (i < Lx/2) ? -1-i : 2*Lx-1-i;
127 }
128
129 [[nodiscard]] AMREX_GPU_HOST_DEVICE
130 int j_index (int j) const noexcept {
131 if (j < 0) { // NOLINT
132 return j+Ly/2;
133 } else if (j >= Ly) { // NOLINT
134 return j-Ly/2;
135 } else if (j < Ly/2) {
136 return j-Ly/2;
137 } else {
138 return j+Ly/2;
139 }
140 }
141
142 [[nodiscard]] AMREX_GPU_HOST_DEVICE
143 IntVect operator() (IntVect const& iv) const noexcept {
144 return IntVect{AMREX_D_DECL(i_index(iv[0]), j_index(iv[1]), iv[2])};
145 }
146
147 [[nodiscard]] AMREX_GPU_HOST_DEVICE
148 Dim3 operator() (Dim3 const& a) const noexcept {
149 return Dim3{.x = i_index(a.x), .y = j_index(a.y), .z = a.z};
150 }
151
152 [[nodiscard]] Box operator() (Box const& box) const noexcept {
153 return Box(operator()(IntVect{AMREX_D_DECL(box.bigEnd (0),
154 box.smallEnd(1),
155 box.smallEnd(2))}),
156 operator()(IntVect{AMREX_D_DECL(box.smallEnd(0),
157 box.bigEnd (1),
158 box.bigEnd (2))}));
159 }
160};
161
185template <BaseFabType FAB, class DTOS, class Proj>
186requires (IsCallableR<Dim3, DTOS, Dim3>::value && IsFabProjection<Proj, FAB>::value)
187void
188local_copy_cpu (FabArray<FAB>& dest, const FabArray<FAB>& src, int dcomp, int scomp, int ncomp,
189 FabArrayBase::CopyComTagsContainer const& local_tags, DTOS const& dtos, Proj const& proj) noexcept
190{
191 const auto N_locs = static_cast<int>(local_tags.size());
192 if (N_locs == 0) { return; }
193#ifdef AMREX_USE_OMP
194#pragma omp parallel for
195#endif
196 for (int itag = 0; itag < N_locs; ++itag) {
197 const auto& tag = local_tags[itag];
198 auto const& sfab = src.const_array(tag.srcIndex);
199 auto const& dfab = dest.array (tag.dstIndex);
200 amrex::LoopConcurrentOnCpu(tag.dbox, ncomp, [=] (int i, int j, int k, int n) noexcept
201 {
202 auto const si = dtos(Dim3{.x = i, .y = j, .z = k});
203 dfab(i,j,k,dcomp+n) = proj(sfab,si,scomp+n);
204 });
205 }
206}
207
213template <BaseFabType FAB, class DTOS, class Proj>
214requires (IsCallableR<Dim3, DTOS, Dim3>::value && IsFabProjection<Proj, FAB>::value)
215void
216unpack_recv_buffer_cpu (FabArray<FAB>& mf, int dcomp, int ncomp, Vector<char*> const& recv_data,
217 Vector<std::size_t> const& recv_size,
218 Vector<FabArrayBase::CopyComTagsContainer const*> const& recv_cctc,
219 DTOS const& dtos, Proj const& proj) noexcept
220{
221 amrex::ignore_unused(recv_size);
222
223 const auto N_rcvs = static_cast<int>(recv_cctc.size());
224 if (N_rcvs == 0) { return; }
225
226 using T = typename FAB::value_type;
227#ifdef AMREX_USE_OMP
228#pragma omp parallel for
229#endif
230 for (int ircv = 0; ircv < N_rcvs; ++ircv) {
231 const char* dptr = recv_data[ircv];
232 auto const& cctc = *recv_cctc[ircv];
233 for (auto const& tag : cctc) {
234 auto const& dfab = mf.array(tag.dstIndex);
235 auto const& sfab = amrex::makeArray4((T const*)(dptr), tag.sbox, ncomp);
236 amrex::LoopConcurrentOnCpu(tag.dbox, ncomp, [=](int i, int j, int k, int n) noexcept {
237 auto const si = dtos(Dim3{.x = i, .y = j, .z = k});
238 dfab(i, j, k, dcomp + n) = proj(sfab, si, n);
239 });
240 dptr += tag.sbox.numPts() * ncomp * sizeof(T);
241 AMREX_ASSERT(dptr <= recv_data[ircv] + recv_size[ircv]);
242 }
243 }
244}
245
246#ifdef AMREX_USE_GPU
247template <class T>
248struct Array4Array4Box {
249 Array4<T > dfab;
250 Array4<T const> sfab;
251 Box dbox;
252
254 Box const& box () const noexcept { return dbox; }
255};
256
257template <BaseFabType FAB, class DTOS, class Proj>
258requires (IsCallableR<Dim3, DTOS, Dim3>::value && IsFabProjection<Proj, FAB>::value)
259void
260local_copy_gpu (FabArray<FAB>& dest, const FabArray<FAB>& src, int dcomp, int scomp, int ncomp,
261 FabArrayBase::CopyComTagsContainer const& local_tags, DTOS const& dtos, Proj const& proj) noexcept
262{
263 int N_locs = local_tags.size();
264 if (N_locs == 0) { return; }
265
266 using T = typename FAB::value_type;
267 Vector<Array4Array4Box<T> > loc_copy_tags;
268 loc_copy_tags.reserve(N_locs);
269 for (auto const& tag : local_tags) {
270 loc_copy_tags.push_back(Array4Array4Box<T>{.dfab = dest.array(tag.dstIndex),
271 .sfab = src.const_array(tag.srcIndex),
272 .dbox = tag.dbox});
273 }
274
275 ParallelFor(loc_copy_tags, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n,
276 Array4Array4Box<T> const& tag) noexcept
277 {
278 auto const si = dtos(Dim3{.x = i, .y = j, .z = k});
279 tag.dfab(i,j,k,dcomp+n) = proj(tag.sfab, si, scomp+n);
280 });
281}
282
283template <BaseFabType FAB, class DTOS, class Proj>
284requires (IsCallableR<Dim3, DTOS, Dim3>::value && IsFabProjection<Proj, FAB>::value)
285void
286unpack_recv_buffer_gpu (FabArray<FAB>& mf, int scomp, int ncomp,
287 Vector<char*> const& recv_data,
288 Vector<std::size_t> const& recv_size,
289 Vector<FabArrayBase::CopyComTagsContainer const*> const& recv_cctc,
290 DTOS const& dtos, Proj const& proj)
291{
292 amrex::ignore_unused(recv_size);
293
294 const int N_rcvs = recv_cctc.size();
295 if (N_rcvs == 0) { return; }
296
297 char* pbuffer = recv_data[0];
298#if 0
299 std::size_t szbuffer = 0;
300 // For linear solver test on summit, this is slower than writing to
301 // pinned memory directly on device.
302 if (not ParallelDescriptor::UseGpuAwareMpi()) {
303 // Memory in recv_data is pinned.
304 szbuffer = (recv_data[N_rcvs-1]-recv_data[0]) + recv_size[N_rcvs-1];
305 pbuffer = (char*)The_Arena()->alloc(szbuffer);
306 Gpu::copyAsync(Gpu::hostToDevice,recv_data[0],recv_data[0]+szbuffer,pbuffer);
307 Gpu::streamSynchronize();
308 }
309#endif
310
311 using T = typename FAB::value_type;
312 using TagType = Array4Array4Box<T>;
313 Vector<TagType> tags;
314 tags.reserve(N_rcvs);
315
316 for (int k = 0; k < N_rcvs; ++k)
317 {
318 std::size_t offset = recv_data[k]-recv_data[0];
319 const char* dptr = pbuffer + offset;
320 auto const& cctc = *recv_cctc[k];
321 for (auto const& tag : cctc)
322 {
323 tags.emplace_back(TagType{mf.array(tag.dstIndex),
324 amrex::makeArray4((T const*)dptr, tag.sbox, ncomp),
325 tag.dbox});
326 dptr += tag.dbox.numPts() * ncomp * sizeof(T);
327 BL_ASSERT(dptr <= pbuffer + offset + recv_size[k]);
328 }
329 }
330
331 ParallelFor(tags, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n,
332 Array4Array4Box<T> const& tag) noexcept
333 {
334 auto const si = dtos(Dim3{.x = i, .y = j, .z = k});
335 tag.dfab(i,j,k,scomp+n) = proj(tag.sfab, si ,n);
336 });
337
338 // There is Gpu::streamSynchronize in ParallelFor above
339
340 if (pbuffer != recv_data[0]) {
341 The_Arena()->free(pbuffer);
342 }
343}
344#endif
345
346template <typename DTOS>
347requires (IsIndexMapping<DTOS>::value)
348MultiBlockCommMetaData::MultiBlockCommMetaData (const FabArrayBase& dst, const Box& dstbox, const FabArrayBase& src,
349 const IntVect& ngrow, DTOS const& dtos)
350 : MultiBlockCommMetaData(dst.boxArray(), dst.DistributionMap(), dstbox, src.boxArray(),
351 src.DistributionMap(), ngrow, dtos) {}
352
353template <typename DTOS>
354requires (IsIndexMapping<DTOS>::value)
355MultiBlockCommMetaData::MultiBlockCommMetaData (const BoxArray& dstba, const DistributionMapping& dstdm,
356 const Box& dstbox, const BoxArray& srcba,
357 const DistributionMapping& srcdm, const IntVect& ngrow, DTOS const& dtos)
358{
359 define(dstba, dstdm, dstbox, srcba, srcdm, ngrow, dtos);
360}
361
362template <typename DTOS>
363requires (IsIndexMapping<DTOS>::value)
364void
365MultiBlockCommMetaData::define (const BoxArray& dstba, const DistributionMapping& dstdm, const Box& dstbox,
366 const BoxArray& srcba, const DistributionMapping& srcdm, const IntVect& ngrow,
367 DTOS const& dtos)
368{
369 m_LocTags = std::make_unique<FabArrayBase::CopyComTagsContainer>();
370 m_SndTags = std::make_unique<FabArrayBase::MapOfCopyComTagContainers>();
371 m_RcvTags = std::make_unique<FabArrayBase::MapOfCopyComTagContainers>();
372 const int myproc = ParallelDescriptor::MyProc();
373 for (int i = 0, N = static_cast<int>(dstba.size()); i < N; ++i) {
374 const int dest_owner = dstdm[i];
375 const Box partial_dstbox = grow(dstba[i], ngrow) & dstbox;
376 if (partial_dstbox.isEmpty()) {
377 continue;
378 }
379 const Box partial_dstbox_mapped_in_src = Image(dtos, partial_dstbox).setType(srcba.ixType());
380 enum { not_first_only = 0, first_only = 1 };
381 std::vector<std::pair<int, Box>> boxes_from_src =
382 srcba.intersections(partial_dstbox_mapped_in_src, not_first_only, ngrow);
383 for (std::pair<int, Box> counted_box : boxes_from_src) {
384 const int k = counted_box.first;
385 const Box src_box = counted_box.second;
386 AMREX_ASSERT(k < srcba.size());
387 const int src_owner = srcdm[k];
388 if (dest_owner == myproc || src_owner == myproc) {
389 if (src_owner == dest_owner) {
390 const BoxList tilelist(src_box, FabArrayBase::comm_tile_size);
391 for (const Box& tilebox : tilelist) {
392 const Box inverse_image = InverseImage(dtos, tilebox).setType(dstba.ixType());
393 if ((inverse_image & partial_dstbox).ok()) {
394 m_LocTags->emplace_back(inverse_image, tilebox, i, k);
395 }
396 }
397 } else {
398 const Box inverse_image = InverseImage(dtos, src_box).setType(dstba.ixType());
399 if ((inverse_image & partial_dstbox).ok()) {
400 FabArrayBase::CopyComTagsContainer& copy_tags =
401 (src_owner == myproc) ? (*m_SndTags)[dest_owner]
402 : (*m_RcvTags)[src_owner];
403 copy_tags.emplace_back(inverse_image, src_box, i, k);
404 }
405 }
406 }
407 }
408 }
409}
410
411template <BaseFabType FAB, class DTOS, class Proj>
412#ifdef AMREX_USE_MPI
414#endif
415CommHandler
416Comm_nowait (FabArray<FAB>& mf, int scomp, int ncomp, FabArrayBase::CommMetaData const& cmd,
417 DTOS const& dtos, Proj const& proj)
418{
419#ifdef AMREX_USE_MPI
420 if (ParallelContext::NProcsSub() == 1)
421#endif
422 {
423 if (cmd.m_LocTags->empty()) { return CommHandler{}; }
424#ifdef AMREX_USE_GPU
425 if (Gpu::inLaunchRegion()) {
426 local_copy_gpu(mf, mf, scomp, scomp, ncomp, *cmd.m_LocTags, dtos, proj);
427 } else
428#endif
429 {
430 local_copy_cpu(mf, mf, scomp, scomp, ncomp, *cmd.m_LocTags, dtos, proj);
431 }
432 return CommHandler{};
433 }
434
435#ifdef AMREX_USE_MPI
436 //
437 // Do this before prematurely exiting if running in parallel.
438 // Otherwise sequence numbers will not match across MPI processes.
439 //
440 int SeqNum = ParallelDescriptor::SeqNum();
441
442 const auto N_locs = cmd.m_LocTags->size();
443 const auto N_rcvs = cmd.m_RcvTags->size();
444 const auto N_snds = cmd.m_SndTags->size();
445
446 if (N_locs == 0 && N_rcvs == 0 && N_snds == 0) {
447 // No work to do.
448 return CommHandler{};
449 }
450
451 CommHandler handler{};
452 handler.mpi_tag = SeqNum;
453
454 if (N_rcvs > 0) {
455 handler.recv.the_data = FabArray<FAB>::PostRcvs(*cmd.m_RcvTags, handler.recv.data, handler.recv.size,
456 handler.recv.rank, handler.recv.request, ncomp, SeqNum);
457 }
458
459 if (N_snds > 0) {
460 handler.send.the_data =
461 FabArray<FAB>::PrepareSendBuffers(*cmd.m_SndTags, handler.send.data, handler.send.size,
462 handler.send.rank, handler.send.request, handler.send.cctc, ncomp);
463
464#ifdef AMREX_USE_GPU
465 if (Gpu::inLaunchRegion()) {
466 FabArray<FAB>::pack_send_buffer_gpu(mf, scomp, ncomp, handler.send.data,
467 handler.send.size, handler.send.cctc, handler.send.id);
468 } else
469#endif
470 {
471 FabArray<FAB>::pack_send_buffer_cpu(mf, scomp, ncomp, handler.send.data,
472 handler.send.size, handler.send.cctc);
473 }
474
475 FabArray<FAB>::PostSnds(handler.send.data, handler.send.size, handler.send.rank, handler.send.request, SeqNum);
476 }
477
478 if (N_locs > 0)
479 {
480#ifdef AMREX_USE_GPU
481 if (Gpu::inLaunchRegion()) {
482 local_copy_gpu(mf, mf, scomp, scomp, ncomp, *cmd.m_LocTags, dtos, proj);
483 } else
484#endif
485 {
486 local_copy_cpu(mf, mf, scomp, scomp, ncomp, *cmd.m_LocTags, dtos, proj);
487 }
488 }
489
490 return handler;
491#endif
492}
493
494#ifdef AMREX_USE_MPI
495template <BaseFabType FAB, class DTOS, class Proj>
496void
497Comm_finish (FabArray<FAB>& mf, int scomp, int ncomp, FabArrayBase::CommMetaData const& cmd,
498 CommHandler handler, DTOS const& dtos, Proj const& proj)
499{
500 if (ParallelContext::NProcsSub() == 1) { return; }
501
502 const auto N_rcvs = static_cast<int>(cmd.m_RcvTags->size());
503 if (N_rcvs > 0)
504 {
505 handler.recv.cctc.resize(N_rcvs, nullptr);
506 for (int k = 0; k < N_rcvs; ++k) {
507 auto const& cctc = cmd.m_RcvTags->at(handler.recv.rank[k]);
508 handler.recv.cctc[k] = &cctc;
509 }
510 handler.recv.stats.resize(handler.recv.request.size());
511 ParallelDescriptor::Waitall(handler.recv.request, handler.recv.stats);
512#ifdef AMREX_DEBUG
513 if (!CheckRcvStats(handler.recv.stats, handler.recv.size, handler.mpi_tag)) {
514 amrex::Abort("NonLocalBC::Comm_finish failed with wrong message size");
515 }
516#endif
517
518#ifdef AMREX_USE_GPU
519 if (Gpu::inLaunchRegion())
520 {
521 unpack_recv_buffer_gpu(mf, scomp, ncomp, handler.recv.data,
522 handler.recv.size, handler.recv.cctc, dtos, proj);
523 } else
524#endif
525 {
526 unpack_recv_buffer_cpu(mf, scomp, ncomp, handler.recv.data,
527 handler.recv.size, handler.recv.cctc, dtos, proj);
528 }
529 }
530
531 if ( ! cmd.m_SndTags->empty() ) {
532 handler.send.stats.resize(handler.send.request.size());
533 ParallelDescriptor::Waitall(handler.send.request, handler.send.stats);
534 }
535}
536#endif
537
538template <BaseFabType FAB>
539void
540Rotate90 (FabArray<FAB>& mf, int scomp, int ncomp, IntVect const& nghost, Box const& domain)
541{
542 BL_PROFILE("Rotate90");
543
544 AMREX_ASSERT(domain.cellCentered());
545 AMREX_ASSERT(domain.smallEnd() == 0);
546 AMREX_ASSERT(domain.length(0) == domain.length(1));
547 AMREX_ASSERT(mf.is_cell_centered());
548 AMREX_ASSERT(scomp < mf.nComp() && scomp+ncomp <= mf.nComp());
549 AMREX_ASSERT(nghost.allLE(mf.nGrowVect()) && nghost[0] == nghost[1]);
550
551 if (nghost[0] <= 0) { return; }
552
553 const FabArrayBase::RB90& TheRB90 = mf.getRB90(nghost, domain);
554
555 auto handler = Comm_nowait(mf, scomp, ncomp, TheRB90,Rotate90DstToSrc{},
556 Identity{});
557
558 Box corner(-nghost, IntVect{AMREX_D_DECL(-1,-1,domain.bigEnd(2)+nghost[2])});
559#ifdef AMREX_USE_OMP
560#pragma omp parallel if (Gpu::notInLaunchRegion())
561#endif
562 for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
563 Box const& bx = corner & mfi.fabbox();
564 if (bx.ok()) {
565 auto const& fab = mf.array(mfi);
566 AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx,ncomp,i,j,k,n,
567 {
568 fab(i,j,k,n) = fab(-i-1,-j-1,k,n);
569 });
570 }
571 }
572
573#ifdef AMREX_USE_MPI
574 Comm_finish(mf, scomp, ncomp, TheRB90, std::move(handler), Rotate90DstToSrc{},
575 Identity{});
576#else
577 amrex::ignore_unused(handler);
578#endif
579}
580
581template <BaseFabType FAB>
582void
583Rotate90 (FabArray<FAB>& mf, Box const& domain)
584{
585 Rotate90(mf, 0, mf.nComp(), mf.nGrowVect(), domain);
586}
587
588template <BaseFabType FAB>
589void
590Rotate180 (FabArray<FAB>& mf, int scomp, int ncomp, IntVect const& nghost, Box const& domain)
591{
592 BL_PROFILE("Rotate180");
593
594 AMREX_ASSERT(domain.cellCentered());
595 AMREX_ASSERT(domain.smallEnd() == 0);
596 AMREX_ASSERT(domain.length(1) % 2 == 0);
597 AMREX_ASSERT(mf.is_cell_centered());
598 AMREX_ASSERT(scomp < mf.nComp() && scomp+ncomp <= mf.nComp());
599 AMREX_ASSERT(nghost.allLE(mf.nGrowVect()));
600
601 if (nghost[0] <= 0) { return; }
602
603 const FabArrayBase::RB180& TheRB180 = mf.getRB180(nghost, domain);
604
605 auto handler = Comm_nowait(mf, scomp, ncomp, TheRB180,
606 Rotate180Fn{domain.length(1)}, Identity{});
607
608#ifdef AMREX_USE_MPI
609 Comm_finish(mf, scomp, ncomp, TheRB180, std::move(handler),
610 Rotate180Fn{domain.length(1)}, Identity{});
611#else
612 amrex::ignore_unused(handler);
613#endif
614}
615
616template <BaseFabType FAB>
617void
618Rotate180 (FabArray<FAB>& mf, Box const& domain)
619{
620 Rotate180(mf, 0, mf.nComp(), mf.nGrowVect(), domain);
621}
622
623template <BaseFabType FAB>
624void
625FillPolar (FabArray<FAB>& mf, int scomp, int ncomp, IntVect const& nghost, Box const& domain)
626{
627 BL_PROFILE("FillPolar");
628
629 AMREX_ASSERT(domain.cellCentered());
630 AMREX_ASSERT(domain.smallEnd() == 0);
631 AMREX_ASSERT(domain.length(1) % 2 == 0);
632 AMREX_ASSERT(mf.is_cell_centered());
633 AMREX_ASSERT(scomp < mf.nComp() && scomp+ncomp <= mf.nComp());
634 AMREX_ASSERT(nghost.allLE(mf.nGrowVect()));
635
636 if (nghost[0] <= 0) { return; }
637
638 const FabArrayBase::PolarB& ThePolarB = mf.getPolarB(nghost, domain);
639
640 auto handler = Comm_nowait(mf, scomp, ncomp, ThePolarB,
641 PolarFn{.Lx = domain.length(0), .Ly = domain.length(1)},
642 Identity{});
643
644#ifdef AMREX_USE_MPI
645 Comm_finish(mf, scomp, ncomp, ThePolarB, std::move(handler),
646 PolarFn{.Lx = domain.length(0), .Ly = domain.length(1)}, Identity{});
647#else
648 amrex::ignore_unused(handler);
649#endif
650}
651
652template <BaseFabType FAB>
653void
654FillPolar (FabArray<FAB>& mf, Box const& domain)
655{
656 FillPolar(mf, 0, mf.nComp(), mf.nGrowVect(), domain);
657}
658
659template <BaseFabType FAB, typename DTOS, typename Proj>
660requires (IsCallableR<Dim3,DTOS,Dim3>::value && IsFabProjection<Proj,FAB>::value)
661CommHandler
662FillBoundary_nowait (FabArray<FAB>& mf, const FabArrayBase::CommMetaData& cmd,
663 int scomp, int ncomp, DTOS const& dtos, Proj const& proj)
664{
665 BL_PROFILE("FillBoundary_nowait(cmd)");
666 AMREX_ASSERT(scomp < mf.nComp() && scomp+ncomp <= mf.nComp());
667 return Comm_nowait(mf, scomp, ncomp, cmd, dtos, proj);
668}
669
670template <BaseFabType FAB, typename DTOS, typename Proj>
671requires (IsCallableR<Dim3,DTOS,Dim3>::value && IsFabProjection<Proj,FAB>::value)
672void
673FillBoundary_finish (CommHandler handler,
674 FabArray<FAB>& mf, const FabArrayBase::CommMetaData& cmd,
675 int scomp, int ncomp, DTOS const& dtos, Proj const& proj)
676{
677#ifdef AMREX_USE_MPI
678 BL_PROFILE("FillBoundary_finish(cmd)");
679 Comm_finish(mf, scomp, ncomp, cmd, std::move(handler), dtos, proj);
680#else
681 amrex::ignore_unused(handler,mf,cmd,scomp,ncomp,dtos,proj);
682#endif
683}
684
685template <typename DTOS>
686Vector<std::pair<Box,Box>>
687get_src_dst_boxes (DTOS const& dtos, Box const& dstbox, Box const& domain)
688{
689 Vector<std::pair<Box,Box>> r;
690 IntVect mapped_smallend(dtos(amrex::lbound(dstbox)));
691 IntVect mapped_bigend (dtos(amrex::ubound(dstbox)));
692 if (!domain.contains(mapped_smallend) || !domain.contains(mapped_bigend)) {
693 return r;
694 }
695
696 auto sign = dtos.sign(amrex::lbound(dstbox));
697 auto perm = dtos.permutation(amrex::lbound(dstbox));
698 auto dtype = dstbox.type();
699 IntVect stype{AMREX_D_DECL(dtype[perm[0]],
700 dtype[perm[1]],
701 dtype[perm[2]])};
702 Array<Array<std::pair<int,int>,2>,AMREX_SPACEDIM> ends;
703 Array<Array<std::pair<int,int>,2>,AMREX_SPACEDIM> dst_ends;
704 Array<int,AMREX_SPACEDIM> nboxes;
705 for (int ddim = 0; ddim < AMREX_SPACEDIM; ++ddim) {
706 int sdim = perm[ddim];
707 auto mm = std::minmax(mapped_smallend[sdim],mapped_bigend[sdim]);
708 if (((sign[ddim] > 0) && (mapped_smallend[sdim] <= mapped_bigend[sdim])) ||
709 ((sign[ddim] < 0) && (mapped_bigend[sdim] <= mapped_smallend[sdim])))
710 {
711 nboxes[sdim] = 1;
712 ends[sdim][0] = mm;
713 dst_ends[ddim][0] = std::make_pair(dstbox.smallEnd(ddim),
714 dstbox.bigEnd(ddim));
715 } else {
716 nboxes[sdim] = 2;
717 ends[sdim][0].first = domain.smallEnd(sdim);
718 ends[sdim][0].second = mm.first;
719 ends[sdim][1].first = mm.second;
720 ends[sdim][1].second = domain.bigEnd(sdim);
721 int n0 = ends[sdim][0].second - ends[sdim][0].first;
722 int n1 = ends[sdim][1].second - ends[sdim][1].first;
723 if (mm.first == mapped_smallend[sdim]) {
724 dst_ends[ddim][0] = std::make_pair(dstbox.smallEnd(ddim),
725 dstbox.smallEnd(ddim)+n0);
726 dst_ends[ddim][1] = std::make_pair(dstbox.bigEnd(ddim)-n1,
727 dstbox.bigEnd(ddim));
728 } else {
729 dst_ends[ddim][0] = std::make_pair(dstbox.bigEnd(ddim)-n0,
730 dstbox.bigEnd(ddim));
731 dst_ends[ddim][1] = std::make_pair(dstbox.smallEnd(ddim),
732 dstbox.smallEnd(ddim)+n1);
733 }
734 }
735 }
736
737 r.reserve(AMREX_D_TERM(nboxes[0],*nboxes[1],*nboxes[2]));
738
739#if (AMREX_SPACEDIM == 3)
740 for (int kbox = 0; kbox < nboxes[2]; ++kbox) {
741#endif
742#if (AMREX_SPACEDIM >=2 )
743 for (int jbox = 0; jbox < nboxes[1]; ++jbox) {
744#endif
745 for (int ibox = 0; ibox < nboxes[0]; ++ibox)
746 {
747 IntVect siv(AMREX_D_DECL(ibox,jbox,kbox));
748 IntVect div(AMREX_D_DECL(siv[perm[0]],siv[perm[1]],siv[perm[2]]));
749 r.emplace_back(Box(IntVect(AMREX_D_DECL(ends[0][ibox].first,
750 ends[1][jbox].first,
751 ends[2][kbox].first)),
752 IntVect(AMREX_D_DECL(ends[0][ibox].second,
753 ends[1][jbox].second,
754 ends[2][kbox].second)),
755 stype),
756 Box(IntVect(AMREX_D_DECL(dst_ends[0][div[0]].first,
757 dst_ends[1][div[1]].first,
758 dst_ends[2][div[2]].first)),
759 IntVect(AMREX_D_DECL(dst_ends[0][div[0]].second,
760 dst_ends[1][div[1]].second,
761 dst_ends[2][div[2]].second)),
762 dtype));
763 AMREX_D_TERM(},},})
764
765 return r;
766}
767
768template <typename DTOS>
769Box get_dst_subbox (DTOS const& dtos, std::pair<Box,Box> const& sdboxes,
770 Box const& srcsubbox)
771{
772 Box const& srcbox = sdboxes.first;
773 Box const& dstbox = sdboxes.second;
774 if (srcbox == srcsubbox) {
775 return dstbox;
776 } else {
777 auto sign = dtos.sign(amrex::lbound(dstbox));
778 auto perm = dtos.permutation(amrex::lbound(dstbox));
779 Box dstsubbox = dstbox;
780 for (int ddim = 0; ddim < AMREX_SPACEDIM; ++ddim) {
781 int sdim = perm[ddim];
782 if (sign[ddim] > 0) {
783 dstsubbox.growLo(ddim, srcbox.smallEnd(sdim)-srcsubbox.smallEnd(sdim));
784 dstsubbox.growHi(ddim, srcsubbox.bigEnd(sdim)-srcbox.bigEnd(sdim));
785 } else {
786 dstsubbox.growLo(ddim, srcsubbox.bigEnd(sdim)-srcbox.bigEnd(sdim));
787 dstsubbox.growHi(ddim, srcbox.smallEnd(sdim)-srcsubbox.smallEnd(sdim));
788 }
789 }
790 return dstsubbox;
791 }
792}
793
795namespace detail {
796 void split_boxes (BoxList& bl, Box const& domain);
797}
799
800template <BaseFabType FAB, typename DTOS>
801requires (IsCallableR<Dim3,DTOS,Dim3>::value)
802FabArrayBase::CommMetaData
803makeFillBoundaryMetaData (FabArray<FAB>& mf, IntVect const& nghost,
804 Geometry const& geom, DTOS const& dtos)
805{
806 FabArrayBase::CommMetaData cmd;
807 cmd.m_LocTags = std::make_unique<FabArrayBase::CopyComTagsContainer>();
808 cmd.m_SndTags = std::make_unique<FabArrayBase::MapOfCopyComTagContainers>();
809 cmd.m_RcvTags = std::make_unique<FabArrayBase::MapOfCopyComTagContainers>();
810
811 // Normal FillBoundary part
812 mf.define_fb_metadata(cmd, nghost, false, geom.periodicity(), false);
813
814 BoxArray const& ba = mf.boxArray();
815 DistributionMapping const& dm = mf.DistributionMap();
816 Box dombox = amrex::convert(geom.Domain(), ba.ixType());
817 Box pdombox = amrex::convert(geom.growPeriodicDomain(nghost), ba.ixType());
818
819 const int myproc = ParallelDescriptor::MyProc();
820 const auto nboxes = static_cast<int>(ba.size());
821 std::vector<std::pair<int,Box> > isects;
822
823 for (int i = 0; i < nboxes; ++i) {
824 Box const& gbx = amrex::grow(ba[i], nghost);
825 BoxList bl = amrex::boxDiff(gbx, pdombox);
826 if (bl.isEmpty()) { continue; }
827
828 detail::split_boxes(bl, dombox);
829
830 const int dst_owner = dm[i];
831 for (auto const& dst_box : bl) {
832 auto const& src_dst_boxes = get_src_dst_boxes(dtos, dst_box, dombox);
833 for (auto const& sd_box_pair : src_dst_boxes) {
834 ba.intersections(sd_box_pair.first, isects);
835 for (auto const& is : isects) {
836 int const k = is.first;
837 Box const src_b = is.second;
838 int const src_owner = dm[k];
839 if (dst_owner == myproc || src_owner == myproc) {
840 Box const& dst_b = get_dst_subbox(dtos, sd_box_pair, src_b);
841 if (src_owner == dst_owner) {
842 cmd.m_LocTags->emplace_back(dst_b, src_b, i, k);
843 } else {
844 auto& tags = (dst_owner == myproc) ?
845 (*cmd.m_RcvTags)[src_owner] :
846 (*cmd.m_SndTags)[dst_owner];
847 tags.emplace_back(dst_b, src_b, i, k);
848 }
849 }
850 }
851 }
852 }
853 }
854
855 return cmd;
856}
857
858struct SphThetaPhiRIndexMapping
859{
860 SphThetaPhiRIndexMapping (Box const& a_domain)
861 : nx(a_domain.length(0)),
862 ny(a_domain.length(1)),
863 nz(a_domain.length(2))
864 {
865 AMREX_ASSERT(a_domain.smallEnd() == 0);
866 }
867
868 [[nodiscard]] AMREX_GPU_HOST_DEVICE
869 Dim3 operator() (Dim3 const& ijk) const noexcept
870 {
871 const int i = ijk.x;
872 const int j = ijk.y;
873 const int k = ijk.z;
874 bool ilo = i < 0;
875 bool ihi = i >= nx;
876 bool imd = i >= 0 && i < nx;
877 bool jlo = j < 0;
878 bool jhi = j >= ny;
879 bool jmd = j >= 0 && j < ny;
880 bool klo = k < 0;
881 bool kmd = k >= 0 && k < nz;
882 // We do not need to do anything at the theta-lo/r-lo edge,
883 // theta-hi/r-lo edge, and r > r-hi.
884 if (ilo && jmd && kmd)
885 {
886 return Dim3{.x = -1-i, .y = (j+ny/2)%ny, .z = k};
887 }
888 else if (ihi && jmd && kmd)
889 {
890 return Dim3{.x = 2*nx-1-i, .y = (j+ny/2)%ny, .z = k};
891 }
892 else if (imd && jlo && kmd)
893 {
894 return Dim3{.x = i, .y = j+ny, .z = k};
895 }
896 else if (imd && jhi && kmd)
897 {
898 return Dim3{.x = i, .y = j-ny, .z = k};
899 }
900 else if (imd && jmd && klo)
901 {
902 return Dim3{.x = nx-1-i, .y = (j+ny/2)%ny, .z = -1-k};
903 }
904 else if (ilo && jlo && kmd)
905 {
906 return Dim3{.x = -1-i, .y = (j+ny/2)%ny, .z = k};
907 }
908 else if (ihi && jlo && kmd)
909 {
910 return Dim3{.x = 2*nx-1-i, .y = (j+ny/2)%ny, .z = k};
911 }
912 else if (ilo && jhi && kmd)
913 {
914 return Dim3{.x = -1-i, .y = (j+ny/2)%ny, .z = k};
915 }
916 else if (ihi && jhi && kmd)
917 {
918 return Dim3{.x = 2*nx-1-i, .y = (j+ny/2)%ny, .z = k};
919 }
920 else if (imd && jlo && klo)
921 {
922 return Dim3{.x = nx-1-i, .y = (j+ny/2)%ny, .z = -1-k};
923 }
924 else if (imd && jhi && klo)
925 {
926 return Dim3{.x = nx-1-i, .y = (j+ny/2)%ny, .z = -1-k};
927 }
928 else
929 {
930 return ijk;
931 }
932 }
933
934 [[nodiscard]] IntVect sign (Dim3 const& ijk) const noexcept
935 {
936 if (ijk.z < 0) {
937 return IntVect{AMREX_D_DECL(-1, 1,-1)};
938 } else if (ijk.z >=0 && ijk.z < nz &&
939 (ijk.x < 0 || ijk.x >= nx)) {
940 return IntVect{AMREX_D_DECL(-1, 1, 1)};
941 } else {
942 return IntVect{AMREX_D_DECL( 1, 1, 1)};
943 }
944 }
945
946 [[nodiscard]] IntVect permutation (Dim3 const& /*ijk*/) const noexcept // NOLINT(readability-convert-member-functions-to-static)
947 {
948 return IntVect(AMREX_D_DECL(0,1,2));
949 }
950
951private:
952 int nx, ny, nz;
953};
954
955struct SphThetaPhiRComponentMapping
956{
957 SphThetaPhiRComponentMapping (Box const& a_domain, int a_start_index)
958 : nx(a_domain.length(0)),
959 ny(a_domain.length(1)),
960 nz(a_domain.length(2)),
961 scomp(a_start_index) {}
962
963 template <typename T>
964 [[nodiscard]] AMREX_GPU_HOST_DEVICE
965 T operator()(Array4<const T> const& a, Dim3 const& ijk, int n) const noexcept
966 {
967 const int i = ijk.x;
968 const int j = ijk.y;
969 const int k = ijk.z;
970 auto r = a(i,j,k,n);
971 if (n == scomp) {
972 if ((i >= 0 && i < nx) &&
973 (j < 0 || j >= ny) &&
974 (k >= 0 && k < nz)) {
975 return r;
976 } else {
977 // We do not need to worry about the theta-lo/r-lo edge,
978 // theta-hi/r-lo edge, and r > r-hi.
979 return -r;
980 }
981 } else if (n == scomp+2) {
982 if (k < 0) {
983 return -r;
984 } else {
985 return r;
986 }
987 } else {
988 return r;
989 }
990 }
991private:
992 int nx, ny, nz;
993 int scomp;
994};
995
996extern template MultiBlockCommMetaData ParallelCopy(FabArray<FArrayBox>& dest, const Box& destbox,
997 const FabArray<FArrayBox>& src, int destcomp,
998 int srccomp, int numcomp, const IntVect& ngrow,
999 MultiBlockIndexMapping const&, Identity const&);
1000}
1001
1002#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_ASSERT(EX)
Definition AMReX_BLassert.H:39
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_NODISCARD
Definition AMReX_Extension.H:252
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...)
Definition AMReX_GpuLaunchMacrosC.nolint.H:111
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:148
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:670
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:681
__host__ __device__ BoxND & setType(const IndexTypeND< dim > &t) noexcept
Set indexing type.
Definition AMReX_Box.H:513
CopyComTag::CopyComTagsContainer CopyComTagsContainer
Definition AMReX_FabArrayBase.H:220
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1359
__host__ __device__ Dim3 length(Array4< T > const &a) noexcept
Return the spatial extents of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1373
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1345
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1289
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
Definition AMReX_NonLocalBC.cpp:39
void Rotate90(FabArray< FAB > &mf, int scomp, int ncomp, IntVect const &nghost, Box const &domain)
void unpack_recv_buffer_cpu(FabArray< FAB > &mf, int dcomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{}) noexcept
Box InverseImage(DTOS const &dtos, const Box &box)
Applies the inverse Dim3 to Dim3 mapping onto Boxes without changing the index type.
Definition AMReX_NonLocalBC.H:189
void Rotate180(FabArray< FAB > &mf, int scomp, int ncomp, IntVect const &nghost, Box const &domain)
void local_copy_cpu(FabArray< FAB > &dest, const FabArray< FAB > &src, int dcomp, int scomp, int ncomp, FabArrayBase::CopyComTagsContainer const &local_tags, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{}) noexcept
void FillPolar(FabArray< FAB > &mf, int scomp, int ncomp, IntVect const &nghost, Box const &domain)
void local_copy_gpu(FabArray< FAB > &dest, const FabArray< FAB > &src, int dcomp, int scomp, int ncomp, FabArrayBase::CopyComTagsContainer const &local_tags, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{}) noexcept
Box Image(DTOS const &dtos, const Box &box)
Applies the Dim3 to Dim3 mapping onto Boxes but does not change the index type.
Definition AMReX_NonLocalBC.H:123
void unpack_recv_buffer_gpu(FabArray< FAB > &mf, int scomp, int ncomp, Vector< char * > const &recv_data, Vector< std::size_t > const &recv_size, Vector< FabArrayBase::CopyComTagsContainer const * > const &recv_cctc, DTOS const &dtos=DTOS{}, Proj const &proj=Proj{})
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:678
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Return a BoxND with different type.
Definition AMReX_Box.H:1567
__host__ __device__ Array4< T > makeArray4(T *p, Box const &bx, int ncomp) noexcept
Definition AMReX_BaseFab.H:92
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
BoxList boxDiff(const Box &b1in, const Box &b2)
Returns BoxList defining the compliment of b2 in b1in.
Definition AMReX_BoxList.cpp:599
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:1951
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:388
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241