Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_FFT_Poisson.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_POISSON_H_
2#define AMREX_FFT_POISSON_H_
3
4#include <AMReX_FFT.H>
5#include <AMReX_Geometry.H>
6
7namespace amrex::FFT
8{
9
17namespace detail {
18template <typename MF>
19void fill_physbc (MF& mf, Geometry const& geom,
20 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc);
21
22inline Geometry shift_geom (Geometry const& geom)
23{
24 auto const& dom = geom.Domain();
25 auto const& dlo = dom.smallEnd();
26 if (dlo == 0) {
27 return geom;
28 } else {
29 return Geometry(amrex::shift(dom,-dlo), geom.ProbDomain(), geom.CoordInt(),
30 geom.isPeriodic());
31 }
32}
33
34template <typename MF>
35void shift_mfs (IntVect const& domain_lo, MF const& mf1, MF const& mf2,
36 MF& new_mf1, MF& new_mf2)
37{
38 AMREX_ALWAYS_ASSERT(mf1.boxArray() == mf2.boxArray() &&
39 mf1.DistributionMap() == mf2.DistributionMap());
40 BoxArray ba = mf1.boxArray();
41 ba.shift(-domain_lo);
42 new_mf1.define(ba, mf1.DistributionMap(), 1, mf1.nGrowVect(),
43 MFInfo().SetAlloc(false));
44 new_mf2.define(ba, mf2.DistributionMap(), 1, mf2.nGrowVect(),
45 MFInfo().SetAlloc(false));
46 for (MFIter mfi(mf1); mfi.isValid(); ++mfi) {
47 typename MF::fab_type fab1(mf1[mfi], amrex::make_alias, 0, 1);
48 fab1.shift(-domain_lo);
49 new_mf1.setFab(mfi, std::move(fab1));
50
51 typename MF::fab_type fab2(mf2[mfi], amrex::make_alias, 0, 1);
52 fab2.shift(-domain_lo);
53 new_mf2.setFab(mfi, std::move(fab2));
54 }
55}
56
57}
59
65template <typename MF = MultiFab>
67{
68public:
69
76 template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
77 Poisson (Geometry const& geom,
78 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
79 : m_domain_lo(geom.Domain().smallEnd()),
80 m_geom(detail::shift_geom(geom)),
81 m_bc(bc)
82 {
83 bool all_periodic = true;
84 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
85 all_periodic = all_periodic
86 && (bc[idim].first == Boundary::periodic)
87 && (bc[idim].second == Boundary::periodic);
88 }
89 if (all_periodic) {
90 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
91 } else {
92 m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(), m_bc);
93 }
94 }
95
101 template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
102 explicit Poisson (Geometry const& geom)
103 : m_domain_lo(geom.Domain().smallEnd()),
104 m_geom(detail::shift_geom(geom)),
105 m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic),
106 std::make_pair(Boundary::periodic,Boundary::periodic),
107 std::make_pair(Boundary::periodic,Boundary::periodic))}
108 {
109 if (m_geom.isAllPeriodic()) {
110 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
111 } else {
112 amrex::Abort("FFT::Poisson: wrong BC");
113 }
114 }
115
126 void solve (MF& a_soln, MF const& a_rhs);
127
128private:
129 IntVect m_domain_lo;
130 Geometry m_geom;
131 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> m_bc;
132 std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
133 std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
134};
135
136#if (AMREX_SPACEDIM == 3)
141template <typename MF = MultiFab>
143{
144public:
145
153 template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
154 explicit PoissonOpenBC (Geometry const& geom,
156 IntVect const& ngrow = IntVect(0));
157
164 void solve (MF& soln, MF const& rhs);
165
169 void define_doit (); // has to be public for cuda
170
171private:
172 Geometry m_geom;
173 Box m_grown_domain;
174 IntVect m_ngrow;
176};
177#endif
178
185template <typename MF = MultiFab>
187{
188public:
189 using T = typename MF::value_type;
190
197 template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
199 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
200 : m_domain_lo(geom.Domain().smallEnd()),
201 m_geom(detail::shift_geom(geom)),
202 m_bc(bc)
203 {
204#if (AMREX_SPACEDIM < 3)
205 amrex::Abort("FFT::PoissonHybrid: 1D & 2D todo");
206 return;
207#endif
208 bool periodic_xy = true;
209 for (int idim = 0; idim < 2; ++idim) {
210 if (m_geom.Domain().length(idim) > 1) {
211 periodic_xy = periodic_xy && (bc[idim].first == Boundary::periodic);
212 AMREX_ALWAYS_ASSERT((bc[idim].first == Boundary::periodic &&
213 bc[idim].second == Boundary::periodic) ||
214 (bc[idim].first != Boundary::periodic &&
215 bc[idim].second != Boundary::periodic));
216 }
217 }
219 bc[2].second != Boundary::periodic);
220 Info info{};
221 info.setTwoDMode(true);
222 if (m_geom.Domain().length(0) == 1 || m_geom.Domain().length(1) == 1) {
223 info.setOneDMode(true);
224 }
225 if (periodic_xy) {
226 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain(),
227 info);
228 } else {
229 m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(),
230 m_bc, info);
231 }
232 build_spmf();
233 }
234
244 void solve (MF& soln, MF const& rhs);
252 void solve (MF& soln, MF const& rhs, Vector<T> const& dz);
260 void solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz);
261
268 void solve_2d (MF& a_soln, MF const& a_rhs);
269
278 template <typename TRIA, typename TRIC>
279 void solve (MF& a_soln, MF const& a_rhs, TRIA const& tria, TRIC const& tric);
280
281 // This is public for cuda
289 template <typename FA, typename TRIA, typename TRIC>
290 void solve_z (FA& spmf, TRIA const& tria, TRIC const& tric);
291
297 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
298
299private:
300
301 void build_spmf ();
302
303 IntVect m_domain_lo;
304 Geometry m_geom;
305 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> m_bc;
306 std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
307 std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
308 MF m_spmf_r;
309 using cMF = FabArray<BaseFab<GpuComplex<T>>>;
310 cMF m_spmf_c;
311};
312
313template <typename MF>
314void Poisson<MF>::solve (MF& a_soln, MF const& a_rhs)
315{
316 BL_PROFILE("FFT::Poisson::solve");
317
318 MF* soln = &a_soln;
319 MF const* rhs = &a_rhs;
320 MF solntmp, rhstmp;
321 if (m_domain_lo != 0) {
322 detail::shift_mfs(m_domain_lo, a_soln, a_rhs, solntmp, rhstmp);
323 soln = &solntmp;
324 rhs = &rhstmp;
325 }
326
327 AMREX_ASSERT(soln->is_cell_centered() && rhs->is_cell_centered());
328
329 using T = typename MF::value_type;
330
332 {AMREX_D_DECL(Math::pi<T>()/T(m_geom.Domain().length(0)),
333 Math::pi<T>()/T(m_geom.Domain().length(1)),
334 Math::pi<T>()/T(m_geom.Domain().length(2)))};
335 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
336 if (m_bc[idim].first == Boundary::periodic) {
337 fac[idim] *= T(2);
338 }
339 }
341 {AMREX_D_DECL(T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0)),
342 T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1)),
343 T(2)/T(m_geom.CellSize(2)*m_geom.CellSize(2)))};
344 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
345 if (m_geom.Domain().length(idim) == 1) {
346 dxfac[idim] = 0;
347 }
348 }
349 auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
350
352 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
353 if (m_bc[idim].first == Boundary::odd &&
354 m_bc[idim].second == Boundary::odd)
355 {
356 offset[idim] = T(1);
357 }
358 else if ((m_bc[idim].first == Boundary::odd &&
359 m_bc[idim].second == Boundary::even) ||
360 (m_bc[idim].first == Boundary::even &&
361 m_bc[idim].second == Boundary::odd))
362 {
363 offset[idim] = T(0.5);
364 }
365 }
366
367 auto f = [=] AMREX_GPU_DEVICE (int i, int j, int k, auto& spectral_data)
368 {
370 AMREX_D_TERM(T a = fac[0]*(i+offset[0]);,
371 T b = fac[1]*(j+offset[1]);,
372 T c = fac[2]*(k+offset[2]));
373 T k2 = AMREX_D_TERM(dxfac[0]*(std::cos(a)-T(1)),
374 +dxfac[1]*(std::cos(b)-T(1)),
375 +dxfac[2]*(std::cos(c)-T(1)));
376 if (k2 != T(0)) {
377 spectral_data /= k2;
378 }
379 spectral_data *= scale;
380 };
381
382 IntVect const& ng = amrex::elemwiseMin(soln->nGrowVect(), IntVect(1));
383
384 if (m_r2x) {
385 m_r2x->forwardThenBackward_doit_0(*rhs, *soln, f, ng, m_geom.periodicity());
386 detail::fill_physbc(*soln, m_geom, m_bc);
387 } else {
388 m_r2c->forward(*rhs);
389 m_r2c->post_forward_doit_0(f);
390 m_r2c->backward_doit(*soln, ng, m_geom.periodicity());
391 }
392}
393
394#if (AMREX_SPACEDIM == 3)
395
396template <typename MF>
397template <typename FA, std::enable_if_t<IsFabArray_v<FA>,int> FOO>
399 IntVect const& ngrow)
400 : m_geom(geom),
401 m_grown_domain(amrex::grow(amrex::convert(geom.Domain(),ixtype),ngrow)),
402 m_ngrow(ngrow),
403 m_solver(m_grown_domain)
404{
405 define_doit();
406}
407
408template <typename MF>
410{
411 using T = typename MF::value_type;
412 auto const& lo = m_grown_domain.smallEnd();
413 auto const dx = T(m_geom.CellSize(0));
414 auto const dy = T(m_geom.CellSize(1));
415 auto const dz = T(m_geom.CellSize(2));
416 auto const gfac = T(1)/T(std::sqrt(T(12)));
417 // 0.125 comes from that there are 8 Gauss quadrature points
418 auto const fac = T(-0.125) * (dx*dy*dz) / (T(4)*Math::pi<T>());
419 m_solver.setGreensFunction([=] AMREX_GPU_DEVICE (int i, int j, int k) -> T
420 {
421 auto x = (T(i-lo[0]) - gfac) * dx; // first Gauss quadrature point
422 auto y = (T(j-lo[1]) - gfac) * dy;
423 auto z = (T(k-lo[2]) - gfac) * dz;
424 T r = 0;
425 for (int gx = 0; gx < 2; ++gx) {
426 for (int gy = 0; gy < 2; ++gy) {
427 for (int gz = 0; gz < 2; ++gz) {
428 auto xg = x + 2*gx*gfac*dx;
429 auto yg = y + 2*gy*gfac*dy;
430 auto zg = z + 2*gz*gfac*dz;
431 r += T(1)/std::sqrt(xg*xg+yg*yg+zg*zg);
432 }}}
433 return fac * r;
434 });
435}
436
437template <typename MF>
438void PoissonOpenBC<MF>::solve (MF& soln, MF const& rhs)
439{
440 AMREX_ASSERT(m_grown_domain.ixType() == soln.ixType() && m_grown_domain.ixType() == rhs.ixType());
441 m_solver.solve(soln, rhs);
442}
443
444#endif /* AMREX_SPACEDIM == 3 */
445
447namespace fft_poisson_detail {
448 template <typename T>
449 struct Tri_Zero {
450 [[nodiscard]] constexpr T operator() (int, int, int) const
451 {
452 return 0;
453 }
454 };
455
456 template <typename T>
457 struct Tri_Uniform {
459 T operator() (int, int, int) const
460 {
461 return m_dz2inv;
462 }
463 T m_dz2inv;
464 };
465
466 template <typename T>
467 struct TriA {
469 T operator() (int, int, int k) const
470 {
471 return (k > 0) ? T(2.0) / (m_dz[k]*(m_dz[k]+m_dz[k-1]))
472 : T(1.0) / (m_dz[k]* m_dz[k]);
473 }
474 T const* m_dz;
475 };
476
477 template <typename T>
478 struct TriC {
480 T operator() (int, int, int k) const
481 {
482 return (k < m_size-1) ? T(2.0) / (m_dz[k]*(m_dz[k]+m_dz[k+1]))
483 : T(1.0) / (m_dz[k]* m_dz[k]);
484 }
485 T const* m_dz;
486 int m_size;
487 };
488}
490
491template <typename MF>
492std::pair<BoxArray,DistributionMapping>
494{
495 if (!m_spmf_r.empty()) {
496 return std::make_pair(m_spmf_r.boxArray(), m_spmf_r.DistributionMap());
497 } else {
498 return std::make_pair(m_spmf_c.boxArray(), m_spmf_c.DistributionMap());
499 }
500}
501
502template <typename MF>
504{
505#if (AMREX_SPACEDIM == 3)
506 AMREX_ALWAYS_ASSERT(m_geom.Domain().length(2) > 1 &&
507 (m_geom.Domain().length(0) > 1 ||
508 m_geom.Domain().length(1) > 1));
509
510 if (m_r2c) {
511 Box cdomain = m_geom.Domain();
512 if (cdomain.length(0) > 1) {
513 cdomain.setBig(0,cdomain.length(0)/2);
514 } else {
515 cdomain.setBig(1,cdomain.length(1)/2);
516 }
517 auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
518 {AMREX_D_DECL(true,true,false)});
519 DistributionMapping dm = detail::make_iota_distromap(cba.size());
520 m_spmf_c.define(cba, dm, 1, 0);
521 } else if (m_geom.Domain().length(0) > 1 &&
522 m_geom.Domain().length(1) > 1) {
523 if (m_r2x->m_cy.empty()) { // spectral data is real
524 auto sba = amrex::decompose(m_geom.Domain(),ParallelContext::NProcsSub(),
525 {AMREX_D_DECL(true,true,false)});
526 DistributionMapping dm = detail::make_iota_distromap(sba.size());
527 m_spmf_r.define(sba, dm, 1, 0);
528 } else { // spectral data is complex. one of the first two dimensions is periodic.
529 Box cdomain = m_geom.Domain();
530 if (m_bc[0].first == Boundary::periodic) {
531 cdomain.setBig(0,cdomain.length(0)/2);
532 } else {
533 cdomain.setBig(1,cdomain.length(1)/2);
534 }
535 auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
536 {AMREX_D_DECL(true,true,false)});
537 DistributionMapping dm = detail::make_iota_distromap(cba.size());
538 m_spmf_c.define(cba, dm, 1, 0);
539 }
540 } else {
541 // spectral data is real
542 auto sba = amrex::decompose(m_geom.Domain(),ParallelContext::NProcsSub(),
543 {AMREX_D_DECL(true,true,false)});
544 DistributionMapping dm = detail::make_iota_distromap(sba.size());
545 m_spmf_r.define(sba, dm, 1, 0);
546 }
547#else
549#endif
550}
551
552template <typename MF>
553void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs)
554{
555 auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1));
556 solve(soln, rhs,
557 fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)},
558 fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)});
559}
560
561template <typename MF>
562void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz)
563{
564 auto const* pdz = dz.dataPtr();
565 solve(soln, rhs,
566 fft_poisson_detail::TriA<T>{pdz},
567 fft_poisson_detail::TriC<T>{pdz,int(dz.size())});
568}
569
570template <typename MF>
571void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Vector<T> const& dz)
572{
573 AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered());
574
575#ifdef AMREX_USE_GPU
576 Gpu::DeviceVector<T> d_dz(dz.size());
577 Gpu::htod_memcpy_async(d_dz.data(), dz.data(), dz.size()*sizeof(T));
578 auto const* pdz = d_dz.data();
579#else
580 auto const* pdz = dz.data();
581#endif
582 solve(soln, rhs,
583 fft_poisson_detail::TriA<T>{pdz},
584 fft_poisson_detail::TriC<T>{pdz,int(dz.size())});
585}
586
587template <typename MF>
588void PoissonHybrid<MF>::solve_2d (MF& soln, MF const& rhs)
589{
590 solve(soln, rhs, fft_poisson_detail::Tri_Zero<T>{}, fft_poisson_detail::Tri_Zero<T>{});
591}
592
593template <typename MF>
594template <typename TRIA, typename TRIC>
595void PoissonHybrid<MF>::solve (MF& a_soln, MF const& a_rhs, TRIA const& tria,
596 TRIC const& tric)
597{
598 BL_PROFILE("FFT::PoissonHybrid::solve");
599
600 AMREX_ASSERT(a_soln.is_cell_centered() && a_rhs.is_cell_centered());
601
602#if (AMREX_SPACEDIM < 3)
603 amrex::ignore_unused(a_soln, a_rhs, tria, tric);
604#else
605
606 MF* soln = &a_soln;
607 MF const* rhs = &a_rhs;
608 MF solntmp, rhstmp;
609 if (m_domain_lo != 0) {
610 detail::shift_mfs(m_domain_lo, a_soln, a_rhs, solntmp, rhstmp);
611 soln = &solntmp;
612 rhs = &rhstmp;
613 }
614
615 IntVect const& ng = amrex::elemwiseMin(soln->nGrowVect(), IntVect(1));
616
617 if (m_r2c)
618 {
619 m_r2c->forward(*rhs, m_spmf_c);
620 solve_z(m_spmf_c, tria, tric);
621 m_r2c->backward_doit(m_spmf_c, *soln, ng, m_geom.periodicity());
622 }
623 else
624 {
625 if (m_r2x->m_cy.empty()) { // spectral data is real
626 m_r2x->forward(*rhs, m_spmf_r);
627 solve_z(m_spmf_r, tria, tric);
628 m_r2x->backward(m_spmf_r, *soln, ng, m_geom.periodicity());
629 } else { // spectral data is complex.
630 m_r2x->forward(*rhs, m_spmf_c);
631 solve_z(m_spmf_c, tria, tric);
632 m_r2x->backward(m_spmf_c, *soln, ng, m_geom.periodicity());
633 }
634 }
635
636 detail::fill_physbc(*soln, m_geom, m_bc);
637#endif
638}
639
640template <typename MF>
641template <typename FA, typename TRIA, typename TRIC>
642void PoissonHybrid<MF>::solve_z (FA& spmf, TRIA const& tria, TRIC const& tric)
643{
644 BL_PROFILE("PoissonHybrid::solve_z");
645
646#if (AMREX_SPACEDIM < 3)
647 amrex::ignore_unused(spmf, tria, tric);
648#else
649 auto facx = Math::pi<T>()/T(m_geom.Domain().length(0));
650 auto facy = Math::pi<T>()/T(m_geom.Domain().length(1));
651 if (m_bc[0].first == Boundary::periodic) { facx *= T(2); }
652 if (m_bc[1].first == Boundary::periodic) { facy *= T(2); }
653 auto dxfac = T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0));
654 auto dyfac = T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1));
655 auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
656
657 if (m_geom.Domain().length(0) == 1) { dxfac = 0; }
658 if (m_geom.Domain().length(1) == 1) { dyfac = 0; }
659
660 GpuArray<T,AMREX_SPACEDIM-1> offset{T(0),T(0)};
661 for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) {
662 if (m_geom.Domain().length(idim) > 1) {
663 if (m_bc[idim].first == Boundary::odd &&
664 m_bc[idim].second == Boundary::odd)
665 {
666 offset[idim] = T(1);
667 }
668 else if ((m_bc[idim].first == Boundary::odd &&
669 m_bc[idim].second == Boundary::even) ||
670 (m_bc[idim].first == Boundary::even &&
671 m_bc[idim].second == Boundary::odd))
672 {
673 offset[idim] = T(0.5);
674 }
675 }
676 }
677
678 if
679#ifndef _WIN32
680 constexpr
681#endif
682 (std::is_same_v<TRIA,fft_poisson_detail::Tri_Zero<T>> &&
683 std::is_same_v<TRIC,fft_poisson_detail::Tri_Zero<T>>) {
684 amrex::ignore_unused(tria,tric);
685#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
686#pragma omp parallel
687#endif
688 for (MFIter mfi(spmf,TilingIfNotGPU()); mfi.isValid(); ++mfi)
689 {
690 auto const& spectral = spmf.array(mfi);
691 auto const& box = mfi.validbox();
692 amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
693 {
694 T a = facx*(i+offset[0]);
695 T b = facy*(j+offset[1]);
696 T k2 = dxfac * (std::cos(a)-T(1))
697 + dyfac * (std::cos(b)-T(1));
698 if (k2 != T(0)) {
699 spectral(i,j,k) /= k2;
700 }
701 spectral(i,j,k) *= scale;
702 });
703 }
704 } else {
705 bool zlo_neumann = m_bc[2].first == Boundary::even;
706 bool zhi_neumann = m_bc[2].second == Boundary::even;
707 bool is_singular = (offset[0] == T(0)) && (offset[1] == T(0))
708 && zlo_neumann && zhi_neumann;
709
710 auto nz = m_geom.Domain().length(2);
711
712#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
713#pragma omp parallel
714#endif
715 for (MFIter mfi(spmf); mfi.isValid(); ++mfi)
716 {
717 auto const& spectral = spmf.array(mfi);
718 auto const& box = mfi.validbox();
719 auto const& xybox = amrex::makeSlab(box, 2, 0);
720
721#ifdef AMREX_USE_GPU
722 // xxxxx TODO: We need to explore how to optimize this
723 // function. Maybe we can use cusparse. Maybe we should make
724 // z-direction to be the unit stride direction.
725
726 FArrayBox tridiag_workspace(box,4);
727 auto const& ald = tridiag_workspace.array(0);
728 auto const& bd = tridiag_workspace.array(1);
729 auto const& cud = tridiag_workspace.array(2);
730 auto const& scratch = tridiag_workspace.array(3);
731
732 amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int)
733 {
734 T a = facx*(i+offset[0]);
735 T b = facy*(j+offset[1]);
736 T k2 = dxfac * (std::cos(a)-T(1))
737 + dyfac * (std::cos(b)-T(1));
738
739 // Tridiagonal solve
740 for(int k=0; k < nz; k++) {
741 if(k==0) {
742 ald(i,j,k) = T(0.);
743 cud(i,j,k) = tric(i,j,k);
744 if (zlo_neumann) {
745 bd(i,j,k) = k2 - cud(i,j,k);
746 } else {
747 bd(i,j,k) = k2 - cud(i,j,k) - T(2.0)*tria(i,j,k);
748 }
749 } else if (k == nz-1) {
750 ald(i,j,k) = tria(i,j,k);
751 cud(i,j,k) = T(0.);
752 if (zhi_neumann) {
753 bd(i,j,k) = k2 - ald(i,j,k);
754 if (i == 0 && j == 0 && is_singular) {
755 bd(i,j,k) *= T(2.0);
756 }
757 } else {
758 bd(i,j,k) = k2 - ald(i,j,k) - T(2.0)*tric(i,j,k);
759 }
760 } else {
761 ald(i,j,k) = tria(i,j,k);
762 cud(i,j,k) = tric(i,j,k);
763 bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
764 }
765 }
766
767 scratch(i,j,0) = cud(i,j,0)/bd(i,j,0);
768 spectral(i,j,0) = spectral(i,j,0)/bd(i,j,0);
769
770 for (int k = 1; k < nz; k++) {
771 if (k < nz-1) {
772 scratch(i,j,k) = cud(i,j,k) / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
773 }
774 spectral(i,j,k) = (spectral(i,j,k) - ald(i,j,k) * spectral(i,j,k - 1))
775 / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
776 }
777
778 for (int k = nz - 2; k >= 0; k--) {
779 spectral(i,j,k) -= scratch(i,j,k) * spectral(i,j,k + 1);
780 }
781
782 for (int k = 0; k < nz; ++k) {
783 spectral(i,j,k) *= scale;
784 }
785 });
787
788#else
789
790 Gpu::DeviceVector<T> ald(nz);
792 Gpu::DeviceVector<T> cud(nz);
793 Gpu::DeviceVector<T> scratch(nz);
794
795 amrex::LoopOnCpu(xybox, [&] (int i, int j, int)
796 {
797 T a = facx*(i+offset[0]);
798 T b = facy*(j+offset[1]);
799 T k2 = dxfac * (std::cos(a)-T(1))
800 + dyfac * (std::cos(b)-T(1));
801
802 // Tridiagonal solve
803 for(int k=0; k < nz; k++) {
804 if(k==0) {
805 ald[k] = T(0.);
806 cud[k] = tric(i,j,k);
807 if (zlo_neumann) {
808 bd[k] = k2 - cud[k];
809 } else {
810 bd[k] = k2 - cud[k] - T(2.0)*tria(i,j,k);
811 }
812 } else if (k == nz-1) {
813 ald[k] = tria(i,j,k);
814 cud[k] = T(0.);
815 if (zhi_neumann) {
816 bd[k] = k2 - ald[k];
817 if (i == 0 && j == 0 && is_singular) {
818 bd[k] *= T(2.0);
819 }
820 } else {
821 bd[k] = k2 - ald[k] - T(2.0)*tric(i,j,k);
822 }
823 } else {
824 ald[k] = tria(i,j,k);
825 cud[k] = tric(i,j,k);
826 bd[k] = k2 -ald[k]-cud[k];
827 }
828 }
829
830 scratch[0] = cud[0]/bd[0];
831 spectral(i,j,0) = spectral(i,j,0)/bd[0];
832
833 for (int k = 1; k < nz; k++) {
834 if (k < nz-1) {
835 scratch[k] = cud[k] / (bd[k] - ald[k] * scratch[k-1]);
836 }
837 spectral(i,j,k) = (spectral(i,j,k) - ald[k] * spectral(i,j,k - 1))
838 / (bd[k] - ald[k] * scratch[k-1]);
839 }
840
841 for (int k = nz - 2; k >= 0; k--) {
842 spectral(i,j,k) -= scratch[k] * spectral(i,j,k + 1);
843 }
844
845 for (int k = 0; k < nz; ++k) {
846 spectral(i,j,k) *= scale;
847 }
848 });
849#endif
850 }
851 }
852#endif
853}
854
856namespace detail {
857
858template <class T>
859struct FFTPhysBCTag {
860 Array4<T> dfab;
861 Box dbox;
862 Boundary bc;
863 Orientation face;
864
866 Box const& box () const noexcept { return dbox; }
867};
868
869template <typename MF>
870void fill_physbc (MF& mf, Geometry const& geom,
871 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
872{
873 using T = typename MF::value_type;
874 using Tag = FFTPhysBCTag<T>;
875 Vector<Tag> tags;
876
877 for (MFIter mfi(mf, MFItInfo{}.DisableDeviceSync()); mfi.isValid(); ++mfi)
878 {
879 auto const& box = mfi.fabbox();
880 auto const& arr = mf.array(mfi);
881 for (OrientationIter oit; oit; ++oit) {
882 Orientation face = oit();
883 int idim = face.coordDir();
884 Box b = geom.Domain();
885 Boundary fbc;
886 if (face.isLow()) {
887 b.setRange(idim,geom.Domain().smallEnd(idim)-1);
888 fbc = bc[idim].first;
889 } else {
890 b.setRange(idim,geom.Domain().bigEnd(idim)+1);
891 fbc = bc[idim].second;
892 }
893 b &= box;
894 if (b.ok() && fbc != Boundary::periodic) {
895 tags.push_back({arr, b, fbc, face});
896 }
897 }
898 }
899
900#if defined(AMREX_USE_GPU)
901 amrex::ParallelFor(tags, [=] AMREX_GPU_DEVICE (int i, int j, int k,
902 Tag const& tag) noexcept
903#else
904 auto ntags = int(tags.size());
905#ifdef AMREX_USE_OMP
906#pragma omp parallel for
907#endif
908 for (int itag = 0; itag < ntags; ++itag) {
909 Tag const& tag = tags[itag];
910 amrex::LoopOnCpu(tag.dbox, [&] (int i, int j, int k)
911#endif
912 {
913 int sgn = tag.face.isLow() ? 1 : -1;
914 IntVect siv = IntVect(AMREX_D_DECL(i,j,k))
915 + sgn * IntVect::TheDimensionVector(tag.face.coordDir());
916 if (tag.bc == Boundary::odd) {
917 tag.dfab(i,j,k) = -tag.dfab(siv);
918 } else { // even
919 tag.dfab(i,j,k) = tag.dfab(siv);
920 }
921 });
922#if !defined(AMREX_USE_GPU)
923 }
924#endif
925}
926}
928
929}
930
931#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
Array4< T const > array() const noexcept
Definition AMReX_BaseFab.H:382
__host__ __device__ BoxND & setBig(const IntVectND< dim > &bg) noexcept
Redefine the big end of the BoxND.
Definition AMReX_Box.H:487
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:154
A Fortran Array of REALs.
Definition AMReX_FArrayBox.H:231
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:24
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:187
void solve(MF &soln, MF const &rhs)
Solve del dot grad soln = rhs for uniform spacing in all directions.
Definition AMReX_FFT_Poisson.H:553
typename MF::value_type T
Definition AMReX_FFT_Poisson.H:189
void solve_z(FA &spmf, TRIA const &tria, TRIC const &tric)
CUDA helper that applies the supplied tridiagonal operator along z.
Definition AMReX_FFT_Poisson.H:642
PoissonHybrid(Geometry const &geom, Array< std::pair< Boundary, Boundary >, 3 > const &bc)
Construct the hybrid Poisson solver (mixed BCs in z with optional nonuniform spacing).
Definition AMReX_FFT_Poisson.H:198
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Layout information for spectral storage used by the hybrid solver.
Definition AMReX_FFT_Poisson.H:493
void solve_2d(MF &a_soln, MF const &a_rhs)
Solve an independent 2-D Poisson problem on every z-plane.
Definition AMReX_FFT_Poisson.H:588
Poisson solve for Open BC using FFT.
Definition AMReX_FFT_Poisson.H:143
void solve(MF &soln, MF const &rhs)
Solve the open-boundary Poisson problem.
Definition AMReX_FFT_Poisson.H:438
void define_doit()
Initialize the discretized Green's function cache (public for CUDA kernels).
Definition AMReX_FFT_Poisson.H:409
PoissonOpenBC(Geometry const &geom, IndexType ixtype=IndexType::TheCellType(), IntVect const &ngrow=IntVect(0))
Build an open-boundary FFT solver over the grown domain.
Definition AMReX_FFT_Poisson.H:398
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:67
Poisson(Geometry const &geom, Array< std::pair< Boundary, Boundary >, 3 > const &bc)
Construct a Poisson solver with explicit boundary types.
Definition AMReX_FFT_Poisson.H:77
void solve(MF &a_soln, MF const &a_rhs)
Solve del dot grad soln = rhs.
Definition AMReX_FFT_Poisson.H:314
Poisson(Geometry const &geom)
Construct a purely periodic Poisson solver.
Definition AMReX_FFT_Poisson.H:102
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:349
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:74
const Box & Domain() const noexcept
Returns our rectangular domain.
Definition AMReX_Geometry.H:211
bool isAllPeriodic() const noexcept
Is domain periodic in all directions?
Definition AMReX_Geometry.H:339
__host__ static __device__ constexpr IndexTypeND< dim > TheCellType() noexcept
This static member function returns an IndexTypeND object of value IndexTypeND::CELL....
Definition AMReX_IndexType.H:152
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:85
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:169
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
T * data() noexcept
Definition AMReX_PODVector.H:666
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1280
std::array< T, N > Array
Definition AMReX_Array.H:26
Definition AMReX_FFT_Helper.H:52
Boundary
Definition AMReX_FFT_Helper.H:58
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:301
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
Definition AMReX_Amr.cpp:49
@ make_alias
Definition AMReX_MakeType.H:7
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Return a BoxND with different type.
Definition AMReX_Box.H:1558
__host__ __device__ BoxND< dim > makeSlab(BoxND< dim > const &b, int direction, int slab_index) noexcept
Definition AMReX_Box.H:2123
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
double second() noexcept
Definition AMReX_Utility.cpp:940
__host__ __device__ BoxND< dim > shift(const BoxND< dim > &b, int dir, int nzones) noexcept
Return a BoxND with indices shifted by nzones in dir direction.
Definition AMReX_Box.H:1495
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1947
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
__host__ __device__ constexpr IntVectND< dim > scale(const IntVectND< dim > &p, int s) noexcept
Returns a IntVectND obtained by multiplying each of the components of this IntVectND by s.
Definition AMReX_IntVect.H:1013
bool TilingIfNotGPU() noexcept
Definition AMReX_MFIter.H:12
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
const int[]
Definition AMReX_BLProfiler.cpp:1664
void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:365
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62
A multidimensional array accessor.
Definition AMReX_Array4.H:283
Definition AMReX_FFT_Helper.H:64
Info & setOneDMode(bool x)
Flag the degenerate 2-D mode (nx==1 or ny==1) that still batches along z.
Definition AMReX_FFT_Helper.H:117
Info & setTwoDMode(bool x)
Restrict transforms to the first two dimensions (3-D problems only).
Definition AMReX_FFT_Helper.H:106
Fixed-size array that can be used on GPU.
Definition AMReX_Array.H:43