Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_FFT_Poisson.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_POISSON_H_
2#define AMREX_FFT_POISSON_H_
3
4#include <AMReX_FFT.H>
5#include <AMReX_Geometry.H>
6
7namespace amrex::FFT
8{
9
17namespace detail {
18template <typename MF>
19void fill_physbc (MF& mf, Geometry const& geom,
20 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc);
21
22}
24
30template <typename MF = MultiFab>
32{
33public:
34
41 Poisson (Geometry const& geom,
42 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
43 requires (IsFabArray_v<MF>)
44 : m_domain_lo(geom.Domain().smallEnd()),
45 m_geom(detail::shift_geom(geom)),
46 m_bc(bc)
47 {
48 bool all_periodic = true;
49 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
50 all_periodic = all_periodic
51 && (bc[idim].first == Boundary::periodic)
52 && (bc[idim].second == Boundary::periodic);
53 }
54 if (all_periodic) {
55 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
56 } else {
57 m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(), m_bc);
58 }
59 }
60
66 explicit Poisson (Geometry const& geom)
67 requires (IsFabArray_v<MF>)
68 : m_domain_lo(geom.Domain().smallEnd()),
69 m_geom(detail::shift_geom(geom)),
70 m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic),
71 std::make_pair(Boundary::periodic,Boundary::periodic),
72 std::make_pair(Boundary::periodic,Boundary::periodic))}
73 {
74 if (m_geom.isAllPeriodic()) {
75 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
76 } else {
77 amrex::Abort("FFT::Poisson: wrong BC");
78 }
79 }
80
91 void solve (MF& a_soln, MF const& a_rhs);
92
93private:
94 IntVect m_domain_lo;
95 Geometry m_geom;
96 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> m_bc;
97 std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
98 std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
99};
100
101#if (AMREX_SPACEDIM == 3)
106template <typename MF = MultiFab>
108{
109public:
110
119 template <typename FA=MF>
120 requires (IsFabArray_v<FA>)
121 explicit PoissonOpenBC (Geometry const& geom,
123 IntVect const& ngrow = IntVect(0),
124 Info const& info = Info{});
125
132 void solve (MF& soln, MF const& rhs);
133
137 void define_doit (); // has to be public for cuda
138
144 [[nodiscard]] IntVect const& PaddedLength () const { return m_solver.PaddedLength(); }
145
146private:
147 static Info make_solver_info (Info const& info);
148
149 Geometry m_geom;
150 Box m_grown_domain;
151 IntVect m_ngrow;
153};
154#endif
155
162template <typename MF = MultiFab>
164{
165public:
166 using T = typename MF::value_type;
167
175 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
176 requires (IsFabArray_v<MF>)
177 : m_domain_lo(geom.Domain().smallEnd()),
178 m_geom(detail::shift_geom(geom)),
179 m_bc(bc)
180 {
181#if (AMREX_SPACEDIM < 3)
182 amrex::Abort("FFT::PoissonHybrid: 1D & 2D todo");
183 return;
184#endif
185 bool periodic_xy = true;
186 for (int idim = 0; idim < 2; ++idim) {
187 if (m_geom.Domain().length(idim) > 1) {
188 periodic_xy = periodic_xy && (bc[idim].first == Boundary::periodic);
189 AMREX_ALWAYS_ASSERT((bc[idim].first == Boundary::periodic &&
190 bc[idim].second == Boundary::periodic) ||
191 (bc[idim].first != Boundary::periodic &&
192 bc[idim].second != Boundary::periodic));
193 }
194 }
196 bc[2].second != Boundary::periodic);
197 Info info{};
198 info.setTwoDMode(true);
199 if (m_geom.Domain().length(0) == 1 || m_geom.Domain().length(1) == 1) {
200 info.setOneDMode(true);
201 }
202 if (periodic_xy) {
203 m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain(),
204 info);
205 } else {
206 m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(),
207 m_bc, info);
208 }
209 build_spmf();
210 }
211
221 void solve (MF& soln, MF const& rhs);
229 void solve (MF& soln, MF const& rhs, Vector<T> const& dz);
237 void solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz);
238
245 void solve_2d (MF& a_soln, MF const& a_rhs);
246
255 template <typename TRIA, typename TRIC>
256 void solve (MF& a_soln, MF const& a_rhs, TRIA const& tria, TRIC const& tric);
257
258 // This is public for cuda
266 template <typename FA, typename TRIA, typename TRIC>
267 void solve_z (FA& spmf, TRIA const& tria, TRIC const& tric);
268
274 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
275
276private:
277
278 void build_spmf ();
279
280 IntVect m_domain_lo;
281 Geometry m_geom;
282 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> m_bc;
283 std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
284 std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
285 MF m_spmf_r;
286 using cMF = FabArray<BaseFab<GpuComplex<T>>>;
287 cMF m_spmf_c;
288};
289
290template <typename MF>
291void Poisson<MF>::solve (MF& a_soln, MF const& a_rhs)
292{
293 BL_PROFILE("FFT::Poisson::solve");
294
295 MF* soln = &a_soln;
296 MF const* rhs = &a_rhs;
297 MF solntmp, rhstmp;
298 if (m_domain_lo != 0) {
299 detail::shift_mfs(m_domain_lo, a_soln, a_rhs, solntmp, rhstmp);
300 soln = &solntmp;
301 rhs = &rhstmp;
302 }
303
304 AMREX_ASSERT(soln->is_cell_centered() && rhs->is_cell_centered());
305
306 using T = typename MF::value_type;
307
309 {AMREX_D_DECL(Math::pi<T>()/T(m_geom.Domain().length(0)),
310 Math::pi<T>()/T(m_geom.Domain().length(1)),
311 Math::pi<T>()/T(m_geom.Domain().length(2)))};
312 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
313 if (m_bc[idim].first == Boundary::periodic) {
314 fac[idim] *= T(2);
315 }
316 }
318 {AMREX_D_DECL(T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0)),
319 T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1)),
320 T(2)/T(m_geom.CellSize(2)*m_geom.CellSize(2)))};
321 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
322 if (m_geom.Domain().length(idim) == 1) {
323 dxfac[idim] = 0;
324 }
325 }
326 auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
327
329 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
330 if (m_bc[idim].first == Boundary::odd &&
331 m_bc[idim].second == Boundary::odd)
332 {
333 offset[idim] = T(1);
334 }
335 else if ((m_bc[idim].first == Boundary::odd &&
336 m_bc[idim].second == Boundary::even) ||
337 (m_bc[idim].first == Boundary::even &&
338 m_bc[idim].second == Boundary::odd))
339 {
340 offset[idim] = T(0.5);
341 }
342 }
343
344 auto f = [=] AMREX_GPU_DEVICE (int i, int j, int k, auto& spectral_data)
345 {
347 AMREX_D_TERM(T a = fac[0]*(i+offset[0]);,
348 T b = fac[1]*(j+offset[1]);,
349 T c = fac[2]*(k+offset[2]));
350 T k2 = AMREX_D_TERM(dxfac[0]*(std::cos(a)-T(1)),
351 +dxfac[1]*(std::cos(b)-T(1)),
352 +dxfac[2]*(std::cos(c)-T(1)));
353 if (k2 != T(0)) {
354 spectral_data /= k2;
355 }
356 spectral_data *= scale;
357 };
358
359 IntVect const& ng = amrex::elemwiseMin(soln->nGrowVect(), IntVect(1));
360
361 if (m_r2x) {
362 m_r2x->forwardThenBackward_doit_0(*rhs, *soln, f, ng, m_geom.periodicity());
363 detail::fill_physbc(*soln, m_geom, m_bc);
364 } else {
365 m_r2c->forward(*rhs);
366 m_r2c->post_forward_doit_0(f);
367 m_r2c->backward_doit(*soln, ng, m_geom.periodicity());
368 }
369}
370
371#if (AMREX_SPACEDIM == 3)
372
373template <typename MF>
374template <typename FA>
375requires (IsFabArray_v<FA>)
377 IntVect const& ngrow, Info const& info)
378 : m_geom(geom),
379 m_grown_domain(amrex::grow(amrex::convert(geom.Domain(),ixtype),ngrow)),
380 m_ngrow(ngrow),
381 m_solver(m_grown_domain, make_solver_info(info))
382{
383 define_doit();
384}
385
386template <typename MF>
388{
389 Info solver_info;
390 solver_info.openbc_padding = info.openbc_padding;
392 return solver_info;
393}
394
395template <typename MF>
397{
398 using T = typename MF::value_type;
399 auto const& lo = m_grown_domain.smallEnd();
400 auto const dx = T(m_geom.CellSize(0));
401 auto const dy = T(m_geom.CellSize(1));
402 auto const dz = T(m_geom.CellSize(2));
403 auto const gfac = T(1)/T(std::sqrt(T(12)));
404 // 0.125 comes from that there are 8 Gauss quadrature points
405 auto const fac = T(-0.125) * (dx*dy*dz) / (T(4)*Math::pi<T>());
406 m_solver.setGreensFunction([=] AMREX_GPU_DEVICE (int i, int j, int k) -> T
407 {
408 auto x = (T(i-lo[0]) - gfac) * dx; // first Gauss quadrature point
409 auto y = (T(j-lo[1]) - gfac) * dy;
410 auto z = (T(k-lo[2]) - gfac) * dz;
411 T r = 0;
412 for (int gx = 0; gx < 2; ++gx) {
413 for (int gy = 0; gy < 2; ++gy) {
414 for (int gz = 0; gz < 2; ++gz) {
415 auto xg = x + 2*gx*gfac*dx;
416 auto yg = y + 2*gy*gfac*dy;
417 auto zg = z + 2*gz*gfac*dz;
418 r += T(1)/std::sqrt(xg*xg+yg*yg+zg*zg);
419 }}}
420 return fac * r;
421 });
422}
423
424template <typename MF>
425void PoissonOpenBC<MF>::solve (MF& soln, MF const& rhs)
426{
427 AMREX_ASSERT(m_grown_domain.ixType() == soln.ixType() && m_grown_domain.ixType() == rhs.ixType());
428 m_solver.solve(soln, rhs);
429}
430
431#endif /* AMREX_SPACEDIM == 3 */
432
434namespace fft_poisson_detail {
435 template <typename T>
436 struct Tri_Zero {
437 [[nodiscard]] constexpr T operator() (int, int, int) const
438 {
439 return 0;
440 }
441 };
442
443 template <typename T>
444 struct Tri_Uniform {
446 T operator() (int, int, int) const
447 {
448 return m_dz2inv;
449 }
450 T m_dz2inv;
451 };
452
453 template <typename T>
454 struct TriA {
456 T operator() (int, int, int k) const
457 {
458 return (k > 0) ? T(2.0) / (m_dz[k]*(m_dz[k]+m_dz[k-1]))
459 : T(1.0) / (m_dz[k]* m_dz[k]);
460 }
461 T const* m_dz;
462 };
463
464 template <typename T>
465 struct TriC {
467 T operator() (int, int, int k) const
468 {
469 return (k < m_size-1) ? T(2.0) / (m_dz[k]*(m_dz[k]+m_dz[k+1]))
470 : T(1.0) / (m_dz[k]* m_dz[k]);
471 }
472 T const* m_dz;
473 int m_size;
474 };
475}
477
478template <typename MF>
479std::pair<BoxArray,DistributionMapping>
481{
482 if (!m_spmf_r.empty()) {
483 return std::make_pair(m_spmf_r.boxArray(), m_spmf_r.DistributionMap());
484 } else {
485 return std::make_pair(m_spmf_c.boxArray(), m_spmf_c.DistributionMap());
486 }
487}
488
489template <typename MF>
491{
492#if (AMREX_SPACEDIM == 3)
493 AMREX_ALWAYS_ASSERT(m_geom.Domain().length(2) > 1 &&
494 (m_geom.Domain().length(0) > 1 ||
495 m_geom.Domain().length(1) > 1));
496
497 if (m_r2c) {
498 Box cdomain = m_geom.Domain();
499 if (cdomain.length(0) > 1) {
500 cdomain.setBig(0,cdomain.length(0)/2);
501 } else {
502 cdomain.setBig(1,cdomain.length(1)/2);
503 }
504 auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
505 {AMREX_D_DECL(true,true,false)});
506 DistributionMapping dm = detail::make_iota_distromap(cba.size());
507 m_spmf_c.define(cba, dm, 1, 0);
508 } else if (m_geom.Domain().length(0) > 1 &&
509 m_geom.Domain().length(1) > 1) {
510 if (m_r2x->m_cy.empty()) { // spectral data is real
512 {AMREX_D_DECL(true,true,false)});
513 DistributionMapping dm = detail::make_iota_distromap(sba.size());
514 m_spmf_r.define(sba, dm, 1, 0);
515 } else { // spectral data is complex. one of the first two dimensions is periodic.
516 Box cdomain = m_geom.Domain();
517 if (m_bc[0].first == Boundary::periodic) {
518 cdomain.setBig(0,cdomain.length(0)/2);
519 } else {
520 cdomain.setBig(1,cdomain.length(1)/2);
521 }
522 auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
523 {AMREX_D_DECL(true,true,false)});
524 DistributionMapping dm = detail::make_iota_distromap(cba.size());
525 m_spmf_c.define(cba, dm, 1, 0);
526 }
527 } else {
528 // spectral data is real
530 {AMREX_D_DECL(true,true,false)});
531 DistributionMapping dm = detail::make_iota_distromap(sba.size());
532 m_spmf_r.define(sba, dm, 1, 0);
533 }
534#else
536#endif
537}
538
539template <typename MF>
540void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs)
541{
542 auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1));
543 solve(soln, rhs,
544 fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)},
545 fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)});
546}
547
548template <typename MF>
549void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz)
550{
552 int(dz.size()) == m_geom.Domain().length(2),
553 "FFT::PoissonHybrid: dz.size() must equal domain length in z");
554
555 auto const* pdz = dz.dataPtr();
556 solve(soln, rhs,
557 fft_poisson_detail::TriA<T>{pdz},
558 fft_poisson_detail::TriC<T>{pdz,int(dz.size())});
559}
560
561template <typename MF>
562void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Vector<T> const& dz)
563{
564 AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered());
566 int(dz.size()) == m_geom.Domain().length(2),
567 "FFT::PoissonHybrid: dz.size() must equal domain length in z");
568
569#ifdef AMREX_USE_GPU
570 Gpu::DeviceVector<T> d_dz(dz.size());
571 Gpu::htod_memcpy_async(d_dz.data(), dz.data(), dz.size()*sizeof(T));
572 auto const* pdz = d_dz.data();
573#else
574 auto const* pdz = dz.data();
575#endif
576 solve(soln, rhs,
577 fft_poisson_detail::TriA<T>{pdz},
578 fft_poisson_detail::TriC<T>{pdz,int(dz.size())});
579}
580
581template <typename MF>
582void PoissonHybrid<MF>::solve_2d (MF& soln, MF const& rhs)
583{
584 solve(soln, rhs, fft_poisson_detail::Tri_Zero<T>{}, fft_poisson_detail::Tri_Zero<T>{});
585}
586
587template <typename MF>
588template <typename TRIA, typename TRIC>
589void PoissonHybrid<MF>::solve (MF& a_soln, MF const& a_rhs, TRIA const& tria,
590 TRIC const& tric)
591{
592 BL_PROFILE("FFT::PoissonHybrid::solve");
593
594 AMREX_ASSERT(a_soln.is_cell_centered() && a_rhs.is_cell_centered());
595
596#if (AMREX_SPACEDIM < 3)
597 amrex::ignore_unused(a_soln, a_rhs, tria, tric);
598#else
599
600 MF* soln = &a_soln;
601 MF const* rhs = &a_rhs;
602 MF solntmp, rhstmp;
603 if (m_domain_lo != 0) {
604 detail::shift_mfs(m_domain_lo, a_soln, a_rhs, solntmp, rhstmp);
605 soln = &solntmp;
606 rhs = &rhstmp;
607 }
608
609 IntVect const& ng = amrex::elemwiseMin(soln->nGrowVect(), IntVect(1));
610
611 if (m_r2c)
612 {
613 m_r2c->forward(*rhs, m_spmf_c);
614 solve_z(m_spmf_c, tria, tric);
615 m_r2c->backward_doit(m_spmf_c, *soln, ng, m_geom.periodicity());
616 }
617 else
618 {
619 if (m_r2x->m_cy.empty()) { // spectral data is real
620 m_r2x->forward(*rhs, m_spmf_r);
621 solve_z(m_spmf_r, tria, tric);
622 m_r2x->backward(m_spmf_r, *soln, ng, m_geom.periodicity());
623 } else { // spectral data is complex.
624 m_r2x->forward(*rhs, m_spmf_c);
625 solve_z(m_spmf_c, tria, tric);
626 m_r2x->backward(m_spmf_c, *soln, ng, m_geom.periodicity());
627 }
628 }
629
630 detail::fill_physbc(*soln, m_geom, m_bc);
631#endif
632}
633
634template <typename MF>
635template <typename FA, typename TRIA, typename TRIC>
636void PoissonHybrid<MF>::solve_z (FA& spmf, TRIA const& tria, TRIC const& tric)
637{
638 BL_PROFILE("PoissonHybrid::solve_z");
639
640#if (AMREX_SPACEDIM < 3)
641 amrex::ignore_unused(spmf, tria, tric);
642#else
643 auto facx = Math::pi<T>()/T(m_geom.Domain().length(0));
644 auto facy = Math::pi<T>()/T(m_geom.Domain().length(1));
645 if (m_bc[0].first == Boundary::periodic) { facx *= T(2); }
646 if (m_bc[1].first == Boundary::periodic) { facy *= T(2); }
647 auto dxfac = T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0));
648 auto dyfac = T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1));
649 auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
650
651 if (m_geom.Domain().length(0) == 1) { dxfac = 0; }
652 if (m_geom.Domain().length(1) == 1) { dyfac = 0; }
653
654 GpuArray<T,AMREX_SPACEDIM-1> offset{T(0),T(0)};
655 for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) {
656 if (m_geom.Domain().length(idim) > 1) {
657 if (m_bc[idim].first == Boundary::odd &&
658 m_bc[idim].second == Boundary::odd)
659 {
660 offset[idim] = T(1);
661 }
662 else if ((m_bc[idim].first == Boundary::odd &&
663 m_bc[idim].second == Boundary::even) ||
664 (m_bc[idim].first == Boundary::even &&
665 m_bc[idim].second == Boundary::odd))
666 {
667 offset[idim] = T(0.5);
668 }
669 }
670 }
671
672 if
673#ifndef _WIN32
674 constexpr
675#endif
676 (std::is_same_v<TRIA,fft_poisson_detail::Tri_Zero<T>> &&
677 std::is_same_v<TRIC,fft_poisson_detail::Tri_Zero<T>>) {
678 amrex::ignore_unused(tria,tric);
679#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
680#pragma omp parallel
681#endif
682 for (MFIter mfi(spmf,TilingIfNotGPU()); mfi.isValid(); ++mfi)
683 {
684 auto const& spectral = spmf.array(mfi);
685 auto const& box = mfi.validbox();
686 amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
687 {
688 T a = facx*(i+offset[0]);
689 T b = facy*(j+offset[1]);
690 T k2 = dxfac * (std::cos(a)-T(1))
691 + dyfac * (std::cos(b)-T(1));
692 if (k2 != T(0)) {
693 spectral(i,j,k) /= k2;
694 }
695 spectral(i,j,k) *= scale;
696 });
697 }
698 } else {
699 bool zlo_neumann = m_bc[2].first == Boundary::even;
700 bool zhi_neumann = m_bc[2].second == Boundary::even;
701 bool is_singular = (offset[0] == T(0)) && (offset[1] == T(0))
702 && zlo_neumann && zhi_neumann;
703
704 auto nz = m_geom.Domain().length(2);
705
706#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
707#pragma omp parallel
708#endif
709 for (MFIter mfi(spmf); mfi.isValid(); ++mfi)
710 {
711 auto const& spectral = spmf.array(mfi);
712 auto const& box = mfi.validbox();
713 auto const& xybox = amrex::makeSlab(box, 2, 0);
714
715#ifdef AMREX_USE_GPU
716 // xxxxx TODO: We need to explore how to optimize this
717 // function. Maybe we can use cusparse. Maybe we should make
718 // z-direction to be the unit stride direction.
719
720 FArrayBox tridiag_workspace(box,4);
721 auto const& ald = tridiag_workspace.array(0);
722 auto const& bd = tridiag_workspace.array(1);
723 auto const& cud = tridiag_workspace.array(2);
724 auto const& scratch = tridiag_workspace.array(3);
725
726 amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int)
727 {
728 T a = facx*(i+offset[0]);
729 T b = facy*(j+offset[1]);
730 T k2 = dxfac * (std::cos(a)-T(1))
731 + dyfac * (std::cos(b)-T(1));
732
733 // Tridiagonal solve
734 for(int k=0; k < nz; k++) {
735 if(k==0) {
736 ald(i,j,k) = T(0.);
737 cud(i,j,k) = tric(i,j,k);
738 if (zlo_neumann) {
739 bd(i,j,k) = k2 - cud(i,j,k);
740 } else {
741 bd(i,j,k) = k2 - cud(i,j,k) - T(2.0)*tria(i,j,k);
742 }
743 } else if (k == nz-1) {
744 ald(i,j,k) = tria(i,j,k);
745 cud(i,j,k) = T(0.);
746 if (zhi_neumann) {
747 bd(i,j,k) = k2 - ald(i,j,k);
748 if (i == 0 && j == 0 && is_singular) {
749 bd(i,j,k) *= T(2.0);
750 }
751 } else {
752 bd(i,j,k) = k2 - ald(i,j,k) - T(2.0)*tric(i,j,k);
753 }
754 } else {
755 ald(i,j,k) = tria(i,j,k);
756 cud(i,j,k) = tric(i,j,k);
757 bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
758 }
759 }
760
761 scratch(i,j,0) = cud(i,j,0)/bd(i,j,0);
762 spectral(i,j,0) = spectral(i,j,0)/bd(i,j,0);
763
764 for (int k = 1; k < nz; k++) {
765 if (k < nz-1) {
766 scratch(i,j,k) = cud(i,j,k) / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
767 }
768 spectral(i,j,k) = (spectral(i,j,k) - ald(i,j,k) * spectral(i,j,k - 1))
769 / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
770 }
771
772 for (int k = nz - 2; k >= 0; k--) {
773 spectral(i,j,k) -= scratch(i,j,k) * spectral(i,j,k + 1);
774 }
775
776 for (int k = 0; k < nz; ++k) {
777 spectral(i,j,k) *= scale;
778 }
779 });
781
782#else
783
784 Gpu::DeviceVector<T> ald(nz);
786 Gpu::DeviceVector<T> cud(nz);
787 Gpu::DeviceVector<T> scratch(nz);
788
789 amrex::LoopOnCpu(xybox, [&] (int i, int j, int)
790 {
791 T a = facx*(i+offset[0]);
792 T b = facy*(j+offset[1]);
793 T k2 = dxfac * (std::cos(a)-T(1))
794 + dyfac * (std::cos(b)-T(1));
795
796 // Tridiagonal solve
797 for(int k=0; k < nz; k++) {
798 if(k==0) {
799 ald[k] = T(0.);
800 cud[k] = tric(i,j,k);
801 if (zlo_neumann) {
802 bd[k] = k2 - cud[k];
803 } else {
804 bd[k] = k2 - cud[k] - T(2.0)*tria(i,j,k);
805 }
806 } else if (k == nz-1) {
807 ald[k] = tria(i,j,k);
808 cud[k] = T(0.);
809 if (zhi_neumann) {
810 bd[k] = k2 - ald[k];
811 if (i == 0 && j == 0 && is_singular) {
812 bd[k] *= T(2.0);
813 }
814 } else {
815 bd[k] = k2 - ald[k] - T(2.0)*tric(i,j,k);
816 }
817 } else {
818 ald[k] = tria(i,j,k);
819 cud[k] = tric(i,j,k);
820 bd[k] = k2 -ald[k]-cud[k];
821 }
822 }
823
824 scratch[0] = cud[0]/bd[0];
825 spectral(i,j,0) = spectral(i,j,0)/bd[0];
826
827 for (int k = 1; k < nz; k++) {
828 if (k < nz-1) {
829 scratch[k] = cud[k] / (bd[k] - ald[k] * scratch[k-1]);
830 }
831 spectral(i,j,k) = (spectral(i,j,k) - ald[k] * spectral(i,j,k - 1))
832 / (bd[k] - ald[k] * scratch[k-1]);
833 }
834
835 for (int k = nz - 2; k >= 0; k--) {
836 spectral(i,j,k) -= scratch[k] * spectral(i,j,k + 1);
837 }
838
839 for (int k = 0; k < nz; ++k) {
840 spectral(i,j,k) *= scale;
841 }
842 });
843#endif
844 }
845 }
846#endif
847}
848
850namespace detail {
851
852template <class T>
853struct FFTPhysBCTag {
854 Array4<T> dfab;
855 Box dbox;
856 Boundary bc;
857 Orientation face;
858
860 Box const& box () const noexcept { return dbox; }
861};
862
863template <typename MF>
864void fill_physbc (MF& mf, Geometry const& geom,
865 Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
866{
867 using T = typename MF::value_type;
868 using Tag = FFTPhysBCTag<T>;
869 Vector<Tag> tags;
870
871 for (MFIter mfi(mf, MFItInfo{}.DisableDeviceSync()); mfi.isValid(); ++mfi)
872 {
873 auto const& box = mfi.fabbox();
874 auto const& arr = mf.array(mfi);
875 for (OrientationIter oit; oit; ++oit) {
876 Orientation face = oit();
877 int idim = face.coordDir();
878 Box b = geom.Domain();
879 Boundary fbc;
880 if (face.isLow()) {
881 b.setRange(idim,geom.Domain().smallEnd(idim)-1);
882 fbc = bc[idim].first;
883 } else {
884 b.setRange(idim,geom.Domain().bigEnd(idim)+1);
885 fbc = bc[idim].second;
886 }
887 b &= box;
888 if (b.ok() && fbc != Boundary::periodic) {
889 tags.push_back(Tag{.dfab = arr, .dbox = b, .bc = fbc, .face = face});
890 }
891 }
892 }
893
894#if defined(AMREX_USE_GPU)
895 amrex::ParallelFor(tags, [=] AMREX_GPU_DEVICE (int i, int j, int k,
896 Tag const& tag) noexcept
897#else
898 auto ntags = int(tags.size());
899#ifdef AMREX_USE_OMP
900#pragma omp parallel for
901#endif
902 for (int itag = 0; itag < ntags; ++itag) {
903 Tag const& tag = tags[itag];
904 amrex::LoopOnCpu(tag.dbox, [&] (int i, int j, int k)
905#endif
906 {
907 int sgn = tag.face.isLow() ? 1 : -1;
908 IntVect siv = IntVect(AMREX_D_DECL(i,j,k))
909 + sgn * IntVect::TheDimensionVector(tag.face.coordDir());
910 if (tag.bc == Boundary::odd) {
911 tag.dfab(i,j,k) = -tag.dfab(siv);
912 } else { // even
913 tag.dfab(i,j,k) = tag.dfab(siv);
914 }
915 });
916#if !defined(AMREX_USE_GPU)
917 }
918#endif
919}
920}
922
923}
924
925#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
Array4< T const > array() const noexcept
Definition AMReX_BaseFab.H:387
__host__ __device__ BoxND & setBig(const IntVectND< dim > &bg) noexcept
Redefine the big end of the BoxND.
Definition AMReX_Box.H:495
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:155
const Real * CellSize() const noexcept
Returns the cellsize for each coordinate direction.
Definition AMReX_CoordSys.H:71
A Fortran Array of REALs.
Definition AMReX_FArrayBox.H:231
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:26
IntVect const & PaddedLength() const
Access the one-sided padded length used to build the internal FFT domain.
Definition AMReX_FFT_OpenBCSolver.H:68
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:164
void solve(MF &soln, MF const &rhs)
Solve del dot grad soln = rhs for uniform spacing in all directions.
Definition AMReX_FFT_Poisson.H:540
typename MF::value_type T
Definition AMReX_FFT_Poisson.H:166
PoissonHybrid(Geometry const &geom, Array< std::pair< Boundary, Boundary >, 3 > const &bc)
Construct the hybrid Poisson solver (mixed BCs in z with optional nonuniform spacing).
Definition AMReX_FFT_Poisson.H:174
void solve_z(FA &spmf, TRIA const &tria, TRIC const &tric)
CUDA helper that applies the supplied tridiagonal operator along z.
Definition AMReX_FFT_Poisson.H:636
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Layout information for spectral storage used by the hybrid solver.
Definition AMReX_FFT_Poisson.H:480
void solve_2d(MF &a_soln, MF const &a_rhs)
Solve an independent 2-D Poisson problem on every z-plane.
Definition AMReX_FFT_Poisson.H:582
Poisson solve for Open BC using FFT.
Definition AMReX_FFT_Poisson.H:108
void solve(MF &soln, MF const &rhs)
Solve the open-boundary Poisson problem.
Definition AMReX_FFT_Poisson.H:425
IntVect const & PaddedLength() const
Access the one-sided padded length used by the internal OpenBC solver.
Definition AMReX_FFT_Poisson.H:144
void define_doit()
Initialize the discretized Green's function cache (public for CUDA kernels).
Definition AMReX_FFT_Poisson.H:396
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:32
Poisson(Geometry const &geom, Array< std::pair< Boundary, Boundary >, 3 > const &bc)
Construct a Poisson solver with explicit boundary types.
Definition AMReX_FFT_Poisson.H:41
Poisson(Geometry const &geom)
Construct a purely periodic Poisson solver.
Definition AMReX_FFT_Poisson.H:66
void solve(MF &a_soln, MF const &a_rhs)
Solve del dot grad soln = rhs.
Definition AMReX_FFT_Poisson.H:291
An Array of FortranArrayBox(FAB)-like Objects.
Definition AMReX_FabArray.H:344
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:75
const Box & Domain() const noexcept
Returns our rectangular domain.
Definition AMReX_Geometry.H:216
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:361
bool isAllPeriodic() const noexcept
Is domain periodic in all directions?
Definition AMReX_Geometry.H:344
__host__ static __device__ constexpr IndexTypeND< dim > TheCellType() noexcept
This static member function returns an IndexTypeND object of value IndexTypeND::CELL....
Definition AMReX_IndexType.H:150
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
T * data() noexcept
Definition AMReX_PODVector.H:666
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
Long size() const noexcept
Definition AMReX_Vector.H:54
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1289
std::array< T, N > Array
Definition AMReX_Array.H:26
Definition AMReX_FFT_Helper.H:53
Boundary
Definition AMReX_FFT_Helper.H:59
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Return a BoxND with different type.
Definition AMReX_Box.H:1567
__host__ __device__ BoxND< dim > makeSlab(BoxND< dim > const &b, int direction, int slab_index) noexcept
Definition AMReX_Box.H:2140
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
double second() noexcept
Definition AMReX_Utility.cpp:940
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1943
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
__host__ __device__ constexpr IntVectND< dim > scale(const IntVectND< dim > &p, int s) noexcept
Returns a IntVectND obtained by multiplying each of the components of this IntVectND by s.
Definition AMReX_IntVect.H:1102
bool TilingIfNotGPU() noexcept
Definition AMReX_MFIter.H:12
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
const int[]
Definition AMReX_BLProfiler.cpp:1664
void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:365
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:63
A multidimensional array accessor.
Definition AMReX_Array4.H:285
Definition AMReX_FFT_Helper.H:83
bool openbc_padding
Whether OpenBCSolver pads internal FFT lengths for better performance.
Definition AMReX_FFT_Helper.H:112
Info & setOneDMode(bool x)
Flag the degenerate 2-D mode (nx==1 or ny==1) that still batches along z.
Definition AMReX_FFT_Helper.H:149
int openbc_padding_nfactors
Definition AMReX_FFT_Helper.H:116
Info & setTwoDMode(bool x)
Restrict transforms to the first two dimensions (3-D problems only).
Definition AMReX_FFT_Helper.H:138
Fixed-size array that can be used on GPU.
Definition AMReX_Array.H:43