Block-Structured AMR Software Framework
AMReX_FFT_Poisson.H
Go to the documentation of this file.
1 #ifndef AMREX_FFT_POISSON_H_
2 #define AMREX_FFT_POISSON_H_
3 
4 #include <AMReX_FFT.H>
5 #include <AMReX_Geometry.H>
6 
7 namespace amrex::FFT
8 {
9 
10 namespace detail {
11 template <typename MF>
12 void fill_physbc (MF& mf, Geometry const& geom,
13  Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc);
14 }
15 
20 template <typename MF = MultiFab>
21 class Poisson
22 {
23 public:
24 
25  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
26  Poisson (Geometry const& geom,
27  Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
28  : m_geom(geom), m_bc(bc)
29  {
30  bool all_periodic = true;
31  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
32  all_periodic = all_periodic
33  && (bc[idim].first == Boundary::periodic)
34  && (bc[idim].second == Boundary::periodic);
35  }
36  if (all_periodic) {
37  m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
38  } else {
39  m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(), m_bc);
40  }
41  }
42 
43  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
44  explicit Poisson (Geometry const& geom)
45  : m_geom(geom),
46  m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic),
47  std::make_pair(Boundary::periodic,Boundary::periodic),
48  std::make_pair(Boundary::periodic,Boundary::periodic))}
49  {
50  if (m_geom.isAllPeriodic()) {
51  m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
52  } else {
53  amrex::Abort("FFT::Poisson: wrong BC");
54  }
55  }
56 
57  /*
58  * \brief Solve del dot grad soln = rhs
59  *
60  * If soln has ghost cells, one layer of ghost cells will be filled
61  * except for the corners of the physical domain where they are not used
62  * in the cross stencil of the operator. The two MFs could be the same MF.
63  */
64  void solve (MF& soln, MF const& rhs);
65 
66 private:
69  std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
70  std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
71 };
72 
73 #if (AMREX_SPACEDIM == 3)
77 template <typename MF = MultiFab>
78 class PoissonOpenBC
79 {
80 public:
81 
82  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
83  explicit PoissonOpenBC (Geometry const& geom,
85  IntVect const& ngrow = IntVect(0));
86 
87  void solve (MF& soln, MF const& rhs);
88 
89  void define_doit (); // has to be public for cuda
90 
91 private:
92  Geometry m_geom;
93  Box m_grown_domain;
94  IntVect m_ngrow;
96 };
97 #endif
98 
104 template <typename MF = MultiFab>
106 {
107 public:
108  using T = typename MF::value_type;
109 
110  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
111  PoissonHybrid (Geometry const& geom,
112  Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
113  : m_geom(geom), m_bc(bc)
114  {
115 #if (AMREX_SPACEDIM < 3)
116  amrex::Abort("FFT::PoissonHybrid: 1D & 2D todo");
117  return;
118 #endif
119  bool periodic_xy = true;
120  for (int idim = 0; idim < 2; ++idim) {
121  if (m_geom.Domain().length(idim) > 1) {
122  periodic_xy = periodic_xy && (bc[idim].first == Boundary::periodic);
123  AMREX_ALWAYS_ASSERT((bc[idim].first == Boundary::periodic &&
124  bc[idim].second == Boundary::periodic) ||
125  (bc[idim].first != Boundary::periodic &&
126  bc[idim].second != Boundary::periodic));
127  }
128  }
129  Info info{};
130  info.setBatchMode(true);
131  if (periodic_xy) {
132  m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain(),
133  info);
134  } else {
135  m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(),
136  m_bc, info);
137  }
138  build_spmf();
139  }
140 
141  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
142  explicit PoissonHybrid (Geometry const& geom)
143  : m_geom(geom),
144  m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic),
145  std::make_pair(Boundary::periodic,Boundary::periodic),
146  std::make_pair(Boundary::even,Boundary::even))},
147  m_r2c(std::make_unique<R2C<typename MF::value_type>>
148  (geom.Domain(), Info().setBatchMode(true)))
149  {
150 #if (AMREX_SPACEDIM == 3)
151  AMREX_ALWAYS_ASSERT(geom.isPeriodic(0) && geom.isPeriodic(1));
152 #else
153  amrex::Abort("FFT::PoissonHybrid: 1D & 2D todo");
154  return;
155 #endif
156  build_spmf();
157  }
158 
159  /*
160  * \brief Solve del dot grad soln = rhs
161  *
162  * If soln has ghost cells, one layer of ghost cells will be filled
163  * except for the corners of the physical domain where they are not used
164  * in the cross stencil of the operator. The two MFs could be the same MF.
165  */
166  void solve (MF& soln, MF const& rhs);
167  void solve (MF& soln, MF const& rhs, Vector<T> const& dz);
168  void solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz);
169 
170  template <typename TRIA, typename TRIC>
171  void solve (MF& soln, MF const& rhs, TRIA const& tria, TRIC const& tric);
172 
173  // This is public for cuda
174  template <typename FA, typename TRIA, typename TRIC>
175  void solve_z (FA& spmf, TRIA const& tria, TRIC const& tric);
176 
177  [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
178 
179 private:
180 
181  void build_spmf ();
182 
185  std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
186  std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
190 };
191 
192 template <typename MF>
193 void Poisson<MF>::solve (MF& soln, MF const& rhs)
194 {
195  BL_PROFILE("FFT::Poisson::solve");
196 
197  AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered());
198 
199  using T = typename MF::value_type;
200 
202  {AMREX_D_DECL(Math::pi<T>()/T(m_geom.Domain().length(0)),
203  Math::pi<T>()/T(m_geom.Domain().length(1)),
204  Math::pi<T>()/T(m_geom.Domain().length(2)))};
205  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
206  if (m_bc[idim].first == Boundary::periodic) {
207  fac[idim] *= T(2);
208  }
209  }
211  {AMREX_D_DECL(T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0)),
212  T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1)),
213  T(2)/T(m_geom.CellSize(2)*m_geom.CellSize(2)))};
214  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
215  if (m_geom.Domain().length(idim) == 1) {
216  dxfac[idim] = 0;
217  }
218  }
219  auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
220 
222  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
223  if (m_bc[idim].first == Boundary::odd &&
224  m_bc[idim].second == Boundary::odd)
225  {
226  offset[idim] = T(1);
227  }
228  else if ((m_bc[idim].first == Boundary::odd &&
229  m_bc[idim].second == Boundary::even) ||
230  (m_bc[idim].first == Boundary::even &&
231  m_bc[idim].second == Boundary::odd))
232  {
233  offset[idim] = T(0.5);
234  }
235  }
236 
237  auto f = [=] AMREX_GPU_DEVICE (int i, int j, int k, auto& spectral_data)
238  {
240  AMREX_D_TERM(T a = fac[0]*(i+offset[0]);,
241  T b = fac[1]*(j+offset[1]);,
242  T c = fac[2]*(k+offset[2]));
243  T k2 = AMREX_D_TERM(dxfac[0]*(std::cos(a)-T(1)),
244  +dxfac[1]*(std::cos(b)-T(1)),
245  +dxfac[2]*(std::cos(c)-T(1)));
246  if (k2 != T(0)) {
247  spectral_data /= k2;
248  }
249  spectral_data *= scale;
250  };
251 
252  IntVect const& ng = amrex::elemwiseMin(soln.nGrowVect(), IntVect(1));
253 
254  if (m_r2x) {
255  m_r2x->template forwardThenBackward_doit<0>(rhs, soln, f, ng, m_geom.periodicity());
256  detail::fill_physbc(soln, m_geom, m_bc);
257  } else {
258  m_r2c->forward(rhs);
259  m_r2c->template post_forward_doit<0>(f);
260  m_r2c->backward_doit(soln, ng, m_geom.periodicity());
261  }
262 }
263 
264 #if (AMREX_SPACEDIM == 3)
265 
266 template <typename MF>
267 template <typename FA, std::enable_if_t<IsFabArray_v<FA>,int> FOO>
268 PoissonOpenBC<MF>::PoissonOpenBC (Geometry const& geom, IndexType ixtype,
269  IntVect const& ngrow)
270  : m_geom(geom),
271  m_grown_domain(amrex::grow(amrex::convert(geom.Domain(),ixtype),ngrow)),
272  m_ngrow(ngrow),
273  m_solver(m_grown_domain)
274 {
275  define_doit();
276 }
277 
278 template <typename MF>
279 void PoissonOpenBC<MF>::define_doit ()
280 {
281  using T = typename MF::value_type;
282  auto const& lo = m_grown_domain.smallEnd();
283  auto const dx = T(m_geom.CellSize(0));
284  auto const dy = T(m_geom.CellSize(1));
285  auto const dz = T(m_geom.CellSize(2));
286  auto const gfac = T(1)/T(std::sqrt(T(12)));
287  // 0.125 comes from that there are 8 Gauss quadrature points
288  auto const fac = T(-0.125) * (dx*dy*dz) / (T(4)*Math::pi<T>());
289  m_solver.setGreensFunction([=] AMREX_GPU_DEVICE (int i, int j, int k) -> T
290  {
291  auto x = (T(i-lo[0]) - gfac) * dx; // first Gauss quadrature point
292  auto y = (T(j-lo[1]) - gfac) * dy;
293  auto z = (T(k-lo[2]) - gfac) * dz;
294  T r = 0;
295  for (int gx = 0; gx < 2; ++gx) {
296  for (int gy = 0; gy < 2; ++gy) {
297  for (int gz = 0; gz < 2; ++gz) {
298  auto xg = x + 2*gx*gfac*dx;
299  auto yg = y + 2*gy*gfac*dy;
300  auto zg = z + 2*gz*gfac*dz;
301  r += T(1)/std::sqrt(xg*xg+yg*yg+zg*zg);
302  }}}
303  return fac * r;
304  });
305 }
306 
307 template <typename MF>
308 void PoissonOpenBC<MF>::solve (MF& soln, MF const& rhs)
309 {
310  AMREX_ASSERT(m_grown_domain.ixType() == soln.ixType() && m_grown_domain.ixType() == rhs.ixType());
311  m_solver.solve(soln, rhs);
312 }
313 
314 #endif /* AMREX_SPACEDIM == 3 */
315 
316 namespace fft_poisson_detail {
317  template <typename T>
318  struct Tri_Uniform {
320  T operator() (int, int, int) const
321  {
322  return m_dz2inv;
323  }
325  };
326 
327  template <typename T>
328  struct TriA {
330  T operator() (int, int, int k) const
331  {
332  return T(2.0) /(m_dz[k]*(m_dz[k]+m_dz[k-1]));
333  }
334  T const* m_dz;
335  };
336 
337  template <typename T>
338  struct TriC {
340  T operator() (int, int, int k) const
341  {
342  return T(2.0) /(m_dz[k]*(m_dz[k]+m_dz[k+1]));
343  }
344  T const* m_dz;
345  };
346 }
347 
348 template <typename MF>
349 std::pair<BoxArray,DistributionMapping>
351 {
352  if (!m_spmf_r.empty()) {
353  return std::make_pair(m_spmf_r.boxArray(), m_spmf_r.DistributionMap());
354  } else {
355  return std::make_pair(m_spmf_c.boxArray(), m_spmf_c.DistributionMap());
356  }
357 }
358 
359 template <typename MF>
361 {
362 #if (AMREX_SPACEDIM == 3)
363  AMREX_ALWAYS_ASSERT(m_geom.Domain().length(2) > 1 &&
364  (m_geom.Domain().length(0) > 1 ||
365  m_geom.Domain().length(1) > 1));
366 
367  if (m_r2c) {
368  Box cdomain = m_geom.Domain();
369  if (cdomain.length(0) > 1) {
370  cdomain.setBig(0,cdomain.length(0)/2);
371  } else {
372  cdomain.setBig(1,cdomain.length(1)/2);
373  }
374  auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
375  {AMREX_D_DECL(true,true,false)});
377  m_spmf_c.define(cba, dm, 1, 0);
378  } else if (m_geom.Domain().length(0) > 1 &&
379  m_geom.Domain().length(1) > 1) {
380  if (m_r2x->m_cy.empty()) { // spectral data is real
381  auto sba = amrex::decompose(m_geom.Domain(),ParallelContext::NProcsSub(),
382  {AMREX_D_DECL(true,true,false)});
384  m_spmf_r.define(sba, dm, 1, 0);
385  } else { // spectral data is complex. one of the first two dimensions is periodic.
386  Box cdomain = m_geom.Domain();
387  if (m_bc[0].first == Boundary::periodic) {
388  cdomain.setBig(0,cdomain.length(0)/2);
389  } else {
390  cdomain.setBig(1,cdomain.length(1)/2);
391  }
392  auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
393  {AMREX_D_DECL(true,true,false)});
395  m_spmf_c.define(cba, dm, 1, 0);
396  }
397  } else {
398  // spectral data is real
399  auto sba = amrex::decompose(m_geom.Domain(),ParallelContext::NProcsSub(),
400  {AMREX_D_DECL(true,true,false)});
402  m_spmf_r.define(sba, dm, 1, 0);
403  }
404 #else
405  amrex::ignore_unused(this);
406 #endif
407 }
408 
409 template <typename MF>
410 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs)
411 {
412  auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1));
413  solve(soln, rhs,
414  fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)},
415  fft_poisson_detail::Tri_Uniform<T>{T(1)/(delz*delz)});
416 }
417 
418 template <typename MF>
419 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz)
420 {
421  auto const* pdz = dz.dataPtr();
422  solve(soln, rhs,
425 }
426 
427 template <typename MF>
428 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Vector<T> const& dz)
429 {
430  AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered());
431 
432 #ifdef AMREX_USE_GPU
433  Gpu::DeviceVector<T> d_dz(dz.size());
434  Gpu::htod_memcpy_async(d_dz.data(), dz.data(), dz.size()*sizeof(T));
435  auto const* pdz = d_dz.data();
436 #else
437  auto const* pdz = dz.data();
438 #endif
439  solve(soln, rhs,
442 }
443 
444 template <typename MF>
445 template <typename TRIA, typename TRIC>
446 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, TRIA const& tria,
447  TRIC const& tric)
448 {
449  BL_PROFILE("FFT::PoissonHybrid::solve");
450 
451  AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered());
452 
453 #if (AMREX_SPACEDIM < 3)
454  amrex::ignore_unused(soln, rhs, tria, tric);
455 #else
456 
457  IntVect const& ng = amrex::elemwiseMin(soln.nGrowVect(), IntVect(1));
458 
459  if (m_r2c)
460  {
461  m_r2c->forward(rhs, m_spmf_c);
462  solve_z(m_spmf_c, tria, tric);
463  m_r2c->backward_doit(m_spmf_c, soln, ng, m_geom.periodicity());
464  }
465  else
466  {
467  if (m_r2x->m_cy.empty()) { // spectral data is real
468  m_r2x->forward(rhs, m_spmf_r);
469  solve_z(m_spmf_r, tria, tric);
470  m_r2x->backward(m_spmf_r, soln, ng, m_geom.periodicity());
471  } else { // spectral data is complex.
472  m_r2x->forward(rhs, m_spmf_c);
473  solve_z(m_spmf_c, tria, tric);
474  m_r2x->backward(m_spmf_c, soln, ng, m_geom.periodicity());
475  }
476  }
477 
478  detail::fill_physbc(soln, m_geom, m_bc);
479 #endif
480 }
481 
482 template <typename MF>
483 template <typename FA, typename TRIA, typename TRIC>
484 void PoissonHybrid<MF>::solve_z (FA& spmf, TRIA const& tria, TRIC const& tric)
485 {
486  BL_PROFILE("PoissonHybrid::solve_z");
487 
488 #if (AMREX_SPACEDIM < 3)
489  amrex::ignore_unused(spmf, tria, tric);
490 #else
491  auto facx = Math::pi<T>()/T(m_geom.Domain().length(0));
492  auto facy = Math::pi<T>()/T(m_geom.Domain().length(1));
493  if (m_bc[0].first == Boundary::periodic) { facx *= T(2); }
494  if (m_bc[1].first == Boundary::periodic) { facy *= T(2); }
495  auto dxfac = T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0));
496  auto dyfac = T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1));
497  auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
498 
499  if (m_geom.Domain().length(0) == 1) { dxfac = 0; }
500  if (m_geom.Domain().length(1) == 1) { dyfac = 0; }
501 
502  GpuArray<T,AMREX_SPACEDIM-1> offset{T(0),T(0)};
503  for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) {
504  if (m_geom.Domain().length(idim) > 1) {
505  if (m_bc[idim].first == Boundary::odd &&
506  m_bc[idim].second == Boundary::odd)
507  {
508  offset[idim] = T(1);
509  }
510  else if ((m_bc[idim].first == Boundary::odd &&
511  m_bc[idim].second == Boundary::even) ||
512  (m_bc[idim].first == Boundary::even &&
513  m_bc[idim].second == Boundary::odd))
514  {
515  offset[idim] = T(0.5);
516  }
517  }
518  }
519 
520  bool has_dirichlet = (offset[0] != T(0)) || (offset[1] != T(0));
521 
522  auto nz = m_geom.Domain().length(2);
523 
524  for (MFIter mfi(spmf); mfi.isValid(); ++mfi)
525  {
526  auto const& spectral = spmf.array(mfi);
527  auto const& box = mfi.validbox();
528  auto const& xybox = amrex::makeSlab(box, 2, 0);
529 
530 #ifdef AMREX_USE_GPU
531  // xxxxx TODO: We need to explore how to optimize this
532  // function. Maybe we can use cusparse. Maybe we should make
533  // z-direction to be the unit stride direction.
534 
535  FArrayBox tridiag_workspace(box,4);
536  auto const& ald = tridiag_workspace.array(0);
537  auto const& bd = tridiag_workspace.array(1);
538  auto const& cud = tridiag_workspace.array(2);
539  auto const& scratch = tridiag_workspace.array(3);
540 
541  amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int)
542  {
543  T a = facx*(i+offset[0]);
544  T b = facy*(j+offset[1]);
545  T k2 = dxfac * (std::cos(a)-T(1))
546  + dyfac * (std::cos(b)-T(1));
547 
548  // Tridiagonal solve with homogeneous Neumann
549  for(int k=0; k < nz; k++) {
550  if(k==0) {
551  ald(i,j,k) = T(0.);
552  cud(i,j,k) = tric(i,j,k);
553  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
554  } else if (k == nz-1) {
555  ald(i,j,k) = tria(i,j,k);
556  cud(i,j,k) = T(0.);
557  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
558  if (i == 0 && j == 0 && !has_dirichlet) {
559  bd(i,j,k) *= T(2.0);
560  }
561  } else {
562  ald(i,j,k) = tria(i,j,k);
563  cud(i,j,k) = tric(i,j,k);
564  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
565  }
566  }
567 
568  scratch(i,j,0) = cud(i,j,0)/bd(i,j,0);
569  spectral(i,j,0) = spectral(i,j,0)/bd(i,j,0);
570 
571  for (int k = 1; k < nz; k++) {
572  if (k < nz-1) {
573  scratch(i,j,k) = cud(i,j,k) / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
574  }
575  spectral(i,j,k) = (spectral(i,j,k) - ald(i,j,k) * spectral(i,j,k - 1))
576  / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
577  }
578 
579  for (int k = nz - 2; k >= 0; k--) {
580  spectral(i,j,k) -= scratch(i,j,k) * spectral(i,j,k + 1);
581  }
582 
583  for (int k = 0; k < nz; ++k) {
584  spectral(i,j,k) *= scale;
585  }
586  });
588 
589 #else
590 
591  Gpu::DeviceVector<T> ald(nz);
592  Gpu::DeviceVector<T> bd(nz);
593  Gpu::DeviceVector<T> cud(nz);
594  Gpu::DeviceVector<T> scratch(nz);
595 
596  amrex::LoopOnCpu(xybox, [&] (int i, int j, int)
597  {
598  T a = facx*(i+offset[0]);
599  T b = facy*(j+offset[1]);
600  T k2 = dxfac * (std::cos(a)-T(1))
601  + dyfac * (std::cos(b)-T(1));
602 
603  // Tridiagonal solve with homogeneous Neumann
604  for(int k=0; k < nz; k++) {
605  if(k==0) {
606  ald[k] = T(0.);
607  cud[k] = tric(i,j,k);
608  bd[k] = k2 -ald[k]-cud[k];
609  } else if (k == nz-1) {
610  ald[k] = tria(i,j,k);
611  cud[k] = T(0.);
612  bd[k] = k2 -ald[k]-cud[k];
613  if (i == 0 && j == 0 && !has_dirichlet) {
614  bd[k] *= T(2.0);
615  }
616  } else {
617  ald[k] = tria(i,j,k);
618  cud[k] = tric(i,j,k);
619  bd[k] = k2 -ald[k]-cud[k];
620  }
621  }
622 
623  scratch[0] = cud[0]/bd[0];
624  spectral(i,j,0) = spectral(i,j,0)/bd[0];
625 
626  for (int k = 1; k < nz; k++) {
627  if (k < nz-1) {
628  scratch[k] = cud[k] / (bd[k] - ald[k] * scratch[k-1]);
629  }
630  spectral(i,j,k) = (spectral(i,j,k) - ald[k] * spectral(i,j,k - 1))
631  / (bd[k] - ald[k] * scratch[k-1]);
632  }
633 
634  for (int k = nz - 2; k >= 0; k--) {
635  spectral(i,j,k) -= scratch[k] * spectral(i,j,k + 1);
636  }
637 
638  for (int k = 0; k < nz; ++k) {
639  spectral(i,j,k) *= scale;
640  }
641  });
642 #endif
643  }
644 #endif
645 }
646 
647 namespace detail {
648 
649 template <class T>
650 struct FFTPhysBCTag {
653  Boundary bc;
655 
657  Box const& box () const noexcept { return dbox; }
658 };
659 
660 template <typename MF>
661 void fill_physbc (MF& mf, Geometry const& geom,
662  Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
663 {
664  using T = typename MF::value_type;
665  using Tag = FFTPhysBCTag<T>;
666  Vector<Tag> tags;
667 
668  for (MFIter mfi(mf, MFItInfo{}.DisableDeviceSync()); mfi.isValid(); ++mfi)
669  {
670  auto const& box = mfi.fabbox();
671  auto const& arr = mf.array(mfi);
672  for (OrientationIter oit; oit; ++oit) {
673  Orientation face = oit();
674  int idim = face.coordDir();
675  Box b = geom.Domain();
676  Boundary fbc;
677  if (face.isLow()) {
678  b.setRange(idim,geom.Domain().smallEnd(idim)-1);
679  fbc = bc[idim].first;
680  } else {
681  b.setRange(idim,geom.Domain().bigEnd(idim)+1);
682  fbc = bc[idim].second;
683  }
684  b &= box;
685  if (b.ok() && fbc != Boundary::periodic) {
686  tags.push_back({arr, b, fbc, face});
687  }
688  }
689  }
690 
691 #if defined(AMREX_USE_GPU)
692  amrex::ParallelFor(tags, [=] AMREX_GPU_DEVICE (int i, int j, int k,
693  Tag const& tag) noexcept
694 #else
695  auto ntags = int(tags.size());
696 #ifdef AMREX_USE_OMP
697 #pragma omp parallel for
698 #endif
699  for (int itag = 0; itag < ntags; ++itag) {
700  Tag const& tag = tags[itag];
701  amrex::LoopOnCpu(tag.dbox, [&] (int i, int j, int k)
702 #endif
703  {
704  int sgn = tag.face.isLow() ? 1 : -1;
705  IntVect siv = IntVect(AMREX_D_DECL(i,j,k))
706  + sgn * IntVect::TheDimensionVector(tag.face.coordDir());
707  if (tag.bc == Boundary::odd) {
708  tag.dfab(i,j,k) = -tag.dfab(siv);
709  } else { // even
710  tag.dfab(i,j,k) = tag.dfab(siv);
711  }
712  });
713 #if !defined(AMREX_USE_GPU)
714  }
715 #endif
716 }
717 }
718 
719 }
720 
721 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_TERM(a, b, c)
Definition: AMReX_SPACE.H:129
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
Definition: AMReX_BaseFab.H:379
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & smallEnd() const &noexcept
Get the smallend of the BoxND.
Definition: AMReX_Box.H:105
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & bigEnd() const &noexcept
Get the bigend.
Definition: AMReX_Box.H:116
AMREX_GPU_HOST_DEVICE IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition: AMReX_Box.H:146
AMREX_GPU_HOST_DEVICE BoxND & setBig(const IntVectND< dim > &bg) noexcept
Redefine the big end of the BoxND.
Definition: AMReX_Box.H:474
Calculates the distribution of FABs to MPI processes.
Definition: AMReX_DistributionMapping.H:41
A Fortran Array of REALs.
Definition: AMReX_FArrayBox.H:229
Definition: AMReX_FFT_OpenBCSolver.H:11
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition: AMReX_FFT_Poisson.H:106
void solve(MF &soln, MF const &rhs)
Definition: AMReX_FFT_Poisson.H:410
std::unique_ptr< R2C< typename MF::value_type > > m_r2c
Definition: AMReX_FFT_Poisson.H:186
typename MF::value_type T
Definition: AMReX_FFT_Poisson.H:108
Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > m_bc
Definition: AMReX_FFT_Poisson.H:184
cMF m_spmf_c
Definition: AMReX_FFT_Poisson.H:189
void solve_z(FA &spmf, TRIA const &tria, TRIC const &tric)
Definition: AMReX_FFT_Poisson.H:484
MF m_spmf_r
Definition: AMReX_FFT_Poisson.H:187
std::unique_ptr< R2X< typename MF::value_type > > m_r2x
Definition: AMReX_FFT_Poisson.H:185
Geometry m_geom
Definition: AMReX_FFT_Poisson.H:183
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Definition: AMReX_FFT_Poisson.H:350
PoissonHybrid(Geometry const &geom, Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > const &bc)
Definition: AMReX_FFT_Poisson.H:111
void build_spmf()
Definition: AMReX_FFT_Poisson.H:360
PoissonHybrid(Geometry const &geom)
Definition: AMReX_FFT_Poisson.H:142
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition: AMReX_FFT_Poisson.H:22
Poisson(Geometry const &geom, Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > const &bc)
Definition: AMReX_FFT_Poisson.H:26
std::unique_ptr< R2X< typename MF::value_type > > m_r2x
Definition: AMReX_FFT_Poisson.H:69
Geometry m_geom
Definition: AMReX_FFT_Poisson.H:67
Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > m_bc
Definition: AMReX_FFT_Poisson.H:68
Poisson(Geometry const &geom)
Definition: AMReX_FFT_Poisson.H:44
void solve(MF &soln, MF const &rhs)
Definition: AMReX_FFT_Poisson.H:193
std::unique_ptr< R2C< typename MF::value_type > > m_r2c
Definition: AMReX_FFT_Poisson.H:70
Parallel Discrete Fourier Transform.
Definition: AMReX_FFT_R2C.H:36
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:344
Rectangular problem domain geometry.
Definition: AMReX_Geometry.H:73
const Box & Domain() const noexcept
Returns our rectangular domain.
Definition: AMReX_Geometry.H:210
bool isAllPeriodic() const noexcept
Is domain periodic in all directions?
Definition: AMReX_Geometry.H:338
bool isPeriodic(int dir) const noexcept
Is the domain periodic in the specified direction?
Definition: AMReX_Geometry.H:331
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IndexTypeND< dim > TheCellType() noexcept
This static member function returns an IndexTypeND object of value IndexTypeND::CELL....
Definition: AMReX_IndexType.H:149
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
An Iterator over the Orientation of Faces of a Box.
Definition: AMReX_Orientation.H:135
Encapsulation of the Orientation of the Faces of a Box.
Definition: AMReX_Orientation.H:29
AMREX_GPU_HOST_DEVICE int coordDir() const noexcept
Returns the coordinate direction.
Definition: AMReX_Orientation.H:83
AMREX_GPU_HOST_DEVICE bool isLow() const noexcept
Returns true if Orientation is low.
Definition: AMReX_Orientation.H:89
Definition: AMReX_PODVector.H:246
T * data() noexcept
Definition: AMReX_PODVector.H:593
T * dataPtr() noexcept
Definition: AMReX_PODVector.H:597
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
void fill_physbc(MF &mf, Geometry const &geom, Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > const &bc)
Definition: AMReX_FFT_Poisson.H:661
DistributionMapping make_iota_distromap(Long n)
Definition: AMReX_FFT.cpp:88
Definition: AMReX_FFT.cpp:7
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:355
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition: AMReX_Box.H:1211
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Returns a BoxND with different type.
Definition: AMReX_Box.H:1435
double second() noexcept
Definition: AMReX_Utility.cpp:922
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVectND< dim > scale(const IntVectND< dim > &p, int s) noexcept
Returns a IntVectND obtained by multiplying each of the components of this IntVectND by s.
Definition: AMReX_IntVect.H:1006
BoxArray decompose(Box const &domain, int nboxes, Array< bool, AMREX_SPACEDIM > const &decomp={AMREX_D_DECL(true, true, true)}, bool no_overlap=false)
Decompose domain box into BoxArray.
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > makeSlab(BoxND< dim > const &b, int direction, int slab_index) noexcept
Definition: AMReX_Box.H:1998
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMin(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuComplex< T > sqrt(const GpuComplex< T > &a_z) noexcept
Return the square root of a complex number.
Definition: AMReX_GpuComplex.H:373
std::array< T, N > Array
Definition: AMReX_Array.H:24
Definition: AMReX_FabArrayCommI.H:896
Definition: AMReX_Array4.H:61
Definition: AMReX_FFT_Helper.H:58
Info & setBatchMode(bool x)
Definition: AMReX_FFT_Helper.H:67
Definition: AMReX_FFT_Poisson.H:650
Orientation face
Definition: AMReX_FFT_Poisson.H:654
Boundary bc
Definition: AMReX_FFT_Poisson.H:653
Array4< T > dfab
Definition: AMReX_FFT_Poisson.H:651
Box dbox
Definition: AMReX_FFT_Poisson.H:652
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_FFT_Poisson.H:657
Definition: AMReX_FFT_Poisson.H:328
T const * m_dz
Definition: AMReX_FFT_Poisson.H:334
AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator()(int, int, int k) const
Definition: AMReX_FFT_Poisson.H:330
Definition: AMReX_FFT_Poisson.H:338
AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator()(int, int, int k) const
Definition: AMReX_FFT_Poisson.H:340
T const * m_dz
Definition: AMReX_FFT_Poisson.H:344
Definition: AMReX_FFT_Poisson.H:318
T m_dz2inv
Definition: AMReX_FFT_Poisson.H:324
AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator()(int, int, int) const
Definition: AMReX_FFT_Poisson.H:320
Definition: AMReX_Array.H:34
Definition: AMReX_MFIter.H:20
MFItInfo & DisableDeviceSync() noexcept
Definition: AMReX_MFIter.H:38