Block-Structured AMR Software Framework
AMReX_FFT_Poisson.H
Go to the documentation of this file.
1 #ifndef AMREX_FFT_POISSON_H_
2 #define AMREX_FFT_POISSON_H_
3 
4 #include <AMReX_FFT.H>
5 #include <AMReX_Geometry.H>
6 
7 namespace amrex::FFT
8 {
9 
14 template <typename MF = MultiFab>
15 class Poisson
16 {
17 public:
18 
19  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
20  Poisson (Geometry const& geom,
21  Array<std::pair<Boundary,Boundary>,AMREX_SPACEDIM> const& bc)
22  : m_geom(geom), m_bc(bc)
23  {
24  bool all_periodic = true;
25  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
26  all_periodic = all_periodic
27  && (bc[idim].first == Boundary::periodic)
28  && (bc[idim].second == Boundary::periodic);
29  }
30  if (all_periodic) {
31  m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
32  } else {
33  m_r2x = std::make_unique<R2X<typename MF::value_type>> (m_geom.Domain(), m_bc);
34  }
35  }
36 
37  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
38  explicit Poisson (Geometry const& geom)
39  : m_geom(geom),
40  m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic),
41  std::make_pair(Boundary::periodic,Boundary::periodic),
42  std::make_pair(Boundary::periodic,Boundary::periodic))}
43  {
44  if (m_geom.isAllPeriodic()) {
45  m_r2c = std::make_unique<R2C<typename MF::value_type>>(m_geom.Domain());
46  } else {
47  amrex::Abort("FFT::Poisson: wrong BC");
48  }
49  }
50 
51  void solve (MF& soln, MF const& rhs);
52 
53 private:
56  std::unique_ptr<R2X<typename MF::value_type>> m_r2x;
57  std::unique_ptr<R2C<typename MF::value_type>> m_r2c;
58 };
59 
60 #if (AMREX_SPACEDIM == 3)
64 template <typename MF = MultiFab>
65 class PoissonOpenBC
66 {
67 public:
68 
69  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
70  explicit PoissonOpenBC (Geometry const& geom,
72  IntVect const& ngrow = IntVect(0));
73 
74  void solve (MF& soln, MF const& rhs);
75 
76  void define_doit (); // has to be public for cuda
77 
78 private:
79  Geometry m_geom;
80  Box m_grown_domain;
81  IntVect m_ngrow;
83 };
84 #endif
85 
90 template <typename MF = MultiFab>
92 {
93 public:
94  using T = typename MF::value_type;
95 
96  template <typename FA=MF, std::enable_if_t<IsFabArray_v<FA>,int> = 0>
97  explicit PoissonHybrid (Geometry const& geom)
98  : m_geom(geom), m_r2c(geom.Domain(), Info().setBatchMode(true))
99  {
100 #if (AMREX_SPACEDIM == 3)
101  AMREX_ALWAYS_ASSERT(geom.isPeriodic(0) && geom.isPeriodic(1));
102 #else
103  amrex::Abort("FFT::PoissonHybrid: 1D & 2D todo");
104 #endif
105  }
106 
107  void solve (MF& soln, MF const& rhs);
108  void solve (MF& soln, MF const& rhs, Vector<T> const& dz);
109  void solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz);
110 
111  template <typename DZ>
112  void solve_doit (MF& soln, MF const& rhs, DZ const& dz); // has to be public for cuda
113 
114 private:
117 };
118 
119 template <typename MF>
120 void Poisson<MF>::solve (MF& soln, MF const& rhs)
121 {
122  BL_PROFILE("FFT::Poisson::solve");
123 
124  using T = typename MF::value_type;
125 
127  {AMREX_D_DECL(Math::pi<T>()/T(m_geom.Domain().length(0)),
128  Math::pi<T>()/T(m_geom.Domain().length(1)),
129  Math::pi<T>()/T(m_geom.Domain().length(2)))};
130  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
131  if (m_bc[idim].first == Boundary::periodic) {
132  fac[idim] *= T(2);
133  }
134  }
136  {AMREX_D_DECL(T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0)),
137  T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1)),
138  T(2)/T(m_geom.CellSize(2)*m_geom.CellSize(2)))};
139  auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor();
140 
142  // Not sure about odd-even and even-odd yet
143  for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
144  if (m_bc[idim].first == Boundary::odd &&
145  m_bc[idim].second == Boundary::odd)
146  {
147  offset[idim] = T(1);
148  }
149  else if ((m_bc[idim].first == Boundary::odd &&
150  m_bc[idim].second == Boundary::even) ||
151  (m_bc[idim].first == Boundary::even &&
152  m_bc[idim].second == Boundary::odd))
153  {
154  offset[idim] = T(0.5);
155  }
156  }
157 
158  auto f = [=] AMREX_GPU_DEVICE (int i, int j, int k, auto& spectral_data)
159  {
161  AMREX_D_TERM(T a = fac[0]*(i+offset[0]);,
162  T b = fac[1]*(j+offset[1]);,
163  T c = fac[2]*(k+offset[2]));
164  T k2 = AMREX_D_TERM(dxfac[0]*(std::cos(a)-T(1)),
165  +dxfac[1]*(std::cos(b)-T(1)),
166  +dxfac[2]*(std::cos(c)-T(1)));
167  if (k2 != T(0)) {
168  spectral_data /= k2;
169  }
170  spectral_data *= scale;
171  };
172 
173  if (m_r2x) {
174  m_r2x->forwardThenBackward(rhs, soln, f);
175  } else {
176  m_r2c->forwardThenBackward(rhs, soln, f);
177  }
178 }
179 
180 #if (AMREX_SPACEDIM == 3)
181 
182 template <typename MF>
183 template <typename FA, std::enable_if_t<IsFabArray_v<FA>,int> FOO>
184 PoissonOpenBC<MF>::PoissonOpenBC (Geometry const& geom, IndexType ixtype,
185  IntVect const& ngrow)
186  : m_geom(geom),
187  m_grown_domain(amrex::grow(amrex::convert(geom.Domain(),ixtype),ngrow)),
188  m_ngrow(ngrow),
189  m_solver(m_grown_domain)
190 {
191  define_doit();
192 }
193 
194 template <typename MF>
195 void PoissonOpenBC<MF>::define_doit ()
196 {
197  using T = typename MF::value_type;
198  auto const& lo = m_grown_domain.smallEnd();
199  auto const dx = T(m_geom.CellSize(0));
200  auto const dy = T(m_geom.CellSize(1));
201  auto const dz = T(m_geom.CellSize(2));
202  auto const gfac = T(1)/T(std::sqrt(T(12)));
203  // 0.125 comes from that there are 8 Gauss quadrature points
204  auto const fac = T(-0.125) * (dx*dy*dz) / (T(4)*Math::pi<T>());
205  m_solver.setGreensFunction([=] AMREX_GPU_DEVICE (int i, int j, int k) -> T
206  {
207  auto x = (T(i-lo[0]) - gfac) * dx; // first Gauss quadrature point
208  auto y = (T(j-lo[1]) - gfac) * dy;
209  auto z = (T(k-lo[2]) - gfac) * dz;
210  T r = 0;
211  for (int gx = 0; gx < 2; ++gx) {
212  for (int gy = 0; gy < 2; ++gy) {
213  for (int gz = 0; gz < 2; ++gz) {
214  auto xg = x + 2*gx*gfac*dx;
215  auto yg = y + 2*gy*gfac*dy;
216  auto zg = z + 2*gz*gfac*dz;
217  r += T(1)/std::sqrt(xg*xg+yg*yg+zg*zg);
218  }}}
219  return fac * r;
220  });
221 }
222 
223 template <typename MF>
224 void PoissonOpenBC<MF>::solve (MF& soln, MF const& rhs)
225 {
226  AMREX_ASSERT(m_grown_domain.ixType() == soln.ixType() && m_grown_domain.ixType() == rhs.ixType());
227  m_solver.solve(soln, rhs);
228 }
229 
230 #endif /* AMREX_SPACEDIM == 3 */
231 
232 namespace fft_poisson_detail {
233  template <typename T>
234  struct DZ {
235  [[nodiscard]] constexpr T operator[] (int) const { return m_delz; }
237  };
238 }
239 
240 template <typename MF>
241 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs)
242 {
243  auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1));
244  solve_doit(soln, rhs, fft_poisson_detail::DZ<T>{delz});
245 }
246 
247 template <typename MF>
248 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Gpu::DeviceVector<T> const& dz)
249 {
250  auto const* pdz = dz.dataPtr();
251  solve_doit(soln, rhs, pdz);
252 }
253 
254 template <typename MF>
255 void PoissonHybrid<MF>::solve (MF& soln, MF const& rhs, Vector<T> const& dz)
256 {
257 #ifdef AMREX_USE_GPU
258  Gpu::DeviceVector<T> d_dz(dz.size());
259  Gpu::htod_memcpy_async(d_dz.data(), dz.data(), dz.size()*sizeof(T));
260  auto const* pdz = d_dz.data();
261 #else
262  auto const* pdz = dz.data();
263 #endif
264  solve_doit(soln, rhs, pdz);
265 }
266 
267 template <typename MF>
268 template <typename DZ>
269 void PoissonHybrid<MF>::solve_doit (MF& soln, MF const& rhs, DZ const& dz)
270 {
271  BL_PROFILE("FFT::PoissonHybrid::solve");
272 
273 #if (AMREX_SPACEDIM < 3)
274  amrex::ignore_unused(soln, rhs, dz);
275 #else
276  auto facx = T(2)*Math::pi<T>()/T(m_geom.ProbLength(0));
277  auto facy = T(2)*Math::pi<T>()/T(m_geom.ProbLength(1));
278  auto dx = T(m_geom.CellSize(0));
279  auto dy = T(m_geom.CellSize(1));
280  auto scale = T(1.0)/(T(m_geom.Domain().length(0)) *
281  T(m_geom.Domain().length(1)));
282  auto ny = m_geom.Domain().length(1);
283  auto nz = m_geom.Domain().length(2);
284 
285  Box cdomain = m_geom.Domain();
286  cdomain.setBig(0,cdomain.length(0)/2);
287  auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(),
288  {AMREX_D_DECL(true,true,false)});
290  FabArray<BaseFab<GpuComplex<T> > > spmf(cba, dm, 1, 0);
291 
292  m_r2c.forward(rhs, spmf);
293 
294  for (MFIter mfi(spmf); mfi.isValid(); ++mfi)
295  {
296  auto const& spectral = spmf.array(mfi);
297  auto const& box = mfi.validbox();
298  auto const& xybox = amrex::makeSlab(box, 2, 0);
299 
300 #ifdef AMREX_USE_GPU
301  // xxxxx TODO: We need to explore how to optimize this
302  // function. Maybe we can use cusparse. Maybe we should make
303  // z-direction to be the unit stride direction.
304 
305  FArrayBox tridiag_workspace(box,4);
306  auto const& ald = tridiag_workspace.array(0);
307  auto const& bd = tridiag_workspace.array(1);
308  auto const& cud = tridiag_workspace.array(2);
309  auto const& scratch = tridiag_workspace.array(3);
310 
311  amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int)
312  {
313  T a = facx*i;
314  T b = (j < ny/2) ? facy*j : facy*(ny-j);
315 
316  T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx)
317  + T(2)*(std::cos(b*dy)-T(1))/(dy*dy);
318 
319  // Tridiagonal solve with homogeneous Neumann
320  for(int k=0; k < nz; k++) {
321  if(k==0) {
322  ald(i,j,k) = 0.;
323  cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1]));
324  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
325  } else if (k == nz-1) {
326  ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1]));
327  cud(i,j,k) = 0.;
328  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
329  if (i == 0 && j == 0) {
330  bd(i,j,k) *= 2.0;
331  }
332  } else {
333  ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1]));
334  cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1]));
335  bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k);
336  }
337  }
338 
339  scratch(i,j,0) = cud(i,j,0)/bd(i,j,0);
340  spectral(i,j,0) = spectral(i,j,0)/bd(i,j,0);
341 
342  for (int k = 1; k < nz; k++) {
343  if (k < nz-1) {
344  scratch(i,j,k) = cud(i,j,k) / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
345  }
346  spectral(i,j,k) = (spectral(i,j,k) - ald(i,j,k) * spectral(i,j,k - 1))
347  / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1));
348  }
349 
350  for (int k = nz - 2; k >= 0; k--) {
351  spectral(i,j,k) -= scratch(i,j,k) * spectral(i,j,k + 1);
352  }
353 
354  for (int k = 0; k < nz; ++k) {
355  spectral(i,j,k) *= scale;
356  }
357  });
359 
360 #else
361 
366 
367  amrex::LoopOnCpu(xybox, [&] (int i, int j, int)
368  {
369  T a = facx*i;
370  T b = (j < ny/2) ? facy*j : facy*(ny-j);
371 
372  T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx)
373  + T(2)*(std::cos(b*dy)-T(1))/(dy*dy);
374 
375  // Tridiagonal solve with homogeneous Neumann
376  for(int k=0; k < nz; k++) {
377  if(k==0) {
378  ald[k] = 0.;
379  cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1]));
380  bd[k] = k2 -ald[k]-cud[k];
381  } else if (k == nz-1) {
382  ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1]));
383  cud[k] = 0.;
384  bd[k] = k2 -ald[k]-cud[k];
385  if (i == 0 && j == 0) {
386  bd[k] *= 2.0;
387  }
388  } else {
389  ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1]));
390  cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1]));
391  bd[k] = k2 -ald[k]-cud[k];
392  }
393  }
394 
395  scratch[0] = cud[0]/bd[0];
396  spectral(i,j,0) = spectral(i,j,0)/bd[0];
397 
398  for (int k = 1; k < nz; k++) {
399  if (k < nz-1) {
400  scratch[k] = cud[k] / (bd[k] - ald[k] * scratch[k-1]);
401  }
402  spectral(i,j,k) = (spectral(i,j,k) - ald[k] * spectral(i,j,k - 1))
403  / (bd[k] - ald[k] * scratch[k-1]);
404  }
405 
406  for (int k = nz - 2; k >= 0; k--) {
407  spectral(i,j,k) -= scratch[k] * spectral(i,j,k + 1);
408  }
409 
410  for (int k = 0; k < nz; ++k) {
411  spectral(i,j,k) *= scale;
412  }
413  });
414 #endif
415  }
416 
417  m_r2c.backward(spmf, soln);
418 #endif
419 }
420 
421 }
422 
423 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_TERM(a, b, c)
Definition: AMReX_SPACE.H:129
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
Definition: AMReX_BaseFab.H:379
AMREX_GPU_HOST_DEVICE IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition: AMReX_Box.H:146
AMREX_GPU_HOST_DEVICE BoxND & setBig(const IntVectND< dim > &bg) noexcept
Redefine the big end of the BoxND.
Definition: AMReX_Box.H:474
Calculates the distribution of FABs to MPI processes.
Definition: AMReX_DistributionMapping.H:41
A Fortran Array of REALs.
Definition: AMReX_FArrayBox.H:229
Definition: AMReX_FFT_OpenBCSolver.H:11
3D Poisson solver for periodic boundaries in the first two dimensions and Neumann in the last dimensi...
Definition: AMReX_FFT_Poisson.H:92
void solve(MF &soln, MF const &rhs)
Definition: AMReX_FFT_Poisson.H:241
typename MF::value_type T
Definition: AMReX_FFT_Poisson.H:94
Geometry m_geom
Definition: AMReX_FFT_Poisson.H:115
void solve_doit(MF &soln, MF const &rhs, DZ const &dz)
Definition: AMReX_FFT_Poisson.H:269
R2C< typename MF::value_type, Direction::both > m_r2c
Definition: AMReX_FFT_Poisson.H:116
PoissonHybrid(Geometry const &geom)
Definition: AMReX_FFT_Poisson.H:97
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition: AMReX_FFT_Poisson.H:16
Poisson(Geometry const &geom, Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > const &bc)
Definition: AMReX_FFT_Poisson.H:20
std::unique_ptr< R2X< typename MF::value_type > > m_r2x
Definition: AMReX_FFT_Poisson.H:56
Geometry m_geom
Definition: AMReX_FFT_Poisson.H:54
Array< std::pair< Boundary, Boundary >, AMREX_SPACEDIM > m_bc
Definition: AMReX_FFT_Poisson.H:55
Poisson(Geometry const &geom)
Definition: AMReX_FFT_Poisson.H:38
void solve(MF &soln, MF const &rhs)
Definition: AMReX_FFT_Poisson.H:120
std::unique_ptr< R2C< typename MF::value_type > > m_r2c
Definition: AMReX_FFT_Poisson.H:57
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:344
Array4< typename FabArray< FAB >::value_type const > array(const MFIter &mfi) const noexcept
Definition: AMReX_FabArray.H:1561
Rectangular problem domain geometry.
Definition: AMReX_Geometry.H:73
const Box & Domain() const noexcept
Returns our rectangular domain.
Definition: AMReX_Geometry.H:210
bool isAllPeriodic() const noexcept
Is domain periodic in all directions?
Definition: AMReX_Geometry.H:338
bool isPeriodic(int dir) const noexcept
Is the domain periodic in the specified direction?
Definition: AMReX_Geometry.H:331
AMREX_GPU_HOST_DEVICE static constexpr AMREX_FORCE_INLINE IndexTypeND< dim > TheCellType() noexcept
This static member function returns an IndexTypeND object of value IndexTypeND::CELL....
Definition: AMReX_IndexType.H:149
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
Definition: AMReX_PODVector.H:246
T * data() noexcept
Definition: AMReX_PODVector.H:593
T * dataPtr() noexcept
Definition: AMReX_PODVector.H:597
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
DistributionMapping make_iota_distromap(Long n)
Definition: AMReX_FFT.cpp:88
Definition: AMReX_FFT.cpp:7
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:355
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition: AMReX_Box.H:1211
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Returns a BoxND with different type.
Definition: AMReX_Box.H:1435
double second() noexcept
Definition: AMReX_Utility.cpp:922
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVectND< dim > scale(const IntVectND< dim > &p, int s) noexcept
Returns a IntVectND obtained by multiplying each of the components of this IntVectND by s.
Definition: AMReX_IntVect.H:1006
BoxArray decompose(Box const &domain, int nboxes, Array< bool, AMREX_SPACEDIM > const &decomp={AMREX_D_DECL(true, true, true)}, bool no_overlap=false)
Decompose domain box into BoxArray.
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > makeSlab(BoxND< dim > const &b, int direction, int slab_index) noexcept
Definition: AMReX_Box.H:1998
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuComplex< T > sqrt(const GpuComplex< T > &a_z) noexcept
Return the square root of a complex number.
Definition: AMReX_GpuComplex.H:373
std::array< T, N > Array
Definition: AMReX_Array.H:24
Definition: AMReX_FFT_Helper.H:56
Definition: AMReX_FFT_Poisson.H:234
T m_delz
Definition: AMReX_FFT_Poisson.H:236
constexpr T operator[](int) const
Definition: AMReX_FFT_Poisson.H:235
Definition: AMReX_Array.H:34