Block-Structured AMR Software Framework
AMReX_FFT_R2C.H
Go to the documentation of this file.
1 #ifndef AMREX_FFT_R2C_H_
2 #define AMREX_FFT_R2C_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_MultiFab.H>
6 #include <AMReX_FFT_Helper.H>
7 #include <algorithm>
8 #include <numeric>
9 #include <tuple>
10 
11 namespace amrex::FFT
12 {
13 
14 template <typename T> class OpenBCSolver;
15 template <typename T> class Poisson;
16 template <typename T> class PoissonHybrid;
17 
32 template <typename T = Real, FFT::Direction D = FFT::Direction::both,
34  // Don't change the default. Otherwise OpenBCSolver might break.
35 class R2C
36 {
37 public:
38  using MF = std::conditional_t<std::is_same_v<T,Real>,
41 
42  template <typename U> friend class OpenBCSolver;
43  template <typename U> friend class Poisson;
44  template <typename U> friend class PoissonHybrid;
45 
52  explicit R2C (Box const& domain, Info const& info = Info{});
53 
54  ~R2C ();
55 
56  R2C (R2C const&) = delete;
57  R2C (R2C &&) = delete;
58  R2C& operator= (R2C const&) = delete;
59  R2C& operator= (R2C &&) = delete;
60 
80  template <typename F, Direction DIR=D,
81  std::enable_if_t<DIR == Direction::both, int> = 0>
82  void forwardThenBackward (MF const& inmf, MF& outmf, F const& post_forward)
83  {
84  BL_PROFILE("FFT::R2C::forwardbackward");
85  this->forward(inmf);
86  this->post_forward_doit<0>(post_forward);
87  this->backward(outmf);
88  }
89 
99  template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
100  DIR == Direction::both, int> = 0>
101  void forward (MF const& inmf);
102 
112  template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
113  DIR == Direction::both, int> = 0>
114  void forward (MF const& inmf, cMF& outmf);
115 
124  template <Direction DIR=D, std::enable_if_t<DIR == Direction::both, int> = 0>
125  void backward (MF& outmf);
126 
136  template <Direction DIR=D, std::enable_if_t<DIR == Direction::backward ||
137  DIR == Direction::both, int> = 0>
138  void backward (cMF const& inmf, MF& outmf);
139 
143  [[nodiscard]] T scalingFactor () const;
144 
154  template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
155  DIR == Direction::both, int> = 0>
156  std::pair<cMF*,IntVect> getSpectralData ();
157 
165  [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
166 
167  // This is a private function, but it's public for cuda.
168  template <int Depth, typename F>
169  void post_forward_doit (F const& post_forward);
170 
171 private:
172 
173  void prepare_openbc ();
174 
175  void backward_doit (MF& outmf, IntVect const& ngout = IntVect(0),
176  Periodicity const& period = Periodicity::NonPeriodic());
177 
178  void backward_doit (cMF const& inmf, MF& outmf,
179  IntVect const& ngout = IntVect(0),
180  Periodicity const& period = Periodicity::NonPeriodic());
181 
182  static std::pair<Plan<T>,Plan<T>> make_c2c_plans (cMF& inout);
183 
192 
193  // Comm meta-data. In the forward phase, we start with (x,y,z),
194  // transpose to (y,x,z) and then (z,x,y). In the backward phase, we
195  // perform inverse transpose.
196  std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y; // (x,y,z) -> (y,x,z)
197  std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x; // (y,x,z) -> (x,y,z)
198  std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z; // (y,x,z) -> (z,x,y)
199  std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
200  std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
201  std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
202  std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
203  std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
210 
215 
216  std::unique_ptr<char,DataDeleter> m_data_1;
217  std::unique_ptr<char,DataDeleter> m_data_2;
218 
223 
224  std::unique_ptr<R2C<T,D,S>> m_r2c_sub;
226 
228 
229  bool m_do_alld_fft = false;
230  bool m_slab_decomp = false;
231  bool m_openbc_half = false;
232 };
233 
234 template <typename T, Direction D, DomainStrategy S>
235 R2C<T,D,S>::R2C (Box const& domain, Info const& info)
236  : m_real_domain(domain),
237  m_spectral_domain_x(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2,
238  domain.length(1)-1,
239  domain.length(2)-1)),
240  domain.ixType()),
241 #if (AMREX_SPACEDIM >= 2)
242  m_spectral_domain_y(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
243  domain.length(0)/2,
244  domain.length(2)-1)),
245  domain.ixType()),
246 #if (AMREX_SPACEDIM == 3)
247  m_spectral_domain_z(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
248  domain.length(0)/2,
249  domain.length(1)-1)),
250  domain.ixType()),
251 #endif
252 #endif
253  m_sub_helper(domain),
254  m_info(info)
255 {
256  BL_PROFILE("FFT::R2C");
257 
258  static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
259 
261 #if (AMREX_SPACEDIM == 2)
263 #else
264  if (m_info.batch_mode) {
265  AMREX_ALWAYS_ASSERT((int(domain.length(0) > 1) +
266  int(domain.length(1) > 1) +
267  int(domain.length(2) > 1)) >= 2);
268  }
269 #endif
270 
271  {
273  if (subbox.size() != m_real_domain.size()) {
274  m_r2c_sub = std::make_unique<R2C<T,D,S>>(subbox, info);
275  return;
276  }
277  }
278 
279  int myproc = ParallelContext::MyProcSub();
281 
282 #if (AMREX_SPACEDIM == 3)
283  if (S == DomainStrategy::slab && (m_real_domain.length(1) > 1)) {
284  if (m_info.batch_mode && m_real_domain.length(2) == 1) {
285  m_slab_decomp = false;
286  } else {
287  m_slab_decomp = true;
288  }
289  }
290 #endif
291 
292  auto bax = amrex::decompose(m_real_domain, nprocs,
293  {AMREX_D_DECL(false,!m_slab_decomp,true)}, true);
295  m_rx.define(bax, dmx, 1, 0, MFInfo().SetAlloc(false));
296 
297  {
298  BoxList bl = bax.boxList();
299  for (auto & b : bl) {
300  b.shift(-m_real_domain.smallEnd());
301  b.setBig(0, m_spectral_domain_x.bigEnd(0));
302  }
303  BoxArray cbax(std::move(bl));
304  m_cx.define(cbax, dmx, 1, 0, MFInfo().SetAlloc(false));
305  }
306 
308 
309  if (!m_do_alld_fft) // do a series of 1d or 2d ffts
310  {
311  //
312  // make data containers
313  //
314 
315 #if (AMREX_SPACEDIM >= 2)
316 #if (AMREX_SPACEDIM == 2)
317  bool batch_on_y = false;
318 #else
319  bool batch_on_y = m_info.batch_mode && (m_real_domain.length(2) == 1);
320 #endif
321  DistributionMapping cdmy;
322  if ((m_real_domain.length(1) > 1) && !m_slab_decomp && !batch_on_y)
323  {
324  auto cbay = amrex::decompose(m_spectral_domain_y, nprocs,
325  {AMREX_D_DECL(false,true,true)}, true);
326  if (cbay.size() == dmx.size()) {
327  cdmy = dmx;
328  } else {
329  cdmy = detail::make_iota_distromap(cbay.size());
330  }
331  m_cy.define(cbay, cdmy, 1, 0, MFInfo().SetAlloc(false));
332  }
333 #endif
334 
335 #if (AMREX_SPACEDIM == 3)
336  if (m_real_domain.length(1) > 1 &&
337  (! m_info.batch_mode && m_real_domain.length(2) > 1))
338  {
339  auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
340  {false,true,true}, true);
341  DistributionMapping cdmz;
342  if (cbaz.size() == dmx.size()) {
343  cdmz = dmx;
344  } else if (cbaz.size() == cdmy.size()) {
345  cdmz = cdmy;
346  } else {
347  cdmz = detail::make_iota_distromap(cbaz.size());
348  }
349  m_cz.define(cbaz, cdmz, 1, 0, MFInfo().SetAlloc(false));
350  }
351 #endif
352 
353  if (m_slab_decomp) {
356  } else {
359  }
360 
361  //
362  // make copiers
363  //
364 
365 #if (AMREX_SPACEDIM >= 2)
366  if (! m_cy.empty()) {
367  // comm meta-data between x and y phases
368  m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
370  m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
372  }
373 #endif
374 #if (AMREX_SPACEDIM == 3)
375  if (! m_cz.empty() ) {
376  if (m_slab_decomp) {
377  // comm meta-data between xy and z phases
378  m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
380  m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
382  } else {
383  // comm meta-data between y and z phases
384  m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
386  m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
388  }
389  }
390 #endif
391 
392  //
393  // make plans
394  //
395 
396  if (myproc < m_rx.size())
397  {
398  Box const& box = m_rx.box(myproc);
399  auto* pr = m_rx[myproc].dataPtr();
400  auto* pc = (typename Plan<T>::VendorComplex *)m_cx[myproc].dataPtr();
401 #ifdef AMREX_USE_SYCL
402  m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp);
404 #else
405  if constexpr (D == Direction::both || D == Direction::forward) {
406  m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp);
407  }
408  if constexpr (D == Direction::both || D == Direction::backward) {
409  m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp);
410  }
411 #endif
412  }
413 
414 #if (AMREX_SPACEDIM >= 2)
415  if (! m_cy.empty()) {
417  }
418 #endif
419 #if (AMREX_SPACEDIM == 3)
420  if (! m_cz.empty()) {
422  }
423 #endif
424  }
425  else // do fft in all dimensions at the same time
426  {
429 
430  auto const& len = m_real_domain.length();
431  auto* pr = (void*)m_rx[0].dataPtr();
432  auto* pc = (void*)m_cx[0].dataPtr();
433 #ifdef AMREX_USE_SYCL
434  m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false);
436 #else
437  if constexpr (D == Direction::both || D == Direction::forward) {
438  m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false);
439  }
440  if constexpr (D == Direction::both || D == Direction::backward) {
441  m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc, false);
442  }
443 #endif
444  }
445 }
446 
447 template <typename T, Direction D, DomainStrategy S>
449 {
450  if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
451  m_fft_bwd_x.destroy();
452  }
453  if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
454  m_fft_bwd_y.destroy();
455  }
456  if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
457  m_fft_bwd_z.destroy();
458  }
459  m_fft_fwd_x.destroy();
460  m_fft_fwd_y.destroy();
461  m_fft_fwd_z.destroy();
462  if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
463  m_fft_bwd_x_half.destroy();
464  }
465  m_fft_fwd_x_half.destroy();
466 }
467 
468 template <typename T, Direction D, DomainStrategy S>
470 {
471  if (m_r2c_sub) { amrex::Abort("R2C: OpenBC not supported with reduced dimensions"); }
472 
473 #if (AMREX_SPACEDIM == 3)
474  if (m_do_alld_fft) { return; }
475 
476  if (m_slab_decomp) {
477  auto* fab = detail::get_fab(m_rx);
478  if (fab) {
479  Box bottom_half = m_real_domain;
480  bottom_half.growHi(2,-m_real_domain.length(2)/2);
481  Box box = fab->box() & bottom_half;
482  if (box.ok()) {
483  auto* pr = fab->dataPtr();
484  auto* pc = (typename Plan<T>::VendorComplex *)
485  detail::get_fab(m_cx)->dataPtr();
486 #ifdef AMREX_USE_SYCL
487  m_fft_fwd_x_half.template init_r2c<Direction::forward>
488  (box, pr, pc, m_slab_decomp);
489  m_fft_bwd_x_half = m_fft_fwd_x_half;
490 #else
491  if constexpr (D == Direction::both || D == Direction::forward) {
492  m_fft_fwd_x_half.template init_r2c<Direction::forward>
493  (box, pr, pc, m_slab_decomp);
494  }
495  if constexpr (D == Direction::both || D == Direction::backward) {
496  m_fft_bwd_x_half.template init_r2c<Direction::backward>
497  (box, pr, pc, m_slab_decomp);
498  }
499 #endif
500  }
501  }
502  } // else todo
503 
504  if (m_cmd_x2z && ! m_cmd_x2z_half) {
505  Box bottom_half = m_spectral_domain_z;
506  // Note that z-direction's index is 0 because we z is the
507  // unit-stride direction here.
508  bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
509  m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
510  (m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
511  }
512 
513  if (m_cmd_z2x && ! m_cmd_z2x_half) {
514  Box bottom_half = m_spectral_domain_x;
515  bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
516  m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
517  (m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
518  }
519 #endif
520 }
521 
522 template <typename T, Direction D, DomainStrategy S>
523 template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
524  DIR == Direction::both, int> >
525 void R2C<T,D,S>::forward (MF const& inmf)
526 {
527  BL_PROFILE("FFT::R2C::forward(in)");
528 
529  if (m_r2c_sub) {
530  if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
531  m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf));
532  } else {
533  MF tmp(inmf.boxArray(), inmf.DistributionMap(), 1, 0);
534  tmp.LocalCopy(inmf, 0, 0, 1, IntVect(0));
535  m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp));
536  }
537  return;
538  }
539 
540  if (&m_rx != &inmf) {
541  m_rx.ParallelCopy(inmf, 0, 0, 1);
542  }
543 
544  if (m_do_alld_fft) {
545  m_fft_fwd_x.template compute_r2c<Direction::forward>();
546  return;
547  }
548 
549  auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
550  fft_x.template compute_r2c<Direction::forward>();
551 
552  if ( m_cmd_x2y) {
553  ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, 1, m_dtos_x2y);
554  }
555  m_fft_fwd_y.template compute_c2c<Direction::forward>();
556 
557  if ( m_cmd_y2z) {
558  ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, 1, m_dtos_y2z);
559  }
560 #if (AMREX_SPACEDIM == 3)
561  else if ( m_cmd_x2z) {
562  if (m_openbc_half) {
564  {NonLocalBC::PackComponents{}, m_dtos_x2z};
565  auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
566 
567  Box upper_half = m_spectral_domain_z;
568  // Note that z-direction's index is 0 because we z is the
569  // unit-stride direction here.
570  upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
571  m_cz.setVal(0, upper_half, 0, 1);
572 
573  ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
574  } else {
575  ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, 1, m_dtos_x2z);
576  }
577  }
578 #endif
579  m_fft_fwd_z.template compute_c2c<Direction::forward>();
580 }
581 
582 template <typename T, Direction D, DomainStrategy S>
583 template <Direction DIR, std::enable_if_t<DIR == Direction::both, int> >
585 {
586  backward_doit(outmf);
587 }
588 
589 template <typename T, Direction D, DomainStrategy S>
590 void R2C<T,D,S>::backward_doit (MF& outmf, IntVect const& ngout,
591  Periodicity const& period)
592 {
593  BL_PROFILE("FFT::R2C::backward(out)");
594 
595  if (m_r2c_sub) {
596  if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
597  MF submf = m_sub_helper.make_alias_mf(outmf);
598  IntVect const& subngout = m_sub_helper.make_iv(ngout);
599  Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
600  m_r2c_sub->backward_doit(submf, subngout, subperiod);
601  } else {
602  MF tmp(outmf.boxArray(), outmf.DistributionMap(), 1,
603  m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
604  this->backward_doit(tmp, ngout, period);
605  outmf.LocalCopy(tmp, 0, 0, 1, tmp.nGrowVect());
606  }
607  return;
608  }
609 
610  if (m_do_alld_fft) {
611  m_fft_bwd_x.template compute_r2c<Direction::backward>();
612  outmf.ParallelCopy(m_rx, 0, 0, 1, IntVect(0),
613  amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
614  return;
615  }
616 
617  m_fft_bwd_z.template compute_c2c<Direction::backward>();
618  if ( m_cmd_z2y) {
619  ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, 1, m_dtos_z2y);
620  }
621 #if (AMREX_SPACEDIM == 3)
622  else if ( m_cmd_z2x) {
623  auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
624  ParallelCopy(m_cx, m_cz, *cmd, 0, 0, 1, m_dtos_z2x);
625  }
626 #endif
627 
628  m_fft_bwd_y.template compute_c2c<Direction::backward>();
629  if ( m_cmd_y2x) {
630  ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, 1, m_dtos_y2x);
631  }
632 
633  auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
634  fft_x.template compute_r2c<Direction::backward>();
635  outmf.ParallelCopy(m_rx, 0, 0, 1, IntVect(0),
636  amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
637 }
638 
639 template <typename T, Direction D, DomainStrategy S>
640 std::pair<Plan<T>, Plan<T>>
642 {
643  Plan<T> fwd;
644  Plan<T> bwd;
645 
646  auto* fab = detail::get_fab(inout);
647  if (!fab) { return {fwd, bwd};}
648 
649  Box const& box = fab->box();
650  auto* pio = (typename Plan<T>::VendorComplex *)fab->dataPtr();
651 
652 #ifdef AMREX_USE_SYCL
653  fwd.template init_c2c<Direction::forward>(box, pio);
654  bwd = fwd;
655 #else
656  if constexpr (D == Direction::both || D == Direction::forward) {
657  fwd.template init_c2c<Direction::forward>(box, pio);
658  }
659  if constexpr (D == Direction::both || D == Direction::backward) {
660  bwd.template init_c2c<Direction::backward>(box, pio);
661  }
662 #endif
663 
664  return {fwd, bwd};
665 }
666 
667 template <typename T, Direction D, DomainStrategy S>
668 template <int Depth, typename F>
669 void R2C<T,D,S>::post_forward_doit (F const& post_forward)
670 {
671  if (m_info.batch_mode) {
672  amrex::Abort("xxxxx todo: post_forward");
673 #if (AMREX_SPACEDIM > 1)
674  } else if (m_r2c_sub) {
675  if constexpr (Depth == 0) {
676  // We need to pass the originally ordered indices to post_forward.
677 #if (AMREX_SPACEDIM == 2)
678  // The original domain is (1,ny). The sub domain is (ny,1).
679  m_r2c_sub->template post_forward_doit<(Depth+1)>
680  ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
681  {
682  post_forward(0, i, 0, sp);
683  });
684 #else
685  if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
686  // Original domain: (1, 1, nz). Sub domain: (nz, 1, 1)
687  m_r2c_sub->template post_forward_doit<(Depth+1)>
688  ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
689  {
690  post_forward(0, 0, i, sp);
691  });
692  } else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
693  // Original domain: (1, ny, 1). Sub domain: (ny, 1, 1)
694  m_r2c_sub->template post_forward_doit<(Depth+1)>
695  ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
696  {
697  post_forward(0, i, 0, sp);
698  });
699  } else if (m_real_domain.length(0) == 1) {
700  // Original domain: (1, ny, nz). Sub domain: (ny, nz, 1)
701  m_r2c_sub->template post_forward_doit<(Depth+1)>
702  ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
703  {
704  post_forward(0, i, j, sp);
705  });
706  } else if (m_real_domain.length(1) == 1) {
707  // Original domain: (nx, 1, nz). Sub domain: (nx, nz, 1)
708  m_r2c_sub->template post_forward_doit<(Depth+1)>
709  ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
710  {
711  post_forward(i, 0, j, sp);
712  });
713  } else {
714  amrex::Abort("R2c::post_forward_doit: how did this happen?");
715  }
716 #endif
717  } else {
718  amrex::Abort("R2C::post_forward_doit: How did this happen?");
719  }
720 #endif
721  } else {
722  if ( ! m_cz.empty()) {
723  auto* spectral_fab = detail::get_fab(m_cz);
724  if (spectral_fab) {
725  auto const& a = spectral_fab->array(); // m_cz's ordering is z,x,y
726  ParallelFor(spectral_fab->box(),
727  [=] AMREX_GPU_DEVICE (int iz, int jx, int ky)
728  {
729  post_forward(jx,ky,iz,a(iz,jx,ky));
730  });
731  }
732  } else if ( ! m_cy.empty()) {
733  auto* spectral_fab = detail::get_fab(m_cy);
734  if (spectral_fab) {
735  auto const& a = spectral_fab->array(); // m_cy's ordering is y,x,z
736  ParallelFor(spectral_fab->box(),
737  [=] AMREX_GPU_DEVICE (int iy, int jx, int k)
738  {
739  post_forward(jx,iy,k,a(iy,jx,k));
740  });
741  }
742  } else {
743  auto* spectral_fab = detail::get_fab(m_cx);
744  if (spectral_fab) {
745  auto const& a = spectral_fab->array();
746  ParallelFor(spectral_fab->box(),
747  [=] AMREX_GPU_DEVICE (int i, int j, int k)
748  {
749  post_forward(i,j,k,a(i,j,k));
750  });
751  }
752  }
753  }
754 }
755 
756 template <typename T, Direction D, DomainStrategy S>
758 {
759 #if (AMREX_SPACEDIM == 3)
760  if (m_info.batch_mode) {
761  if (m_real_domain.length(2) > 1) {
762  return T(1)/T(Long(m_real_domain.length(0)) *
763  Long(m_real_domain.length(1)));
764  } else {
765  return T(1)/T(m_real_domain.length(0));
766  }
767  } else
768 #endif
769  {
770  return T(1)/T(m_real_domain.numPts());
771  }
772 }
773 
774 template <typename T, Direction D, DomainStrategy S>
775 template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
776  DIR == Direction::both, int> >
777 std::pair<typename R2C<T,D,S>::cMF *, IntVect>
779 {
780 #if (AMREX_SPACEDIM > 1)
781  if (m_r2c_sub) {
782  auto [cmf, order] = m_r2c_sub->getSpectralData();
783  return std::make_pair(cmf, m_sub_helper.inverse_order(order));
784  } else
785 #endif
786  if (!m_cz.empty()) {
787  return std::make_pair(&m_cz, IntVect{AMREX_D_DECL(2,0,1)});
788  } else if (!m_cy.empty()) {
789  return std::make_pair(&m_cy, IntVect{AMREX_D_DECL(1,0,2)});
790  } else {
791  return std::make_pair(&m_cx, IntVect{AMREX_D_DECL(0,1,2)});
792  }
793 }
794 
795 template <typename T, Direction D, DomainStrategy S>
796 template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
797  DIR == Direction::both, int> >
798 void R2C<T,D,S>::forward (MF const& inmf, cMF& outmf)
799 {
800  BL_PROFILE("FFT::R2C::forward(inout)");
801 
802  if (m_r2c_sub)
803  {
804  bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
805  MF inmf_sub, inmf_tmp;
806  if (inmf_safe) {
807  inmf_sub = m_sub_helper.make_alias_mf(inmf);
808  } else {
809  inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), 1, 0);
810  inmf_tmp.LocalCopy(inmf, 0, 0, 1, IntVect(0));
811  inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
812  }
813 
814  bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
815  cMF outmf_sub, outmf_tmp;
816  if (outmf_safe) {
817  outmf_sub = m_sub_helper.make_alias_mf(outmf);
818  } else {
819  outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), 1, 0);
820  outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
821  }
822 
823  m_r2c_sub->forward(inmf_sub, outmf_sub);
824 
825  if (!outmf_safe) {
826  outmf.LocalCopy(outmf_tmp, 0, 0, 1, IntVect(0));
827  }
828  }
829  else
830  {
831  forward(inmf);
832  if (!m_cz.empty()) { // m_cz's order (z,x,y) -> (x,y,z)
833  RotateBwd dtos{};
835  (outmf, m_spectral_domain_x, m_cz, IntVect(0), dtos);
836  ParallelCopy(outmf, m_cz, cmd, 0, 0, 1, dtos);
837  } else if (!m_cy.empty()) { // m_cy's order (y,x,z) -> (x,y,z)
839  (outmf, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
840  ParallelCopy(outmf, m_cy, cmd, 0, 0, 1, m_dtos_y2x);
841  } else {
842  outmf.ParallelCopy(m_cx, 0, 0, 1);
843  }
844  }
845 }
846 
847 template <typename T, Direction D, DomainStrategy S>
848 template <Direction DIR, std::enable_if_t<DIR == Direction::backward ||
849  DIR == Direction::both, int> >
850 void R2C<T,D,S>::backward (cMF const& inmf, MF& outmf)
851 {
852  backward_doit(inmf, outmf);
853 }
854 
855 template <typename T, Direction D, DomainStrategy S>
856 void R2C<T,D,S>::backward_doit (cMF const& inmf, MF& outmf, IntVect const& ngout,
857  Periodicity const& period)
858 {
859  BL_PROFILE("FFT::R2C::backward(inout)");
860 
861  if (m_r2c_sub)
862  {
863  bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
864  cMF inmf_sub, inmf_tmp;
865  if (inmf_safe) {
866  inmf_sub = m_sub_helper.make_alias_mf(inmf);
867  } else {
868  inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), 1, 0);
869  inmf_tmp.LocalCopy(inmf, 0, 0, 1, IntVect(0));
870  inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
871  }
872 
873  bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
874  MF outmf_sub, outmf_tmp;
875  if (outmf_safe) {
876  outmf_sub = m_sub_helper.make_alias_mf(outmf);
877  } else {
878  IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
879  outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), 1, ngtmp);
880  outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
881  }
882 
883  IntVect const& subngout = m_sub_helper.make_iv(ngout);
884  Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
885  m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod);
886 
887  if (!outmf_safe) {
888  outmf.LocalCopy(outmf_tmp, 0, 0, 1, outmf_tmp.nGrowVect());
889  }
890  }
891  else
892  {
893  if (!m_cz.empty()) { // (x,y,z) -> m_cz's order (z,x,y)
894  RotateFwd dtos{};
896  (m_cz, m_spectral_domain_z, inmf, IntVect(0), dtos);
897  ParallelCopy(m_cz, inmf, cmd, 0, 0, 1, dtos);
898  } else if (!m_cy.empty()) { // (x,y,z) -> m_cy's ordering (y,x,z)
900  (m_cy, m_spectral_domain_y, inmf, IntVect(0), m_dtos_x2y);
901  ParallelCopy(m_cy, inmf, cmd, 0, 0, 1, m_dtos_x2y);
902  } else {
903  m_cx.ParallelCopy(inmf, 0, 0, 1);
904  }
905  backward_doit(outmf, ngout, period);
906  }
907 }
908 
909 template <typename T, Direction D, DomainStrategy S>
910 std::pair<BoxArray,DistributionMapping>
912 {
913 #if (AMREX_SPACEDIM > 1)
914  if (m_r2c_sub) {
915  auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
916  return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
917  }
918 #endif
919 
920 #if (AMREX_SPACEDIM == 3)
921  if (!m_cz.empty()) {
922  BoxList bl = m_cz.boxArray().boxList();
923  for (auto& b : bl) {
924  auto lo = b.smallEnd();
925  auto hi = b.bigEnd();
926  std::swap(lo[0], lo[1]);
927  std::swap(lo[1], lo[2]);
928  std::swap(hi[0], hi[1]);
929  std::swap(hi[1], hi[2]);
930  b.setSmall(lo);
931  b.setBig(hi);
932  }
933  return std::make_pair(BoxArray(std::move(bl)), m_cz.DistributionMap());
934  } else
935 #endif
936 #if (AMREX_SPACEDIM >= 2)
937  if (!m_cy.empty()) {
938  BoxList bl = m_cy.boxArray().boxList();
939  for (auto& b : bl) {
940  auto lo = b.smallEnd();
941  auto hi = b.bigEnd();
942  std::swap(lo[0], lo[1]);
943  std::swap(hi[0], hi[1]);
944  b.setSmall(lo);
945  b.setBig(hi);
946  }
947  return std::make_pair(BoxArray(std::move(bl)), m_cy.DistributionMap());
948  } else
949 #endif
950  {
951  return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
952  }
953 }
954 
955 }
956 
957 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT(EX)
Definition: AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
if(!(yy_init))
Definition: amrex_iparser.lex.nolint.H:935
A collection of Boxes stored in an Array.
Definition: AMReX_BoxArray.H:550
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition: AMReX_BoxList.H:52
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & smallEnd() const &noexcept
Get the smallend of the BoxND.
Definition: AMReX_Box.H:105
AMREX_GPU_HOST_DEVICE const IntVectND< dim > & bigEnd() const &noexcept
Get the bigend.
Definition: AMReX_Box.H:116
AMREX_GPU_HOST_DEVICE IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition: AMReX_Box.H:146
AMREX_GPU_HOST_DEVICE BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition: AMReX_Box.H:659
AMREX_GPU_HOST_DEVICE BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition: AMReX_Box.H:648
AMREX_GPU_HOST_DEVICE IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition: AMReX_Box.H:139
AMREX_GPU_HOST_DEVICE bool ok() const noexcept
Checks if it is a proper BoxND (including a valid type).
Definition: AMReX_Box.H:200
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition: AMReX_Box.H:346
Calculates the distribution of FABs to MPI processes.
Definition: AMReX_DistributionMapping.H:41
Long size() const noexcept
Length of the underlying processor map.
Definition: AMReX_DistributionMapping.H:127
Definition: AMReX_FFT_OpenBCSolver.H:11
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition: AMReX_FFT_Poisson.H:106
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition: AMReX_FFT_Poisson.H:22
Parallel Discrete Fourier Transform.
Definition: AMReX_FFT_R2C.H:36
T scalingFactor() const
Definition: AMReX_FFT_R2C.H:757
void prepare_openbc()
Definition: AMReX_FFT_R2C.H:469
std::unique_ptr< MultiBlockCommMetaData > m_cmd_x2z_half
Definition: AMReX_FFT_R2C.H:202
std::unique_ptr< MultiBlockCommMetaData > m_cmd_z2x_half
Definition: AMReX_FFT_R2C.H:203
R2C(R2C &&)=delete
void backward_doit(cMF const &inmf, MF &outmf, IntVect const &ngout=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
Definition: AMReX_FFT_R2C.H:856
static std::pair< Plan< T >, Plan< T > > make_c2c_plans(cMF &inout)
Definition: AMReX_FFT_R2C.H:641
std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > MF
Definition: AMReX_FFT_R2C.H:39
std::pair< cMF *, IntVect > getSpectralData()
Get the internal spectral data.
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward)
Forward and then backward transform.
Definition: AMReX_FFT_R2C.H:82
MF m_rx
Definition: AMReX_FFT_R2C.H:211
bool m_openbc_half
Definition: AMReX_FFT_R2C.H:231
std::unique_ptr< R2C< T, D, S > > m_r2c_sub
Definition: AMReX_FFT_R2C.H:224
detail::SubHelper m_sub_helper
Definition: AMReX_FFT_R2C.H:225
Box m_real_domain
Definition: AMReX_FFT_R2C.H:219
Swap02 m_dtos_z2y
Definition: AMReX_FFT_R2C.H:207
Swap01 m_dtos_y2x
Definition: AMReX_FFT_R2C.H:205
std::unique_ptr< MultiBlockCommMetaData > m_cmd_z2y
Definition: AMReX_FFT_R2C.H:199
Info m_info
Definition: AMReX_FFT_R2C.H:227
Plan< T > m_fft_fwd_x_half
Definition: AMReX_FFT_R2C.H:190
cMF m_cy
Definition: AMReX_FFT_R2C.H:213
std::unique_ptr< MultiBlockCommMetaData > m_cmd_x2z
Definition: AMReX_FFT_R2C.H:200
Box m_spectral_domain_z
Definition: AMReX_FFT_R2C.H:222
void forward(MF const &inmf, cMF &outmf)
Forward transform.
Definition: AMReX_FFT_R2C.H:798
bool m_do_alld_fft
Definition: AMReX_FFT_R2C.H:229
std::unique_ptr< MultiBlockCommMetaData > m_cmd_y2z
Definition: AMReX_FFT_R2C.H:198
Plan< T > m_fft_bwd_z
Definition: AMReX_FFT_R2C.H:189
RotateBwd m_dtos_z2x
Definition: AMReX_FFT_R2C.H:209
void backward(MF &outmf)
Backward transform.
Definition: AMReX_FFT_R2C.H:584
R2C(R2C const &)=delete
Box m_spectral_domain_y
Definition: AMReX_FFT_R2C.H:221
Plan< T > m_fft_fwd_y
Definition: AMReX_FFT_R2C.H:186
std::unique_ptr< MultiBlockCommMetaData > m_cmd_y2x
Definition: AMReX_FFT_R2C.H:197
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition: AMReX_FFT_R2C.H:911
void post_forward_doit(F const &post_forward)
Definition: AMReX_FFT_R2C.H:669
std::unique_ptr< char, DataDeleter > m_data_1
Definition: AMReX_FFT_R2C.H:216
std::unique_ptr< MultiBlockCommMetaData > m_cmd_z2x
Definition: AMReX_FFT_R2C.H:201
std::unique_ptr< MultiBlockCommMetaData > m_cmd_x2y
Definition: AMReX_FFT_R2C.H:196
void backward_doit(MF &outmf, IntVect const &ngout=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
Definition: AMReX_FFT_R2C.H:590
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition: AMReX_FFT_R2C.H:235
R2C & operator=(R2C const &)=delete
std::unique_ptr< char, DataDeleter > m_data_2
Definition: AMReX_FFT_R2C.H:217
Swap02 m_dtos_y2z
Definition: AMReX_FFT_R2C.H:206
Plan< T > m_fft_bwd_y
Definition: AMReX_FFT_R2C.H:187
bool m_slab_decomp
Definition: AMReX_FFT_R2C.H:230
Box m_spectral_domain_x
Definition: AMReX_FFT_R2C.H:220
cMF m_cz
Definition: AMReX_FFT_R2C.H:214
Swap01 m_dtos_x2y
Definition: AMReX_FFT_R2C.H:204
Plan< T > m_fft_fwd_z
Definition: AMReX_FFT_R2C.H:188
RotateFwd m_dtos_x2z
Definition: AMReX_FFT_R2C.H:208
~R2C()
Definition: AMReX_FFT_R2C.H:448
Plan< T > m_fft_bwd_x
Definition: AMReX_FFT_R2C.H:185
void backward(cMF const &inmf, MF &outmf)
Backward transform.
Definition: AMReX_FFT_R2C.H:850
void forward(MF const &inmf)
Forward transform.
Definition: AMReX_FFT_R2C.H:525
Plan< T > m_fft_fwd_x
Definition: AMReX_FFT_R2C.H:184
cMF m_cx
Definition: AMReX_FFT_R2C.H:212
Plan< T > m_fft_bwd_x_half
Definition: AMReX_FFT_R2C.H:191
IntVect nGrowVect() const noexcept
Definition: AMReX_FabArrayBase.H:79
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition: AMReX_FabArrayBase.H:94
bool empty() const noexcept
Definition: AMReX_FabArrayBase.H:88
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition: AMReX_FabArrayBase.H:130
An Array of FortranArrayBox(FAB)-like Objects.
Definition: AMReX_FabArray.H:344
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition: AMReX_FabArray.H:778
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition: AMReX_FabArray.H:2027
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition: AMReX_FabArray.H:1818
A collection (stored as an array) of FArrayBox objects.
Definition: AMReX_MultiFab.H:38
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition: AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition: AMReX_Periodicity.cpp:52
FA::FABType::value_type * get_fab(FA &fa)
Definition: AMReX_FFT_Helper.H:1305
std::unique_ptr< char, DataDeleter > make_mfs_share(FA1 &fa1, FA2 &fa2)
Definition: AMReX_FFT_Helper.H:1316
DistributionMapping make_iota_distromap(Long n)
Definition: AMReX_FFT.cpp:88
Definition: AMReX_FFT.cpp:7
Direction
Definition: AMReX_FFT_Helper.H:48
DomainStrategy
Definition: AMReX_FFT_Helper.H:50
std::enable_if_t< IsBaseFab< FAB >) &&IsDataPacking< DataPacking, FAB >)> ParallelCopy_finish(FabArray< FAB > &dest, CommHandler handler, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition: AMReX_NonLocalBC.H:793
AMREX_NODISCARD CommHandler ParallelCopy_nowait(NoLocalCopy, FabArray< FAB > &dest, const FabArray< FAB > &src, const FabArrayBase::CommMetaData &cmd, const DataPacking &data_packing)
Definition: AMReX_NonLocalBC.H:701
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
int NProcs() noexcept
return the number of MPI ranks local to the current Parallel Context
Definition: AMReX_ParallelDescriptor.H:243
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void swap(T &a, T &b) noexcept
Definition: AMReX_algoim_K.H:113
@ min
Definition: AMReX_ParallelReduce.H:18
constexpr int iz
Definition: AMReX_Interp_3D_C.H:37
constexpr int iy
Definition: AMReX_Interp_2D_C.H:33
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 length(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:322
BoxArray decompose(Box const &domain, int nboxes, Array< bool, AMREX_SPACEDIM > const &decomp={AMREX_D_DECL(true, true, true)}, bool no_overlap=false)
Decompose domain box into BoxArray.
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition: AMReX_FabArrayUtility.H:1672
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMin(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:49
Definition: AMReX_FFT_Helper.H:58
bool batch_mode
Definition: AMReX_FFT_Helper.H:62
int nprocs
Max number of processes to use.
Definition: AMReX_FFT_Helper.H:65
Definition: AMReX_FFT_Helper.H:113
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition: AMReX_FFT_Helper.H:117
Definition: AMReX_FFT_Helper.H:1428
Definition: AMReX_FFT_Helper.H:1403
Definition: AMReX_FFT_Helper.H:1357
Definition: AMReX_FFT_Helper.H:1380
Definition: AMReX_FFT_Helper.H:1455
Box make_box(Box const &box) const
Definition: AMReX_FFT.cpp:142
FabArray memory allocation information.
Definition: AMReX_FabArray.H:66
This class specializes behaviour on local copies and unpacking receive buffers.
Definition: AMReX_NonLocalBC.H:615
This is the index mapping based on the DTOS MultiBlockDestToSrc.
Definition: AMReX_NonLocalBC.H:210
Contains information about which components take part of the data transaction.
Definition: AMReX_NonLocalBC.H:528