Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_FFT_R2C.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_R2C_H_
2#define AMREX_FFT_R2C_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_MultiFab.H>
6#include <AMReX_FFT_Helper.H>
7#include <algorithm>
8#include <numeric>
9#include <tuple>
10
11namespace amrex::FFT
12{
13
20template <typename T> class OpenBCSolver;
21template <typename T> class Poisson;
22template <typename T> class PoissonHybrid;
23
45template <typename T = Real, FFT::Direction D = FFT::Direction::both, bool C = false>
46class R2C
47{
48public:
50 using MF = std::conditional_t
51 <C, cMF, std::conditional_t<std::is_same_v<T,Real>,
53
54 template <typename U> friend class OpenBCSolver;
55 template <typename U> friend class Poisson;
56 template <typename U> friend class PoissonHybrid;
57
64 explicit R2C (Box const& domain, Info const& info = Info{});
65
75 explicit R2C (std::array<int,AMREX_SPACEDIM> const& domain_size,
76 Info const& info = Info{});
77
78 ~R2C ();
79
80 R2C (R2C const&) = delete;
81 R2C (R2C &&) = delete;
82 R2C& operator= (R2C const&) = delete;
83 R2C& operator= (R2C &&) = delete;
84
108 void setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
109 std::array<int,AMREX_SPACEDIM> const& local_size);
110
124 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
126
155 void setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
156 std::array<int,AMREX_SPACEDIM> const& local_size);
157
176 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
178
200 template <typename F, Direction DIR=D,
201 std::enable_if_t<DIR == Direction::both, int> = 0>
202 void forwardThenBackward (MF const& inmf, MF& outmf, F const& post_forward,
203 int incomp = 0, int outcomp = 0)
204 {
205 BL_PROFILE("FFT::R2C::forwardbackward");
206 this->forward(inmf, incomp);
207 this->post_forward_doit_0(post_forward);
208 this->backward(outmf, outcomp);
209 }
210
221 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
222 DIR == Direction::both, int> = 0>
223 void forward (MF const& inmf, int incomp = 0);
224
236 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
237 DIR == Direction::both, int> = 0>
238 void forward (MF const& inmf, cMF& outmf, int incomp = 0, int outcomp = 0);
239
256 template <typename RT, typename CT, Direction DIR=D, bool CP=C,
257 std::enable_if_t<(DIR == Direction::forward ||
258 DIR == Direction::both)
259 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
260 (sizeof(RT) == sizeof(CT) && CP)), int> = 0>
261 void forward (RT const* in, CT* out);
262
272 template <Direction DIR=D, std::enable_if_t<DIR == Direction::both, int> = 0>
273 void backward (MF& outmf, int outcomp = 0);
274
286 template <Direction DIR=D, std::enable_if_t<DIR == Direction::backward ||
287 DIR == Direction::both, int> = 0>
288 void backward (cMF const& inmf, MF& outmf, int incomp = 0, int outcomp = 0);
289
306 template <typename CT, typename RT, Direction DIR=D, bool CP=C,
307 std::enable_if_t<(DIR == Direction::backward ||
308 DIR == Direction::both)
309 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
310 (sizeof(RT) == sizeof(CT) && CP)), int> = 0>
311 void backward (CT const* in, RT* out);
312
320 [[nodiscard]] T scalingFactor () const;
321
333 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
334 DIR == Direction::both, int> = 0>
335 std::pair<cMF*,IntVect> getSpectralData () const;
336
346 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
347
353 template <typename F>
354 void post_forward_doit_0 (F const& post_forward);
355
356 template <typename F>
362 void post_forward_doit_1 (F const& post_forward);
363
364private:
365
366 void prepare_openbc ();
367
368 void backward_doit (MF& outmf, IntVect const& ngout = IntVect(0),
369 Periodicity const& period = Periodicity::NonPeriodic(),
370 int outcomp = 0);
371
372 void backward_doit (cMF const& inmf, MF& outmf,
373 IntVect const& ngout = IntVect(0),
374 Periodicity const& period = Periodicity::NonPeriodic(),
375 int incomp = 0, int outcomp = 0);
376
377 std::pair<Plan<T>,Plan<T>> make_c2c_plans (cMF& inout, int ndims) const;
378
379 static Box make_domain_x (Box const& domain)
380 {
381 if constexpr (C) {
382 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)-1,
383 domain.length(1)-1,
384 domain.length(2)-1)),
385 domain.ixType());
386 } else {
387 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2,
388 domain.length(1)-1,
389 domain.length(2)-1)),
390 domain.ixType());
391 }
392 }
393
394 static Box make_domain_y (Box const& domain)
395 {
396 if constexpr (C) {
397 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
398 domain.length(0)-1,
399 domain.length(2)-1)),
400 domain.ixType());
401 } else {
402 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
403 domain.length(0)/2,
404 domain.length(2)-1)),
405 domain.ixType());
406 }
407 }
408
409 static Box make_domain_z (Box const& domain)
410 {
411 if constexpr (C) {
412 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
413 domain.length(0)-1,
414 domain.length(1)-1)),
415 domain.ixType());
416 } else {
417 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
418 domain.length(0)/2,
419 domain.length(1)-1)),
420 domain.ixType());
421 }
422 }
423
424 static std::pair<BoxArray,DistributionMapping>
425 make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
426 std::array<int,AMREX_SPACEDIM> const& local_size);
427
428 template <typename FA, typename RT>
429 std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
430 install_raw_ptr (FA& fa, RT const* p);
431
432 Plan<T> m_fft_fwd_x{};
433 Plan<T> m_fft_bwd_x{};
434 Plan<T> m_fft_fwd_y{};
435 Plan<T> m_fft_bwd_y{};
436 Plan<T> m_fft_fwd_z{};
437 Plan<T> m_fft_bwd_z{};
438 Plan<T> m_fft_fwd_x_half{};
439 Plan<T> m_fft_bwd_x_half{};
440
441 // Comm meta-data. In the forward phase, we start with (x,y,z),
442 // transpose to (y,x,z) and then (z,x,y). In the backward phase, we
443 // perform inverse transpose.
444 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y; // (x,y,z) -> (y,x,z)
445 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x; // (y,x,z) -> (x,y,z)
446 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z; // (y,x,z) -> (z,x,y)
447 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
448 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
449 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
450 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
451 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
452 Swap01 m_dtos_x2y{};
453 Swap01 m_dtos_y2x{};
454 Swap02 m_dtos_y2z{};
455 Swap02 m_dtos_z2y{};
456 RotateFwd m_dtos_x2z{};
457 RotateBwd m_dtos_z2x{};
458
459 MF m_rx;
460 cMF m_cx;
461 cMF m_cy;
462 cMF m_cz;
463
464 mutable MF m_raw_mf;
465 mutable cMF m_raw_cmf;
466
467 std::unique_ptr<char,DataDeleter> m_data_1;
468 std::unique_ptr<char,DataDeleter> m_data_2;
469
470 Box m_real_domain;
471 Box m_spectral_domain_x;
472 Box m_spectral_domain_y;
473 Box m_spectral_domain_z;
474
475 std::unique_ptr<R2C<T,D,C>> m_r2c_sub;
476 detail::SubHelper m_sub_helper;
477
478 Info m_info;
479
480 bool m_do_alld_fft = false;
481 bool m_slab_decomp = false;
482 bool m_openbc_half = false;
483};
484
485template <typename T, Direction D, bool C>
486R2C<T,D,C>::R2C (Box const& domain, Info const& info)
487 : m_real_domain(domain),
488 m_spectral_domain_x(make_domain_x(domain)),
489#if (AMREX_SPACEDIM >= 2)
490 m_spectral_domain_y(make_domain_y(domain)),
491#if (AMREX_SPACEDIM == 3)
492 m_spectral_domain_z(make_domain_z(domain)),
493#endif
494#endif
495 m_sub_helper(domain),
496 m_info(info)
497{
498 BL_PROFILE("FFT::R2C");
499
500 static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
501
502 AMREX_ALWAYS_ASSERT(m_real_domain.numPts() > 1);
503#if (AMREX_SPACEDIM == 2)
505#else
506 if (m_info.twod_mode) {
507 AMREX_ALWAYS_ASSERT((int(domain.length(0) > 1) +
508 int(domain.length(1) > 1) +
509 int(domain.length(2) > 1)) >= 2);
510 }
511#endif
512
513 {
514 Box subbox = m_sub_helper.make_box(m_real_domain);
515 if (subbox.size() != m_real_domain.size()) {
516 m_r2c_sub = std::make_unique<R2C<T,D,C>>(subbox, m_info);
517 return;
518 }
519 }
520
521 int myproc = ParallelContext::MyProcSub();
522 int nprocs = std::min(ParallelContext::NProcsSub(), m_info.nprocs);
523
524#if (AMREX_SPACEDIM == 3)
526 if (m_info.twod_mode) {
528 } else {
529 int shortside = m_real_domain.shortside();
530 if (shortside < m_info.pencil_threshold*nprocs) {
532 } else {
534 }
535 }
536 }
537
538 if (!m_info.oned_mode) {
539 if (m_info.twod_mode) {
540 m_slab_decomp = true;
541 } else if (m_info.domain_strategy == DomainStrategy::slab && (m_real_domain.length(1) > 1)) {
542 m_slab_decomp = true;
543 }
544 }
545
546#endif
547
548 auto const ncomp = m_info.batch_size;
549
550 auto bax = amrex::decompose(m_real_domain, nprocs,
551 {AMREX_D_DECL(false,!m_slab_decomp,m_real_domain.length(2)>1)}, true);
552
553 DistributionMapping dmx = detail::make_iota_distromap(bax.size());
554 m_rx.define(bax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
555
556 {
557 BoxList bl = bax.boxList();
558 for (auto & b : bl) {
559 b.shift(-m_real_domain.smallEnd());
560 b.setBig(0, m_spectral_domain_x.bigEnd(0));
561 }
562 BoxArray cbax(std::move(bl));
563 m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
564 }
565
566 m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && (! m_info.twod_mode);
567
568 if (!m_do_alld_fft) // do a series of 1d or 2d ffts
569 {
570 //
571 // make data containers
572 //
573
574#if (AMREX_SPACEDIM >= 2)
576 if ((m_real_domain.length(1) > 1) && !m_slab_decomp && !m_info.oned_mode)
577 {
578 auto cbay = amrex::decompose(m_spectral_domain_y, nprocs,
579 {AMREX_D_DECL(false,true,true)}, true);
580 if (cbay.size() == dmx.size()) {
581 cdmy = dmx;
582 } else {
583 cdmy = detail::make_iota_distromap(cbay.size());
584 }
585 m_cy.define(cbay, cdmy, ncomp, 0, MFInfo().SetAlloc(false));
586 }
587#endif
588
589#if (AMREX_SPACEDIM == 3)
590 if (m_real_domain.length(1) > 1 &&
591 (! m_info.twod_mode && m_real_domain.length(2) > 1))
592 {
593 auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
594 {false,true,true}, true);
596 if (cbaz.size() == dmx.size()) {
597 cdmz = dmx;
598 } else if (cbaz.size() == cdmy.size()) {
599 cdmz = cdmy;
600 } else {
601 cdmz = detail::make_iota_distromap(cbaz.size());
602 }
603 m_cz.define(cbaz, cdmz, ncomp, 0, MFInfo().SetAlloc(false));
604 }
605#endif
606
607 if constexpr (C) {
608 if (m_slab_decomp) {
609 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
610 m_data_2 = detail::make_mfs_share(m_cz, m_cz);
611 } else {
612 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
613 m_data_2 = detail::make_mfs_share(m_cy, m_cy);
614 // make m_cx an alias to m_rx
615 if (myproc < m_cx.size()) {
616 Box const& box = m_cx.fabbox(myproc);
617 using FAB = typename cMF::FABType::value_type;
618 m_cx.setFab(myproc, FAB(box, ncomp, m_rx[myproc].dataPtr()));
619 }
620 }
621 } else {
622 if (m_slab_decomp) {
623 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
624 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
625 } else {
626 m_data_1 = detail::make_mfs_share(m_rx, m_cy);
627 m_data_2 = detail::make_mfs_share(m_cx, m_cz);
628 }
629 }
630
631 //
632 // make copiers
633 //
634
635#if (AMREX_SPACEDIM >= 2)
636 if (! m_cy.empty()) {
637 // comm meta-data between x and y phases
638 m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
639 (m_cy, m_spectral_domain_y, m_cx, IntVect(0), m_dtos_x2y);
640 m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
641 (m_cx, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
642 }
643#endif
644#if (AMREX_SPACEDIM == 3)
645 if (! m_cz.empty() ) {
646 if (m_slab_decomp) {
647 // comm meta-data between xy and z phases
648 m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
649 (m_cz, m_spectral_domain_z, m_cx, IntVect(0), m_dtos_x2z);
650 m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
651 (m_cx, m_spectral_domain_x, m_cz, IntVect(0), m_dtos_z2x);
652 } else {
653 // comm meta-data between y and z phases
654 m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
655 (m_cz, m_spectral_domain_z, m_cy, IntVect(0), m_dtos_y2z);
656 m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
657 (m_cy, m_spectral_domain_y, m_cz, IntVect(0), m_dtos_z2y);
658 }
659 }
660#endif
661
662 //
663 // make plans
664 //
665
666 if (myproc < m_rx.size())
667 {
668 if constexpr (C) {
669 int ndims = m_slab_decomp ? 2 : 1;
670 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx, ndims);
671 } else {
672 Box const& box = m_rx.box(myproc);
673 auto* pr = m_rx[myproc].dataPtr();
674 auto* pc = (typename Plan<T>::VendorComplex *)m_cx[myproc].dataPtr();
675#ifdef AMREX_USE_SYCL
676 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
677 m_fft_bwd_x = m_fft_fwd_x;
678#else
679 if constexpr (D == Direction::both || D == Direction::forward) {
680 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
681 }
682 if constexpr (D == Direction::both || D == Direction::backward) {
683 m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp, ncomp);
684 }
685#endif
686 }
687 }
688
689#if (AMREX_SPACEDIM >= 2)
690 if (! m_cy.empty()) {
691 std::tie(m_fft_fwd_y, m_fft_bwd_y) = make_c2c_plans(m_cy,1);
692 }
693#endif
694#if (AMREX_SPACEDIM == 3)
695 if (! m_cz.empty()) {
696 std::tie(m_fft_fwd_z, m_fft_bwd_z) = make_c2c_plans(m_cz,1);
697 }
698#endif
699 }
700 else // do fft in all dimensions at the same time
701 {
702 if constexpr (C) {
703 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
704 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx,AMREX_SPACEDIM);
705 } else {
706 m_data_1 = detail::make_mfs_share(m_rx, m_rx);
707 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
708
709 auto const& len = m_real_domain.length();
710 auto* pr = (void*)m_rx[0].dataPtr();
711 auto* pc = (void*)m_cx[0].dataPtr();
712#ifdef AMREX_USE_SYCL
713 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
714 m_fft_bwd_x = m_fft_fwd_x;
715#else
716 if constexpr (D == Direction::both || D == Direction::forward) {
717 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
718 }
719 if constexpr (D == Direction::both || D == Direction::backward) {
720 m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc, false, ncomp);
721 }
722#endif
723 }
724 }
725}
726
727template <typename T, Direction D, bool C>
728R2C<T,D,C>::R2C (std::array<int,AMREX_SPACEDIM> const& domain_size, Info const& info)
729 : R2C<T,D,C>(Box(IntVect(0),IntVect(domain_size)-1), info)
730{}
731
732template <typename T, Direction D, bool C>
734{
735 if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
736 m_fft_bwd_x.destroy();
737 }
738 if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
739 m_fft_bwd_y.destroy();
740 }
741 if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
742 m_fft_bwd_z.destroy();
743 }
744 m_fft_fwd_x.destroy();
745 m_fft_fwd_y.destroy();
746 m_fft_fwd_z.destroy();
747 if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
748 m_fft_bwd_x_half.destroy();
749 }
750 m_fft_fwd_x_half.destroy();
751}
752
753template <typename T, Direction D, bool C>
754std::pair<BoxArray,DistributionMapping>
755R2C<T,D,C>::make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
756 std::array<int,AMREX_SPACEDIM> const& local_size)
757{
758 IntVect lo(local_start);
759 IntVect len(local_size);
760 Box bx(lo, lo+len-1);
761#ifdef AMREX_USE_MPI
763 MPI_Allgather(&bx, 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
764 allboxes.data(), 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
766 Vector<int> pmap;
767 pmap.reserve(allboxes.size());
768 for (int i = 0; i < allboxes.size(); ++i) {
769 if (allboxes[i].ok()) {
770 pmap.push_back(i);
771 }
772 }
773 allboxes.erase(std::remove_if(allboxes.begin(), allboxes.end(),
774 [=] (Box const& b) { return b.isEmpty(); }),
775 allboxes.end());
776 BoxList bl(std::move(allboxes));
777 return std::make_pair(BoxArray(std::move(bl)), DistributionMapping(std::move(pmap)));
778#else
779 return std::make_pair(BoxArray(bx), DistributionMapping(Vector<int>({0})));
780#endif
781}
782
783template <typename T, Direction D, bool C>
784void R2C<T,D,C>::setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
785 std::array<int,AMREX_SPACEDIM> const& local_size)
786{
787 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
788 m_raw_mf = MF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
789}
790
791template <typename T, Direction D, bool C>
792std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
794{
795 m_raw_mf = MF(m_rx.boxArray(), m_rx.DistributionMap(), m_rx.nComp(), 0,
796 MFInfo{}.SetAlloc(false));
797
798 auto const myproc = ParallelDescriptor::MyProc();
799 if (myproc < m_rx.size()) {
800 Box const& box = m_rx.box(myproc);
801 return std::make_pair(box.smallEnd().toArray(),
802 box.length().toArray());
803 } else {
804 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
805 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
806 }
807}
808
809template <typename T, Direction D, bool C>
810void R2C<T,D,C>::setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
811 std::array<int,AMREX_SPACEDIM> const& local_size)
812{
813 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
814 m_raw_cmf = cMF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
815}
816
817template <typename T, Direction D, bool C>
818std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
820{
821 auto const ncomp = m_info.batch_size;
822 auto const& [ba, dm] = getSpectralDataLayout();
823
824 m_raw_cmf = cMF(ba, dm, ncomp, 0, MFInfo{}.SetAlloc(false));
825
826 auto const myproc = ParallelDescriptor::MyProc();
827 if (myproc < m_raw_cmf.size()) {
828 Box const& box = m_raw_cmf.box(myproc);
829 return std::make_pair(box.smallEnd().toArray(), box.length().toArray());
830 } else {
831 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
832 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
833 }
834}
835
836template <typename T, Direction D, bool C>
838{
839 if (C || m_r2c_sub) { amrex::Abort("R2C: OpenBC not supported with reduced dimensions or complex inputs"); }
840
841#if (AMREX_SPACEDIM == 3)
842 if (m_do_alld_fft) { return; }
843
844 auto const ncomp = m_info.batch_size;
845
846 if (m_slab_decomp && ! m_fft_fwd_x_half.defined) {
847 auto* fab = detail::get_fab(m_rx);
848 if (fab) {
849 Box bottom_half = m_real_domain;
850 bottom_half.growHi(2,-m_real_domain.length(2)/2);
851 Box box = fab->box() & bottom_half;
852 if (box.ok()) {
853 auto* pr = fab->dataPtr();
854 auto* pc = (typename Plan<T>::VendorComplex *)
855 detail::get_fab(m_cx)->dataPtr();
856#ifdef AMREX_USE_SYCL
857 m_fft_fwd_x_half.template init_r2c<Direction::forward>
858 (box, pr, pc, m_slab_decomp, ncomp);
859 m_fft_bwd_x_half = m_fft_fwd_x_half;
860#else
861 if constexpr (D == Direction::both || D == Direction::forward) {
862 m_fft_fwd_x_half.template init_r2c<Direction::forward>
863 (box, pr, pc, m_slab_decomp, ncomp);
864 }
865 if constexpr (D == Direction::both || D == Direction::backward) {
866 m_fft_bwd_x_half.template init_r2c<Direction::backward>
867 (box, pr, pc, m_slab_decomp, ncomp);
868 }
869#endif
870 }
871 }
872 } // else todo
873
874 if (m_cmd_x2z && ! m_cmd_x2z_half) {
875 Box bottom_half = m_spectral_domain_z;
876 // Note that z-direction's index is 0 because we z is the
877 // unit-stride direction here.
878 bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
879 m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
880 (m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
881 }
882
883 if (m_cmd_z2x && ! m_cmd_z2x_half) {
884 Box bottom_half = m_spectral_domain_x;
885 bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
886 m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
887 (m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
888 }
889#endif
890}
891
892template <typename T, Direction D, bool C>
893template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
894 DIR == Direction::both, int> >
895void R2C<T,D,C>::forward (MF const& inmf, int incomp)
896{
897 BL_PROFILE("FFT::R2C::forward(in)");
898
899 auto const ncomp = m_info.batch_size;
900
901 if (m_r2c_sub) {
902 if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
903 m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf), incomp);
904 } else {
905 MF tmp(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
906 tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
907 m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp),0);
908 }
909 return;
910 }
911
912 if (&m_rx != &inmf) {
913 m_rx.ParallelCopy(inmf, incomp, 0, ncomp);
914 }
915
916 if (m_do_alld_fft) {
917 if constexpr (C) {
918 m_fft_fwd_x.template compute_c2c<Direction::forward>();
919 } else {
920 m_fft_fwd_x.template compute_r2c<Direction::forward>();
921 }
922 return;
923 }
924
925 auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
926 if constexpr (C) {
927 fft_x.template compute_c2c<Direction::forward>();
928 } else {
929 fft_x.template compute_r2c<Direction::forward>();
930 }
931
932 if ( m_cmd_x2y) {
933 ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, ncomp, m_dtos_x2y);
934 }
935 m_fft_fwd_y.template compute_c2c<Direction::forward>();
936
937 if ( m_cmd_y2z) {
938 ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, ncomp, m_dtos_y2z);
939 }
940#if (AMREX_SPACEDIM == 3)
941 else if ( m_cmd_x2z) {
942 if (m_openbc_half) {
943 NonLocalBC::PackComponents components{};
944 components.n_components = ncomp;
946 {components, m_dtos_x2z};
947 auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
948
949 Box upper_half = m_spectral_domain_z;
950 // Note that z-direction's index is 0 because we z is the
951 // unit-stride direction here.
952 upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
953 m_cz.setVal(0, upper_half, 0, ncomp);
954
955 ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
956 } else {
957 ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, ncomp, m_dtos_x2z);
958 }
959 }
960#endif
961 m_fft_fwd_z.template compute_c2c<Direction::forward>();
962}
963
964template <typename T, Direction D, bool C>
965template <typename FA, typename RT>
966std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
967R2C<T,D,C>::install_raw_ptr (FA& fa, RT const* p)
968{
969 AMREX_ALWAYS_ASSERT(!fa.empty());
970
971 using FAB = typename FA::FABType::value_type;
972 using T_FAB = typename FAB::value_type;
973 static_assert(sizeof(T_FAB) == sizeof(RT));
974
975 auto const ncomp = m_info.batch_size;
976 auto const& ia = fa.IndexArray();
977
978 T_FAB* pp = nullptr;
979 std::size_t sz = 0;
980
981 if ( ! ia.empty() ) {
982 int K = ia[0];
983 Box const& box = fa.fabbox(K);
984 if ((alignof(T_FAB) == alignof(RT)) || amrex::is_aligned(p,alignof(T_FAB))) {
985 pp = (T_FAB*)p;
986 } else {
987 sz = sizeof(T_FAB) * box.numPts() * ncomp;
988 pp = (T_FAB*) The_Arena()->alloc(sz);
989 }
990 fa.setFab(K, FAB(box,ncomp,pp));
991 }
992
993 if (sz == 0) {
994 return std::make_pair(std::unique_ptr<char,DataDeleter>{},std::size_t(0));
995 } else {
996 return std::make_pair(std::unique_ptr<char,DataDeleter>
997 {(char*)pp,DataDeleter{The_Arena()}}, sz);
998 }
999}
1000
1001
1002template <typename T, Direction D, bool C>
1003template <typename RT, typename CT, Direction DIR, bool CP,
1004 std::enable_if_t<(DIR == Direction::forward ||
1005 DIR == Direction::both)
1006 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
1007 (sizeof(RT) == sizeof(CT) && CP)), int> >
1008void R2C<T,D,C>::forward (RT const* in, CT* out)
1009{
1010 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, in);
1011 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, out);
1012
1013 if (rsz > 0) {
1014 Gpu::dtod_memcpy_async(rdata.get(),in,rsz);
1016 }
1017
1018 forward(m_raw_mf, m_raw_cmf);
1019
1020 if (csz) {
1021 Gpu::dtod_memcpy_async(out,cdata.get(),csz);
1023 }
1024}
1025
1026template <typename T, Direction D, bool C>
1027template <Direction DIR, std::enable_if_t<DIR == Direction::both, int> >
1028void R2C<T,D,C>::backward (MF& outmf, int outcomp)
1029{
1030 backward_doit(outmf, IntVect(0), Periodicity::NonPeriodic(), outcomp);
1031}
1032
1033template <typename T, Direction D, bool C>
1034void R2C<T,D,C>::backward_doit (MF& outmf, IntVect const& ngout,
1035 Periodicity const& period, int outcomp)
1036{
1037 BL_PROFILE("FFT::R2C::backward(out)");
1038
1039 auto const ncomp = m_info.batch_size;
1040
1041 if (m_r2c_sub) {
1042 if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
1043 MF submf = m_sub_helper.make_alias_mf(outmf);
1044 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1045 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1046 m_r2c_sub->backward_doit(submf, subngout, subperiod, outcomp);
1047 } else {
1048 MF tmp(outmf.boxArray(), outmf.DistributionMap(), ncomp,
1049 m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
1050 this->backward_doit(tmp, ngout, period, 0);
1051 outmf.LocalCopy(tmp, 0, outcomp, ncomp, tmp.nGrowVect());
1052 }
1053 return;
1054 }
1055
1056 if (m_do_alld_fft) {
1057 if constexpr (C) {
1058 m_fft_bwd_x.template compute_c2c<Direction::backward>();
1059 } else {
1060 m_fft_bwd_x.template compute_r2c<Direction::backward>();
1061 }
1062 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1063 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1064 return;
1065 }
1066
1067 m_fft_bwd_z.template compute_c2c<Direction::backward>();
1068 if ( m_cmd_z2y) {
1069 ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, ncomp, m_dtos_z2y);
1070 }
1071#if (AMREX_SPACEDIM == 3)
1072 else if ( m_cmd_z2x) {
1073 auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
1074 ParallelCopy(m_cx, m_cz, *cmd, 0, 0, ncomp, m_dtos_z2x);
1075 }
1076#endif
1077
1078 m_fft_bwd_y.template compute_c2c<Direction::backward>();
1079 if ( m_cmd_y2x) {
1080 ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, ncomp, m_dtos_y2x);
1081 }
1082
1083 auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
1084 if constexpr (C) {
1085 fft_x.template compute_c2c<Direction::backward>();
1086 } else {
1087 fft_x.template compute_r2c<Direction::backward>();
1088 }
1089 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1090 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1091}
1092
1093template <typename T, Direction D, bool C>
1094template <typename CT, typename RT, Direction DIR, bool CP,
1095 std::enable_if_t<(DIR == Direction::backward ||
1096 DIR == Direction::both)
1097 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
1098 (sizeof(RT) == sizeof(CT) && CP)), int> >
1099void R2C<T,D,C>::backward (CT const* in, RT* out)
1100{
1101 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, out);
1102 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, in);
1103
1104 if (csz) {
1105 Gpu::dtod_memcpy_async(cdata.get(),in,csz);
1107 }
1108
1109 backward(m_raw_cmf, m_raw_mf);
1110
1111 if (rsz > 0) {
1112 Gpu::dtod_memcpy_async(out,rdata.get(),rsz);
1114 }
1115}
1116
1117template <typename T, Direction D, bool C>
1118std::pair<Plan<T>, Plan<T>>
1119R2C<T,D,C>::make_c2c_plans (cMF& inout, int ndims) const
1120{
1121 Plan<T> fwd;
1122 Plan<T> bwd;
1123
1124 auto* fab = detail::get_fab(inout);
1125 if (!fab) { return {fwd, bwd};}
1126
1127 Box const& box = fab->box();
1128 auto* pio = (typename Plan<T>::VendorComplex *)fab->dataPtr();
1129
1130 auto const ncomp = m_info.batch_size;
1131
1132#ifdef AMREX_USE_SYCL
1133 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1134 bwd = fwd;
1135#else
1136 if constexpr (D == Direction::both || D == Direction::forward) {
1137 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1138 }
1139 if constexpr (D == Direction::both || D == Direction::backward) {
1140 bwd.template init_c2c<Direction::backward>(box, pio, ncomp, ndims);
1141 }
1142#endif
1143
1144 return {fwd, bwd};
1145}
1146
1147template <typename T, Direction D, bool C>
1148template <typename F>
1149void R2C<T,D,C>::post_forward_doit_0 (F const& post_forward)
1150{
1151 if (m_info.twod_mode || m_info.batch_size > 1) {
1152 amrex::Abort("xxxxx todo: post_forward");
1153#if (AMREX_SPACEDIM > 1)
1154 } else if (m_r2c_sub) {
1155 // We need to pass the originally ordered indices to post_forward.
1156#if (AMREX_SPACEDIM == 2)
1157 // The original domain is (1,ny). The sub domain is (ny,1).
1158 m_r2c_sub->post_forward_doit_1
1159 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1160 {
1161 post_forward(0, i, 0, sp);
1162 });
1163#else
1164 if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
1165 // Original domain: (1, 1, nz). Sub domain: (nz, 1, 1)
1166 m_r2c_sub->post_forward_doit_1
1167 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1168 {
1169 post_forward(0, 0, i, sp);
1170 });
1171 } else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
1172 // Original domain: (1, ny, 1). Sub domain: (ny, 1, 1)
1173 m_r2c_sub->post_forward_doit_1
1174 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1175 {
1176 post_forward(0, i, 0, sp);
1177 });
1178 } else if (m_real_domain.length(0) == 1) {
1179 // Original domain: (1, ny, nz). Sub domain: (ny, nz, 1)
1180 m_r2c_sub->post_forward_doit_1
1181 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1182 {
1183 post_forward(0, i, j, sp);
1184 });
1185 } else if (m_real_domain.length(1) == 1) {
1186 // Original domain: (nx, 1, nz). Sub domain: (nx, nz, 1)
1187 m_r2c_sub->post_forward_doit_1
1188 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1189 {
1190 post_forward(i, 0, j, sp);
1191 });
1192 } else {
1193 amrex::Abort("R2c::post_forward_doit_0: how did this happen?");
1194 }
1195#endif
1196#endif
1197 } else {
1198 this->post_forward_doit_1(post_forward);
1199 }
1200}
1201
1202template <typename T, Direction D, bool C>
1203template <typename F>
1204void R2C<T,D,C>::post_forward_doit_1 (F const& post_forward)
1205{
1206 if (m_info.twod_mode || m_info.batch_size > 1) {
1207 amrex::Abort("xxxxx todo: post_forward");
1208 } else if (m_r2c_sub) {
1209 amrex::Abort("R2C::post_forward_doit_1: How did this happen?");
1210 } else {
1211 if ( ! m_cz.empty()) {
1212 auto* spectral_fab = detail::get_fab(m_cz);
1213 if (spectral_fab) {
1214 auto const& a = spectral_fab->array(); // m_cz's ordering is z,x,y
1215 ParallelForOMP(spectral_fab->box(),
1216 [=] AMREX_GPU_DEVICE (int iz, int jx, int ky)
1217 {
1218 post_forward(jx,ky,iz,a(iz,jx,ky));
1219 });
1220 }
1221 } else if ( ! m_cy.empty()) {
1222 auto* spectral_fab = detail::get_fab(m_cy);
1223 if (spectral_fab) {
1224 auto const& a = spectral_fab->array(); // m_cy's ordering is y,x,z
1225 ParallelForOMP(spectral_fab->box(),
1226 [=] AMREX_GPU_DEVICE (int iy, int jx, int k)
1227 {
1228 post_forward(jx,iy,k,a(iy,jx,k));
1229 });
1230 }
1231 } else {
1232 auto* spectral_fab = detail::get_fab(m_cx);
1233 if (spectral_fab) {
1234 auto const& a = spectral_fab->array();
1235 ParallelForOMP(spectral_fab->box(),
1236 [=] AMREX_GPU_DEVICE (int i, int j, int k)
1237 {
1238 post_forward(i,j,k,a(i,j,k));
1239 });
1240 }
1241 }
1242 }
1243}
1244
1245template <typename T, Direction D, bool C>
1247{
1248#if (AMREX_SPACEDIM == 3)
1249 if (m_info.twod_mode) {
1250 return T(1)/T(Long(m_real_domain.length(0)) *
1251 Long(m_real_domain.length(1)));
1252 } else
1253#endif
1254 {
1255 return T(1)/T(m_real_domain.numPts());
1256 }
1257}
1258
1259template <typename T, Direction D, bool C>
1260template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
1261 DIR == Direction::both, int> >
1262std::pair<typename R2C<T,D,C>::cMF *, IntVect>
1264{
1265#if (AMREX_SPACEDIM > 1)
1266 if (m_r2c_sub) {
1267 auto [cmf, order] = m_r2c_sub->getSpectralData();
1268 return std::make_pair(cmf, m_sub_helper.inverse_order(order));
1269 } else
1270#endif
1271 if (!m_cz.empty()) {
1272 return std::make_pair(const_cast<cMF*>(&m_cz), IntVect{AMREX_D_DECL(2,0,1)});
1273 } else if (!m_cy.empty()) {
1274 return std::make_pair(const_cast<cMF*>(&m_cy), IntVect{AMREX_D_DECL(1,0,2)});
1275 } else {
1276 return std::make_pair(const_cast<cMF*>(&m_cx), IntVect{AMREX_D_DECL(0,1,2)});
1277 }
1278}
1279
1280template <typename T, Direction D, bool C>
1281template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
1282 DIR == Direction::both, int> >
1283void R2C<T,D,C>::forward (MF const& inmf, cMF& outmf, int incomp, int outcomp)
1284{
1285 BL_PROFILE("FFT::R2C::forward(inout)");
1286
1287 auto const ncomp = m_info.batch_size;
1288
1289 if (m_r2c_sub)
1290 {
1291 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1292 MF inmf_sub, inmf_tmp;
1293 int incomp_sub;
1294 if (inmf_safe) {
1295 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1296 incomp_sub = incomp;
1297 } else {
1298 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1299 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1300 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1301 incomp_sub = 0;
1302 }
1303
1304 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1305 cMF outmf_sub, outmf_tmp;
1306 int outcomp_sub;
1307 if (outmf_safe) {
1308 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1309 outcomp_sub = outcomp;
1310 } else {
1311 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, 0);
1312 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1313 outcomp_sub = 0;
1314 }
1315
1316 m_r2c_sub->forward(inmf_sub, outmf_sub, incomp_sub, outcomp_sub);
1317
1318 if (!outmf_safe) {
1319 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, IntVect(0));
1320 }
1321 }
1322 else
1323 {
1324 forward(inmf, incomp);
1325 if (!m_cz.empty()) { // m_cz's order (z,x,y) -> (x,y,z)
1326 RotateBwd dtos{};
1328 (outmf, m_spectral_domain_x, m_cz, IntVect(0), dtos);
1329 ParallelCopy(outmf, m_cz, cmd, 0, outcomp, ncomp, dtos);
1330 } else if (!m_cy.empty()) { // m_cy's order (y,x,z) -> (x,y,z)
1332 (outmf, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
1333 ParallelCopy(outmf, m_cy, cmd, 0, outcomp, ncomp, m_dtos_y2x);
1334 } else {
1335 outmf.ParallelCopy(m_cx, 0, outcomp, ncomp);
1336 }
1337 }
1338}
1339
1340template <typename T, Direction D, bool C>
1341template <Direction DIR, std::enable_if_t<DIR == Direction::backward ||
1342 DIR == Direction::both, int> >
1343void R2C<T,D,C>::backward (cMF const& inmf, MF& outmf, int incomp, int outcomp)
1344{
1345 backward_doit(inmf, outmf, IntVect(0), Periodicity::NonPeriodic(), incomp, outcomp);
1346}
1347
1348template <typename T, Direction D, bool C>
1349void R2C<T,D,C>::backward_doit (cMF const& inmf, MF& outmf, IntVect const& ngout,
1350 Periodicity const& period, int incomp, int outcomp)
1351{
1352 BL_PROFILE("FFT::R2C::backward(inout)");
1353
1354 auto const ncomp = m_info.batch_size;
1355
1356 if (m_r2c_sub)
1357 {
1358 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1359 cMF inmf_sub, inmf_tmp;
1360 int incomp_sub;
1361 if (inmf_safe) {
1362 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1363 incomp_sub = incomp;
1364 } else {
1365 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1366 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1367 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1368 incomp_sub = 0;
1369 }
1370
1371 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1372 MF outmf_sub, outmf_tmp;
1373 int outcomp_sub;
1374 if (outmf_safe) {
1375 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1376 outcomp_sub = outcomp;
1377 } else {
1378 IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
1379 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, ngtmp);
1380 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1381 outcomp_sub = 0;
1382 }
1383
1384 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1385 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1386 m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod, incomp_sub, outcomp_sub);
1387
1388 if (!outmf_safe) {
1389 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, outmf_tmp.nGrowVect());
1390 }
1391 }
1392 else
1393 {
1394 if (!m_cz.empty()) { // (x,y,z) -> m_cz's order (z,x,y)
1395 RotateFwd dtos{};
1396 MultiBlockCommMetaData cmd
1397 (m_cz, m_spectral_domain_z, inmf, IntVect(0), dtos);
1398 ParallelCopy(m_cz, inmf, cmd, incomp, 0, ncomp, dtos);
1399 } else if (!m_cy.empty()) { // (x,y,z) -> m_cy's ordering (y,x,z)
1400 MultiBlockCommMetaData cmd
1401 (m_cy, m_spectral_domain_y, inmf, IntVect(0), m_dtos_x2y);
1402 ParallelCopy(m_cy, inmf, cmd, incomp, 0, ncomp, m_dtos_x2y);
1403 } else {
1404 m_cx.ParallelCopy(inmf, incomp, 0, ncomp);
1405 }
1406 backward_doit(outmf, ngout, period, outcomp);
1407 }
1408}
1409
1410template <typename T, Direction D, bool C>
1411std::pair<BoxArray,DistributionMapping>
1413{
1414#if (AMREX_SPACEDIM > 1)
1415 if (m_r2c_sub) {
1416 auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
1417 return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
1418 }
1419#endif
1420
1421#if (AMREX_SPACEDIM == 3)
1422 if (!m_cz.empty()) {
1423 BoxList bl = m_cz.boxArray().boxList();
1424 for (auto& b : bl) {
1425 auto lo = b.smallEnd();
1426 auto hi = b.bigEnd();
1427 std::swap(lo[0], lo[1]);
1428 std::swap(lo[1], lo[2]);
1429 std::swap(hi[0], hi[1]);
1430 std::swap(hi[1], hi[2]);
1431 b.setSmall(lo);
1432 b.setBig(hi);
1433 }
1434 return std::make_pair(BoxArray(std::move(bl)), m_cz.DistributionMap());
1435 } else
1436#endif
1437#if (AMREX_SPACEDIM >= 2)
1438 if (!m_cy.empty()) {
1439 BoxList bl = m_cy.boxArray().boxList();
1440 for (auto& b : bl) {
1441 auto lo = b.smallEnd();
1442 auto hi = b.bigEnd();
1443 std::swap(lo[0], lo[1]);
1444 std::swap(hi[0], hi[1]);
1445 b.setSmall(lo);
1446 b.setBig(hi);
1447 }
1448 return std::make_pair(BoxArray(std::move(bl)), m_cy.DistributionMap());
1449 } else
1450#endif
1451 {
1452 return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
1453 }
1454}
1455
1457template <typename T = Real, FFT::Direction D = FFT::Direction::both>
1459
1460}
1461
1462#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:568
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
BoxList & shift(int dir, int nzones)
Applies Box::shift(int,int) to each Box in the BoxList.
Definition AMReX_BoxList.cpp:565
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:123
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:356
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:154
__host__ __device__ int shortside(int &dir) const noexcept
Return length of shortest side. dir is modified to give direction with shortest side: 0....
Definition AMReX_Box.H:437
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:147
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:662
__host__ __device__ IndexTypeND< dim > ixType() const noexcept
Return the indexing type.
Definition AMReX_Box.H:135
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:673
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:111
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Long size() const noexcept
Length of the underlying processor map.
Definition AMReX_DistributionMapping.H:129
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:24
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:187
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:67
Parallel Discrete Fourier Transform.
Definition AMReX_FFT_R2C.H:47
std::conditional_t< C, cMF, std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > > MF
Definition AMReX_FFT_R2C.H:52
R2C & operator=(R2C const &)=delete
void forward(MF const &inmf, cMF &outmf, int incomp=0, int outcomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:1283
~R2C()
Definition AMReX_FFT_R2C.H:733
void setLocalDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local domain.
Definition AMReX_FFT_R2C.H:784
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:486
void backward(cMF const &inmf, MF &outmf, int incomp=0, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:1343
R2C(R2C &&)=delete
void backward(MF &outmf, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:1028
void post_forward_doit_1(F const &post_forward)
CUDA-visible helper that redistributes and applies post_forward for the batched layout.
Definition AMReX_FFT_R2C.H:1204
T scalingFactor() const
Scaling factor. If the data goes through forward and then backward, the result multiplied by the scal...
Definition AMReX_FFT_R2C.H:1246
void post_forward_doit_0(F const &post_forward)
CUDA-visible hook that walks internal spectral data and applies post_forward.
Definition AMReX_FFT_R2C.H:1149
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalDomain() const
Get local domain.
Definition AMReX_FFT_R2C.H:793
void forward(RT const *in, CT *out)
Forward transform.
Definition AMReX_FFT_R2C.H:1008
R2C(R2C const &)=delete
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition AMReX_FFT_R2C.H:1412
std::pair< cMF *, IntVect > getSpectralData() const
Get the internal spectral data.
R2C(std::array< int, 3 > const &domain_size, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:728
FabArray< BaseFab< GpuComplex< T > > > cMF
Definition AMReX_FFT_R2C.H:49
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalSpectralDomain() const
Get local spectral domain.
Definition AMReX_FFT_R2C.H:819
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward, int incomp=0, int outcomp=0)
Forward and then backward transform.
Definition AMReX_FFT_R2C.H:202
void setLocalSpectralDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local spectral domain.
Definition AMReX_FFT_R2C.H:810
void backward(CT const *in, RT *out)
Backward transform.
Definition AMReX_FFT_R2C.H:1099
void forward(MF const &inmf, int incomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:895
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
Box fabbox(int K) const noexcept
Return the Kth FABs Box in the FabArray. That is, the region the Kth fab is actually defined on.
Definition AMReX_FabArrayBase.cpp:217
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
void setFab(int boxno, std::unique_ptr< FAB > elem)
Explicitly set the Kth FAB in the FabArray to point to elem.
Definition AMReX_FabArray.H:2357
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:849
typename std::conditional_t< IsBaseFab< BaseFab< GpuComplex< T > > >::value, BaseFab< GpuComplex< T > >, FABType >::value_type value_type
Definition AMReX_FabArray.H:360
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition AMReX_FabArray.H:2173
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition AMReX_FabArray.H:1954
A collection (stored as an array) of FArrayBox objects.
Definition AMReX_MultiFab.H:40
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:319
Arena * The_Arena()
Definition AMReX_Arena.cpp:783
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
Definition AMReX_FFT_Helper.H:52
Direction
Definition AMReX_FFT_Helper.H:54
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:329
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
MPI_Comm Communicator() noexcept
Definition AMReX_ParallelDescriptor.H:223
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
bool is_aligned(const void *p, std::size_t alignment) noexcept
Return whether the address p is aligned to alignment bytes.
Definition AMReX_Arena.H:36
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1947
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:2019
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62
Definition AMReX_FFT_Helper.H:64
bool twod_mode
Definition AMReX_FFT_Helper.H:75
bool oned_mode
We might have a special twod_mode: nx or ny == 1 && nz > 1.
Definition AMReX_FFT_Helper.H:78
int batch_size
Batched FFT size. Only support in R2C, not R2X.
Definition AMReX_FFT_Helper.H:81
DomainStrategy domain_strategy
Domain composition strategy.
Definition AMReX_FFT_Helper.H:66
int nprocs
Max number of processes to use.
Definition AMReX_FFT_Helper.H:84
int pencil_threshold
Definition AMReX_FFT_Helper.H:70
Definition AMReX_FFT_Helper.H:180
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition AMReX_FFT_Helper.H:184
FabArray memory allocation information.
Definition AMReX_FabArray.H:66
MFInfo & SetAlloc(bool a) noexcept
Definition AMReX_FabArray.H:73
This class specializes behaviour on local copies and unpacking receive buffers.
Definition AMReX_NonLocalBC.H:626
This is the index mapping based on the DTOS MultiBlockDestToSrc.
Definition AMReX_NonLocalBC.H:216
Contains information about which components take part of the data transaction.
Definition AMReX_NonLocalBC.H:539
int n_components
Definition AMReX_NonLocalBC.H:542
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78