Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_FFT_R2C.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_R2C_H_
2#define AMREX_FFT_R2C_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_MultiFab.H>
6#include <AMReX_FFT_Helper.H>
7#include <algorithm>
8#include <numeric>
9#include <tuple>
10
11namespace amrex::FFT
12{
13
20template <typename T> class OpenBCSolver;
21template <typename T> class Poisson;
22template <typename T> class PoissonHybrid;
23
45template <typename T = Real, FFT::Direction D = FFT::Direction::both, bool C = false>
46class R2C
47{
48public:
50 using MF = std::conditional_t
51 <C, cMF, std::conditional_t<std::is_same_v<T,Real>,
53
54 template <typename U> friend class OpenBCSolver;
55 template <typename U> friend class Poisson;
56 template <typename U> friend class PoissonHybrid;
57
64 explicit R2C (Box const& domain, Info const& info = Info{});
65
75 explicit R2C (std::array<int,AMREX_SPACEDIM> const& domain_size,
76 Info const& info = Info{});
77
78 ~R2C ();
79
80 R2C (R2C const&) = delete;
81 R2C (R2C &&) = delete;
82 R2C& operator= (R2C const&) = delete;
83 R2C& operator= (R2C &&) = delete;
84
108 void setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
109 std::array<int,AMREX_SPACEDIM> const& local_size);
110
124 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
126
155 void setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
156 std::array<int,AMREX_SPACEDIM> const& local_size);
157
176 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
178
200 template <typename F, Direction DIR=D>
201 requires (DIR == Direction::both)
202 void forwardThenBackward (MF const& inmf, MF& outmf, F const& post_forward,
203 int incomp = 0, int outcomp = 0)
204 {
206 !m_info.twod_mode && m_info.batch_size == 1,
207 "FFT::R2C::forwardThenBackward(post_forward) currently supports only !twod_mode and batch_size==1");
208 BL_PROFILE("FFT::R2C::forwardbackward");
209 this->forward(inmf, incomp);
210 this->post_forward_doit_0(post_forward);
211 this->backward(outmf, outcomp);
212 }
213
224 template <Direction DIR=D>
225 requires ((DIR == Direction::forward) || (DIR == Direction::both))
226 void forward (MF const& inmf, int incomp = 0);
227
239 template <Direction DIR=D>
240 requires ((DIR == Direction::forward) || (DIR == Direction::both))
241 void forward (MF const& inmf, cMF& outmf, int incomp = 0, int outcomp = 0);
242
259 template <typename RT, typename CT, Direction DIR=D, bool CP=C>
260 requires ((DIR == Direction::forward) || (DIR == Direction::both))
261 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
262 || ((sizeof(RT) == sizeof(CT)) && CP))
263 void forward (RT const* in, CT* out);
264
274 template <Direction DIR=D>
275 requires (DIR == Direction::both)
276 void backward (MF& outmf, int outcomp = 0);
277
289 template <Direction DIR=D>
290 requires ((DIR == Direction::backward) || (DIR == Direction::both))
291 void backward (cMF const& inmf, MF& outmf, int incomp = 0, int outcomp = 0);
292
309 template <typename CT, typename RT, Direction DIR=D, bool CP=C>
310 requires ((DIR == Direction::backward) || (DIR == Direction::both))
311 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
312 || ((sizeof(RT) == sizeof(CT)) && CP))
313 void backward (CT const* in, RT* out);
314
322 [[nodiscard]] T scalingFactor () const;
323
335 template <Direction DIR=D>
336 requires ((DIR == Direction::forward) || (DIR == Direction::both))
337 std::pair<cMF*,IntVect> getSpectralData () const;
338
339
349 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
350
356 template <typename F>
357 void post_forward_doit_0 (F const& post_forward);
358
359 template <typename F>
365 void post_forward_doit_1 (F const& post_forward);
366
367private:
368
369 void prepare_openbc ();
370
371 void backward_doit (MF& outmf, IntVect const& ngout = IntVect(0),
372 Periodicity const& period = Periodicity::NonPeriodic(),
373 int outcomp = 0);
374
375 void backward_doit (cMF const& inmf, MF& outmf,
376 IntVect const& ngout = IntVect(0),
377 Periodicity const& period = Periodicity::NonPeriodic(),
378 int incomp = 0, int outcomp = 0);
379
380 std::pair<Plan<T>,Plan<T>> make_c2c_plans (cMF& inout, int ndims) const;
381
382 static Box make_domain_x (Box const& domain)
383 {
384 if constexpr (C) {
385 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)-1,
386 domain.length(1)-1,
387 domain.length(2)-1)),
388 domain.ixType());
389 } else {
390 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2,
391 domain.length(1)-1,
392 domain.length(2)-1)),
393 domain.ixType());
394 }
395 }
396
397 static Box make_domain_y (Box const& domain)
398 {
399 if constexpr (C) {
400 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
401 domain.length(0)-1,
402 domain.length(2)-1)),
403 domain.ixType());
404 } else {
405 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
406 domain.length(0)/2,
407 domain.length(2)-1)),
408 domain.ixType());
409 }
410 }
411
412 static Box make_domain_z (Box const& domain)
413 {
414 if constexpr (C) {
415 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
416 domain.length(0)-1,
417 domain.length(1)-1)),
418 domain.ixType());
419 } else {
420 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
421 domain.length(0)/2,
422 domain.length(1)-1)),
423 domain.ixType());
424 }
425 }
426
427 static std::pair<BoxArray,DistributionMapping>
428 make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
429 std::array<int,AMREX_SPACEDIM> const& local_size);
430
431 template <typename FA, typename RT>
432 std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
433 install_raw_ptr (FA& fa, RT const* p);
434
435 Plan<T> m_fft_fwd_x{};
436 Plan<T> m_fft_bwd_x{};
437 Plan<T> m_fft_fwd_y{};
438 Plan<T> m_fft_bwd_y{};
439 Plan<T> m_fft_fwd_z{};
440 Plan<T> m_fft_bwd_z{};
441 Plan<T> m_fft_fwd_x_half{};
442 Plan<T> m_fft_bwd_x_half{};
443
444 // Comm meta-data. In the forward phase, we start with (x,y,z),
445 // transpose to (y,x,z) and then (z,x,y). In the backward phase, we
446 // perform inverse transpose.
447 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y; // (x,y,z) -> (y,x,z)
448 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x; // (y,x,z) -> (x,y,z)
449 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z; // (y,x,z) -> (z,x,y)
450 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
451 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
452 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
453 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
454 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
455 Swap01 m_dtos_x2y{};
456 Swap01 m_dtos_y2x{};
457 Swap02 m_dtos_y2z{};
458 Swap02 m_dtos_z2y{};
459 RotateFwd m_dtos_x2z{};
460 RotateBwd m_dtos_z2x{};
461
462 MF m_rx;
463 cMF m_cx;
464 cMF m_cy;
465 cMF m_cz;
466
467 mutable MF m_raw_mf;
468 mutable cMF m_raw_cmf;
469
470 std::unique_ptr<char,DataDeleter> m_data_1;
471 std::unique_ptr<char,DataDeleter> m_data_2;
472
473 Box m_real_domain;
474 Box m_spectral_domain_x;
475 Box m_spectral_domain_y;
476 Box m_spectral_domain_z;
477
478 std::unique_ptr<R2C<T,D,C>> m_r2c_sub;
479 detail::SubHelper m_sub_helper;
480
481 Info m_info;
482
483 bool m_do_alld_fft = false;
484 bool m_slab_decomp = false;
485 bool m_openbc_half = false;
486};
487
488template <typename T, Direction D, bool C>
489R2C<T,D,C>::R2C (Box const& domain, Info const& info)
490 : m_real_domain(domain),
491 m_spectral_domain_x(make_domain_x(domain)),
492#if (AMREX_SPACEDIM >= 2)
493 m_spectral_domain_y(make_domain_y(domain)),
494#if (AMREX_SPACEDIM == 3)
495 m_spectral_domain_z(make_domain_z(domain)),
496#endif
497#endif
498 m_sub_helper(domain),
499 m_info(info)
500{
501 BL_PROFILE("FFT::R2C");
502
503 static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
504
505 AMREX_ALWAYS_ASSERT(m_real_domain.numPts() > 1);
506#if (AMREX_SPACEDIM == 2)
508#else
509 if (m_info.twod_mode) {
510 AMREX_ALWAYS_ASSERT((int(domain.length(0) > 1) +
511 int(domain.length(1) > 1) +
512 int(domain.length(2) > 1)) >= 2);
513 }
514#endif
515
516 {
517 Box subbox = m_sub_helper.make_box(m_real_domain);
518 if (subbox.size() != m_real_domain.size()) {
519 m_r2c_sub = std::make_unique<R2C<T,D,C>>(subbox, m_info);
520 return;
521 }
522 }
523
524 int myproc = ParallelContext::MyProcSub();
525 int nprocs = std::min(ParallelContext::NProcsSub(), m_info.nprocs);
526
527#if (AMREX_SPACEDIM == 3)
529 if (m_info.twod_mode) {
531 } else {
532 int shortside = m_real_domain.shortside();
533 if (shortside < m_info.pencil_threshold*nprocs) {
535 } else {
537 }
538 }
539 }
540
541 if (!m_info.oned_mode) {
542 if (m_info.twod_mode) {
543 m_slab_decomp = true;
544 } else if (m_info.domain_strategy == DomainStrategy::slab && (m_real_domain.length(1) > 1)) {
545 m_slab_decomp = true;
546 }
547 }
548#endif
549
550 auto const ncomp = m_info.batch_size;
551
552 auto bax = amrex::decompose(m_real_domain, nprocs,
553 {AMREX_D_DECL(false,!m_slab_decomp,m_real_domain.length(2)>1)}, true);
554
555 DistributionMapping dmx = detail::make_iota_distromap(bax.size());
556 m_rx.define(bax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
557
558 {
559 BoxList bl = bax.boxList();
560 for (auto & b : bl) {
561 b.shift(-m_real_domain.smallEnd());
562 b.setBig(0, m_spectral_domain_x.bigEnd(0));
563 }
564 BoxArray cbax(std::move(bl));
565 m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
566 }
567
568 m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) &&
569 (! m_info.twod_mode) && (! m_info.oned_mode);
570
571 if (!m_do_alld_fft) // do a series of 1d or 2d ffts
572 {
573 //
574 // make data containers
575 //
576
577#if (AMREX_SPACEDIM >= 2)
579 if ((m_real_domain.length(1) > 1) && !m_slab_decomp && !m_info.oned_mode)
580 {
581 auto cbay = amrex::decompose(m_spectral_domain_y, nprocs,
582 {AMREX_D_DECL(false,true,true)}, true);
583 if (cbay.size() == dmx.size()) {
584 cdmy = dmx;
585 } else {
586 cdmy = detail::make_iota_distromap(cbay.size());
587 }
588 m_cy.define(cbay, cdmy, ncomp, 0, MFInfo().SetAlloc(false));
589 }
590#endif
591
592#if (AMREX_SPACEDIM == 3)
593 if (!m_info.oned_mode && !m_info.twod_mode &&
594 m_real_domain.length(1) > 1 &&
595 m_real_domain.length(2) > 1)
596 {
597 auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
598 {false,true,true}, true);
600 if (cbaz.size() == dmx.size()) {
601 cdmz = dmx;
602 } else if (cbaz.size() == cdmy.size()) {
603 cdmz = cdmy;
604 } else {
605 cdmz = detail::make_iota_distromap(cbaz.size());
606 }
607 m_cz.define(cbaz, cdmz, ncomp, 0, MFInfo().SetAlloc(false));
608 }
609#endif
610
611 if constexpr (C) {
612 if (m_slab_decomp) {
613 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
614 m_data_2 = detail::make_mfs_share(m_cz, m_cz);
615 } else {
616 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
617 m_data_2 = detail::make_mfs_share(m_cy, m_cy);
618 // make m_cx an alias to m_rx
619 if (myproc < m_cx.size()) {
620 Box const& box = m_cx.fabbox(myproc);
621 using FAB = typename cMF::FABType::value_type;
622 m_cx.setFab(myproc, FAB(box, ncomp, m_rx[myproc].dataPtr()));
623 }
624 }
625 } else {
626 if (m_slab_decomp) {
627 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
628 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
629 } else {
630 m_data_1 = detail::make_mfs_share(m_rx, m_cy);
631 m_data_2 = detail::make_mfs_share(m_cx, m_cz);
632 }
633 }
634
635 //
636 // make copiers
637 //
638
639#if (AMREX_SPACEDIM >= 2)
640 if (! m_cy.empty()) {
641 // comm meta-data between x and y phases
642 m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
643 (m_cy, m_spectral_domain_y, m_cx, IntVect(0), m_dtos_x2y);
644 m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
645 (m_cx, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
646 }
647#endif
648#if (AMREX_SPACEDIM == 3)
649 if (! m_cz.empty() ) {
650 if (m_slab_decomp) {
651 // comm meta-data between xy and z phases
652 m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
653 (m_cz, m_spectral_domain_z, m_cx, IntVect(0), m_dtos_x2z);
654 m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
655 (m_cx, m_spectral_domain_x, m_cz, IntVect(0), m_dtos_z2x);
656 } else {
657 // comm meta-data between y and z phases
658 m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
659 (m_cz, m_spectral_domain_z, m_cy, IntVect(0), m_dtos_y2z);
660 m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
661 (m_cy, m_spectral_domain_y, m_cz, IntVect(0), m_dtos_z2y);
662 }
663 }
664#endif
665
666 //
667 // make plans
668 //
669
670 if (myproc < m_rx.size())
671 {
672 if constexpr (C) {
673 int ndims = m_slab_decomp ? 2 : 1;
674 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx, ndims);
675 } else {
676 Box const& box = m_rx.box(myproc);
677 auto* pr = m_rx[myproc].dataPtr();
678 auto* pc = (typename Plan<T>::VendorComplex *)m_cx[myproc].dataPtr();
679#ifdef AMREX_USE_SYCL
680 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
681 m_fft_bwd_x = m_fft_fwd_x;
682#else
683 if constexpr (D == Direction::both || D == Direction::forward) {
684 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
685 }
686 if constexpr (D == Direction::both || D == Direction::backward) {
687 m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp, ncomp);
688 }
689#endif
690 }
691 }
692
693#if (AMREX_SPACEDIM >= 2)
694 if (! m_cy.empty()) {
695 std::tie(m_fft_fwd_y, m_fft_bwd_y) = make_c2c_plans(m_cy,1);
696 }
697#endif
698#if (AMREX_SPACEDIM == 3)
699 if (! m_cz.empty()) {
700 std::tie(m_fft_fwd_z, m_fft_bwd_z) = make_c2c_plans(m_cz,1);
701 }
702#endif
703 }
704 else // do fft in all dimensions at the same time
705 {
706 if constexpr (C) {
707 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
708 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx,AMREX_SPACEDIM);
709 } else {
710 m_data_1 = detail::make_mfs_share(m_rx, m_rx);
711 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
712
713 auto const& len = m_real_domain.length();
714 auto* pr = (void*)m_rx[0].dataPtr();
715 auto* pc = (void*)m_cx[0].dataPtr();
716#ifdef AMREX_USE_SYCL
717 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
718 m_fft_bwd_x = m_fft_fwd_x;
719#else
720 if constexpr (D == Direction::both || D == Direction::forward) {
721 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
722 }
723 if constexpr (D == Direction::both || D == Direction::backward) {
724 m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc, false, ncomp);
725 }
726#endif
727 }
728 }
729}
730
731template <typename T, Direction D, bool C>
732R2C<T,D,C>::R2C (std::array<int,AMREX_SPACEDIM> const& domain_size, Info const& info)
733 : R2C<T,D,C>(Box(IntVect(0),IntVect(domain_size)-1), info)
734{}
735
736template <typename T, Direction D, bool C>
738{
739 if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
740 m_fft_bwd_x.destroy();
741 }
742 if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
743 m_fft_bwd_y.destroy();
744 }
745 if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
746 m_fft_bwd_z.destroy();
747 }
748 m_fft_fwd_x.destroy();
749 m_fft_fwd_y.destroy();
750 m_fft_fwd_z.destroy();
751 if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
752 m_fft_bwd_x_half.destroy();
753 }
754 m_fft_fwd_x_half.destroy();
755}
756
757template <typename T, Direction D, bool C>
758std::pair<BoxArray,DistributionMapping>
759R2C<T,D,C>::make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
760 std::array<int,AMREX_SPACEDIM> const& local_size)
761{
762 IntVect lo(local_start);
763 IntVect len(local_size);
764 Box bx(lo, lo+len-1);
765#ifdef AMREX_USE_MPI
767 MPI_Allgather(&bx, 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
768 allboxes.data(), 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
770 Vector<int> pmap;
771 pmap.reserve(allboxes.size());
772 for (int i = 0; i < allboxes.size(); ++i) {
773 if (allboxes[i].ok()) {
774 pmap.push_back(ParallelContext::local_to_global_rank(i));
775 }
776 }
777 allboxes.erase(std::remove_if(allboxes.begin(), allboxes.end(),
778 [=] (Box const& b) { return b.isEmpty(); }),
779 allboxes.end());
780 BoxList bl(std::move(allboxes));
781 return std::make_pair(BoxArray(std::move(bl)), DistributionMapping(std::move(pmap)));
782#else
783 return std::make_pair(BoxArray(bx), DistributionMapping(Vector<int>({0})));
784#endif
785}
786
787template <typename T, Direction D, bool C>
788void R2C<T,D,C>::setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
789 std::array<int,AMREX_SPACEDIM> const& local_size)
790{
791 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
792 m_raw_mf = MF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
793}
794
795template <typename T, Direction D, bool C>
796std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
798{
799 m_raw_mf = MF(m_rx.boxArray(), m_rx.DistributionMap(), m_rx.nComp(), 0,
800 MFInfo{}.SetAlloc(false));
801
802 auto const myproc = ParallelContext::MyProcSub();
803 if (myproc < m_rx.size()) {
804 Box const& box = m_rx.box(myproc);
805 return std::make_pair(box.smallEnd().toArray(),
806 box.length().toArray());
807 } else {
808 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
809 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
810 }
811}
812
813template <typename T, Direction D, bool C>
814void R2C<T,D,C>::setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
815 std::array<int,AMREX_SPACEDIM> const& local_size)
816{
817 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
818 m_raw_cmf = cMF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
819}
820
821template <typename T, Direction D, bool C>
822std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
824{
825 auto const ncomp = m_info.batch_size;
826 auto const& [ba, dm] = getSpectralDataLayout();
827
828 m_raw_cmf = cMF(ba, dm, ncomp, 0, MFInfo{}.SetAlloc(false));
829
830 auto const myproc = ParallelContext::MyProcSub();
831 if (myproc < m_raw_cmf.size()) {
832 Box const& box = m_raw_cmf.box(myproc);
833 return std::make_pair(box.smallEnd().toArray(), box.length().toArray());
834 } else {
835 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
836 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
837 }
838}
839
840template <typename T, Direction D, bool C>
842{
843 if (C || m_r2c_sub) { amrex::Abort("R2C: OpenBC not supported with reduced dimensions or complex inputs"); }
844
845#if (AMREX_SPACEDIM == 3)
846 if (m_do_alld_fft) { return; }
847
848 auto const ncomp = m_info.batch_size;
849
850 if (m_slab_decomp && ! m_fft_fwd_x_half.defined) {
851 auto* fab = detail::get_fab(m_rx);
852 if (fab) {
853 Box bottom_half = m_real_domain;
854 bottom_half.growHi(2,-m_real_domain.length(2)/2);
855 Box box = fab->box() & bottom_half;
856 if (box.ok()) {
857 auto* pr = fab->dataPtr();
858 auto* pc = (typename Plan<T>::VendorComplex *)
859 detail::get_fab(m_cx)->dataPtr();
860#ifdef AMREX_USE_SYCL
861 m_fft_fwd_x_half.template init_r2c<Direction::forward>
862 (box, pr, pc, m_slab_decomp, ncomp);
863 m_fft_bwd_x_half = m_fft_fwd_x_half;
864#else
865 if constexpr (D == Direction::both || D == Direction::forward) {
866 m_fft_fwd_x_half.template init_r2c<Direction::forward>
867 (box, pr, pc, m_slab_decomp, ncomp);
868 }
869 if constexpr (D == Direction::both || D == Direction::backward) {
870 m_fft_bwd_x_half.template init_r2c<Direction::backward>
871 (box, pr, pc, m_slab_decomp, ncomp);
872 }
873#endif
874 }
875 }
876 } // else todo
877
878 if (m_cmd_x2z && ! m_cmd_x2z_half) {
879 Box bottom_half = m_spectral_domain_z;
880 // Note that z-direction's index is 0 because we z is the
881 // unit-stride direction here.
882 bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
883 m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
884 (m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
885 }
886
887 if (m_cmd_z2x && ! m_cmd_z2x_half) {
888 Box bottom_half = m_spectral_domain_x;
889 bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
890 m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
891 (m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
892 }
893#endif
894}
895
896template <typename T, Direction D, bool C>
897template <Direction DIR>
898requires ((DIR == Direction::forward) || (DIR == Direction::both))
899void R2C<T,D,C>::forward (MF const& inmf, int incomp)
900{
901 BL_PROFILE("FFT::R2C::forward(in)");
902
903 auto const ncomp = m_info.batch_size;
904
905 if (m_r2c_sub) {
906 if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
907 m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf), incomp);
908 } else {
909 MF tmp(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
910 tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
911 m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp),0);
912 }
913 return;
914 }
915
916 if (&m_rx != &inmf) {
917 m_rx.ParallelCopy(inmf, incomp, 0, ncomp);
918 }
919
920 if (m_do_alld_fft) {
921 if constexpr (C) {
922 m_fft_fwd_x.template compute_c2c<Direction::forward>();
923 } else {
924 m_fft_fwd_x.template compute_r2c<Direction::forward>();
925 }
926 return;
927 }
928
929 auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
930 if constexpr (C) {
931 fft_x.template compute_c2c<Direction::forward>();
932 } else {
933 fft_x.template compute_r2c<Direction::forward>();
934 }
935
936 if ( m_cmd_x2y) {
937 ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, ncomp, m_dtos_x2y);
938 }
939 m_fft_fwd_y.template compute_c2c<Direction::forward>();
940
941 if ( m_cmd_y2z) {
942 ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, ncomp, m_dtos_y2z);
943 }
944#if (AMREX_SPACEDIM == 3)
945 else if ( m_cmd_x2z) {
946 if (m_openbc_half) {
947 NonLocalBC::PackComponents components{};
948 components.n_components = ncomp;
950 {components, m_dtos_x2z};
951 auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
952
953 Box upper_half = m_spectral_domain_z;
954 // Note that z-direction's index is 0 because we z is the
955 // unit-stride direction here.
956 upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
957 m_cz.setVal(0, upper_half, 0, ncomp);
958
959 ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
960 } else {
961 ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, ncomp, m_dtos_x2z);
962 }
963 }
964#endif
965 m_fft_fwd_z.template compute_c2c<Direction::forward>();
966}
967
968template <typename T, Direction D, bool C>
969template <typename FA, typename RT>
970std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
971R2C<T,D,C>::install_raw_ptr (FA& fa, RT const* p)
972{
973 AMREX_ALWAYS_ASSERT(!fa.empty());
974
975 using FAB = typename FA::FABType::value_type;
976 using T_FAB = typename FAB::value_type;
977 static_assert(sizeof(T_FAB) == sizeof(RT));
978
979 auto const ncomp = m_info.batch_size;
980 auto const& ia = fa.IndexArray();
981
982 T_FAB* pp = nullptr;
983 std::size_t sz = 0;
984
985 if ( ! ia.empty() ) {
986 int K = ia[0];
987 Box const& box = fa.fabbox(K);
988 if ((alignof(T_FAB) == alignof(RT)) || amrex::is_aligned(p,alignof(T_FAB))) {
989 pp = (T_FAB*)p;
990 } else {
991 sz = sizeof(T_FAB) * box.numPts() * ncomp;
992 pp = (T_FAB*) The_Arena()->alloc(sz);
993 }
994 fa.setFab(K, FAB(box,ncomp,pp));
995 }
996
997 if (sz == 0) {
998 return std::make_pair(std::unique_ptr<char,DataDeleter>{},std::size_t(0));
999 } else {
1000 return std::make_pair(std::unique_ptr<char,DataDeleter>
1001 {(char*)pp,DataDeleter{The_Arena()}}, sz);
1002 }
1003}
1004
1005
1006template <typename T, Direction D, bool C>
1007template <typename RT, typename CT, Direction DIR, bool CP>
1008requires ((DIR == Direction::forward) || (DIR == Direction::both))
1009 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
1010 || ((sizeof(RT) == sizeof(CT)) && CP))
1011void R2C<T,D,C>::forward (RT const* in, CT* out)
1012{
1013 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, in);
1014 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, out);
1015
1016 if (rsz > 0) {
1017 Gpu::dtod_memcpy_async(rdata.get(),in,rsz);
1019 }
1020
1021 forward(m_raw_mf, m_raw_cmf);
1022
1023 if (csz) {
1024 Gpu::dtod_memcpy_async(out,cdata.get(),csz);
1026 }
1027}
1028
1029template <typename T, Direction D, bool C>
1030template <Direction DIR>
1031requires (DIR == Direction::both)
1032void R2C<T,D,C>::backward (MF& outmf, int outcomp)
1033{
1034 backward_doit(outmf, IntVect(0), Periodicity::NonPeriodic(), outcomp);
1035}
1036
1037template <typename T, Direction D, bool C>
1038void R2C<T,D,C>::backward_doit (MF& outmf, IntVect const& ngout,
1039 Periodicity const& period, int outcomp)
1040{
1041 BL_PROFILE("FFT::R2C::backward(out)");
1042
1043 auto const ncomp = m_info.batch_size;
1044
1045 if (m_r2c_sub) {
1046 if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
1047 MF submf = m_sub_helper.make_alias_mf(outmf);
1048 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1049 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1050 m_r2c_sub->backward_doit(submf, subngout, subperiod, outcomp);
1051 } else {
1052 MF tmp(outmf.boxArray(), outmf.DistributionMap(), ncomp,
1053 m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
1054 this->backward_doit(tmp, ngout, period, 0);
1055 outmf.LocalCopy(tmp, 0, outcomp, ncomp, tmp.nGrowVect());
1056 }
1057 return;
1058 }
1059
1060 if (m_do_alld_fft) {
1061 if constexpr (C) {
1062 m_fft_bwd_x.template compute_c2c<Direction::backward>();
1063 } else {
1064 m_fft_bwd_x.template compute_r2c<Direction::backward>();
1065 }
1066 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1067 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1068 return;
1069 }
1070
1071 m_fft_bwd_z.template compute_c2c<Direction::backward>();
1072 if ( m_cmd_z2y) {
1073 ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, ncomp, m_dtos_z2y);
1074 }
1075#if (AMREX_SPACEDIM == 3)
1076 else if ( m_cmd_z2x) {
1077 auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
1078 ParallelCopy(m_cx, m_cz, *cmd, 0, 0, ncomp, m_dtos_z2x);
1079 }
1080#endif
1081
1082 m_fft_bwd_y.template compute_c2c<Direction::backward>();
1083 if ( m_cmd_y2x) {
1084 ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, ncomp, m_dtos_y2x);
1085 }
1086
1087 auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
1088 if constexpr (C) {
1089 fft_x.template compute_c2c<Direction::backward>();
1090 } else {
1091 fft_x.template compute_r2c<Direction::backward>();
1092 }
1093 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1094 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1095}
1096
1097template <typename T, Direction D, bool C>
1098template <typename CT, typename RT, Direction DIR, bool CP>
1099requires ((DIR == Direction::backward) || (DIR == Direction::both))
1100 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
1101 || ((sizeof(RT) == sizeof(CT)) && CP))
1102void R2C<T,D,C>::backward (CT const* in, RT* out)
1103{
1104 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, out);
1105 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, in);
1106
1107 if (csz) {
1108 Gpu::dtod_memcpy_async(cdata.get(),in,csz);
1110 }
1111
1112 backward(m_raw_cmf, m_raw_mf);
1113
1114 if (rsz > 0) {
1115 Gpu::dtod_memcpy_async(out,rdata.get(),rsz);
1117 }
1118}
1119
1120template <typename T, Direction D, bool C>
1121std::pair<Plan<T>, Plan<T>>
1122R2C<T,D,C>::make_c2c_plans (cMF& inout, int ndims) const
1123{
1124 Plan<T> fwd;
1125 Plan<T> bwd;
1126
1127 auto* fab = detail::get_fab(inout);
1128 if (!fab) { return {fwd, bwd};}
1129
1130 Box const& box = fab->box();
1131 auto* pio = (typename Plan<T>::VendorComplex *)fab->dataPtr();
1132
1133 auto const ncomp = m_info.batch_size;
1134
1135#ifdef AMREX_USE_SYCL
1136 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1137 bwd = fwd;
1138#else
1139 if constexpr (D == Direction::both || D == Direction::forward) {
1140 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1141 }
1142 if constexpr (D == Direction::both || D == Direction::backward) {
1143 bwd.template init_c2c<Direction::backward>(box, pio, ncomp, ndims);
1144 }
1145#endif
1146
1147 return {fwd, bwd};
1148}
1149
1150template <typename T, Direction D, bool C>
1151template <typename F>
1152void R2C<T,D,C>::post_forward_doit_0 (F const& post_forward)
1153{
1154 if (m_info.twod_mode || m_info.batch_size > 1) {
1155 amrex::Abort("xxxxx todo: post_forward");
1156#if (AMREX_SPACEDIM > 1)
1157 } else if (m_r2c_sub) {
1158 // We need to pass the originally ordered indices to post_forward.
1159#if (AMREX_SPACEDIM == 2)
1160 // The original domain is (1,ny). The sub domain is (ny,1).
1161 m_r2c_sub->post_forward_doit_1
1162 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1163 {
1164 post_forward(0, i, 0, sp);
1165 });
1166#else
1167 if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
1168 // Original domain: (1, 1, nz). Sub domain: (nz, 1, 1)
1169 m_r2c_sub->post_forward_doit_1
1170 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1171 {
1172 post_forward(0, 0, i, sp);
1173 });
1174 } else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
1175 // Original domain: (1, ny, 1). Sub domain: (ny, 1, 1)
1176 m_r2c_sub->post_forward_doit_1
1177 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1178 {
1179 post_forward(0, i, 0, sp);
1180 });
1181 } else if (m_real_domain.length(0) == 1) {
1182 // Original domain: (1, ny, nz). Sub domain: (ny, nz, 1)
1183 m_r2c_sub->post_forward_doit_1
1184 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1185 {
1186 post_forward(0, i, j, sp);
1187 });
1188 } else if (m_real_domain.length(1) == 1) {
1189 // Original domain: (nx, 1, nz). Sub domain: (nx, nz, 1)
1190 m_r2c_sub->post_forward_doit_1
1191 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1192 {
1193 post_forward(i, 0, j, sp);
1194 });
1195 } else {
1196 amrex::Abort("R2c::post_forward_doit_0: how did this happen?");
1197 }
1198#endif
1199#endif
1200 } else {
1201 this->post_forward_doit_1(post_forward);
1202 }
1203}
1204
1205template <typename T, Direction D, bool C>
1206template <typename F>
1207void R2C<T,D,C>::post_forward_doit_1 (F const& post_forward)
1208{
1209 if (m_info.twod_mode || m_info.batch_size > 1) {
1210 amrex::Abort("xxxxx todo: post_forward");
1211 } else if (m_r2c_sub) {
1212 amrex::Abort("R2C::post_forward_doit_1: How did this happen?");
1213 } else {
1214 if ( ! m_cz.empty()) {
1215 auto* spectral_fab = detail::get_fab(m_cz);
1216 if (spectral_fab) {
1217 auto const& a = spectral_fab->array(); // m_cz's ordering is z,x,y
1218 ParallelForOMP(spectral_fab->box(),
1219 [=] AMREX_GPU_DEVICE (int iz, int jx, int ky)
1220 {
1221 post_forward(jx,ky,iz,a(iz,jx,ky));
1222 });
1223 }
1224 } else if ( ! m_cy.empty()) {
1225 auto* spectral_fab = detail::get_fab(m_cy);
1226 if (spectral_fab) {
1227 auto const& a = spectral_fab->array(); // m_cy's ordering is y,x,z
1228 ParallelForOMP(spectral_fab->box(),
1229 [=] AMREX_GPU_DEVICE (int iy, int jx, int k)
1230 {
1231 post_forward(jx,iy,k,a(iy,jx,k));
1232 });
1233 }
1234 } else {
1235 auto* spectral_fab = detail::get_fab(m_cx);
1236 if (spectral_fab) {
1237 auto const& a = spectral_fab->array();
1238 ParallelForOMP(spectral_fab->box(),
1239 [=] AMREX_GPU_DEVICE (int i, int j, int k)
1240 {
1241 post_forward(i,j,k,a(i,j,k));
1242 });
1243 }
1244 }
1245 }
1246}
1247
1248template <typename T, Direction D, bool C>
1250{
1251#if (AMREX_SPACEDIM == 3)
1252 if (m_info.oned_mode && !m_info.twod_mode) {
1253 return T(1)/T(Long(m_real_domain.length(0)));
1254 } else if (m_info.twod_mode) {
1255 return T(1)/T(Long(m_real_domain.length(0)) *
1256 Long(m_real_domain.length(1)));
1257 } else
1258#elif (AMREX_SPACEDIM == 2)
1259 if (m_info.oned_mode) {
1260 return T(1)/T(Long(m_real_domain.length(0)));
1261 } else
1262#endif
1263 {
1264 return T(1)/T(m_real_domain.numPts());
1265 }
1266}
1267
1268template <typename T, Direction D, bool C>
1269template <Direction DIR>
1270requires ((DIR == Direction::forward) || (DIR == Direction::both))
1271std::pair<typename R2C<T,D,C>::cMF *, IntVect>
1273{
1274#if (AMREX_SPACEDIM > 1)
1275 if (m_r2c_sub) {
1276 auto [cmf, order] = m_r2c_sub->getSpectralData();
1277 return std::make_pair(cmf, m_sub_helper.inverse_order(order));
1278 } else
1279#endif
1280 if (!m_cz.empty()) {
1281 return std::make_pair(const_cast<cMF*>(&m_cz), IntVect{AMREX_D_DECL(2,0,1)});
1282 } else if (!m_cy.empty()) {
1283 return std::make_pair(const_cast<cMF*>(&m_cy), IntVect{AMREX_D_DECL(1,0,2)});
1284 } else {
1285 return std::make_pair(const_cast<cMF*>(&m_cx), IntVect{AMREX_D_DECL(0,1,2)});
1286 }
1287}
1288
1289template <typename T, Direction D, bool C>
1290template <Direction DIR>
1291requires ((DIR == Direction::forward) || (DIR == Direction::both))
1292void R2C<T,D,C>::forward (MF const& inmf, cMF& outmf, int incomp, int outcomp)
1293{
1294 BL_PROFILE("FFT::R2C::forward(inout)");
1295
1296 auto const ncomp = m_info.batch_size;
1297
1298 if (m_r2c_sub)
1299 {
1300 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1301 MF inmf_sub, inmf_tmp;
1302 int incomp_sub;
1303 if (inmf_safe) {
1304 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1305 incomp_sub = incomp;
1306 } else {
1307 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1308 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1309 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1310 incomp_sub = 0;
1311 }
1312
1313 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1314 cMF outmf_sub, outmf_tmp;
1315 int outcomp_sub;
1316 if (outmf_safe) {
1317 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1318 outcomp_sub = outcomp;
1319 } else {
1320 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, 0);
1321 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1322 outcomp_sub = 0;
1323 }
1324
1325 m_r2c_sub->forward(inmf_sub, outmf_sub, incomp_sub, outcomp_sub);
1326
1327 if (!outmf_safe) {
1328 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, IntVect(0));
1329 }
1330 }
1331 else
1332 {
1333 forward(inmf, incomp);
1334 if (!m_cz.empty()) { // m_cz's order (z,x,y) -> (x,y,z)
1335 RotateBwd dtos{};
1337 (outmf, m_spectral_domain_x, m_cz, IntVect(0), dtos);
1338 ParallelCopy(outmf, m_cz, cmd, 0, outcomp, ncomp, dtos);
1339 } else if (!m_cy.empty()) { // m_cy's order (y,x,z) -> (x,y,z)
1341 (outmf, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
1342 ParallelCopy(outmf, m_cy, cmd, 0, outcomp, ncomp, m_dtos_y2x);
1343 } else {
1344 outmf.ParallelCopy(m_cx, 0, outcomp, ncomp);
1345 }
1346 }
1347}
1348
1349template <typename T, Direction D, bool C>
1350template <Direction DIR>
1351requires ((DIR == Direction::backward) || (DIR == Direction::both))
1352void R2C<T,D,C>::backward (cMF const& inmf, MF& outmf, int incomp, int outcomp)
1353{
1354 backward_doit(inmf, outmf, IntVect(0), Periodicity::NonPeriodic(), incomp, outcomp);
1355}
1356
1357template <typename T, Direction D, bool C>
1358void R2C<T,D,C>::backward_doit (cMF const& inmf, MF& outmf, IntVect const& ngout,
1359 Periodicity const& period, int incomp, int outcomp)
1360{
1361 BL_PROFILE("FFT::R2C::backward(inout)");
1362
1363 auto const ncomp = m_info.batch_size;
1364
1365 if (m_r2c_sub)
1366 {
1367 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1368 cMF inmf_sub, inmf_tmp;
1369 int incomp_sub;
1370 if (inmf_safe) {
1371 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1372 incomp_sub = incomp;
1373 } else {
1374 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1375 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1376 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1377 incomp_sub = 0;
1378 }
1379
1380 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1381 MF outmf_sub, outmf_tmp;
1382 int outcomp_sub;
1383 if (outmf_safe) {
1384 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1385 outcomp_sub = outcomp;
1386 } else {
1387 IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
1388 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, ngtmp);
1389 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1390 outcomp_sub = 0;
1391 }
1392
1393 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1394 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1395 m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod, incomp_sub, outcomp_sub);
1396
1397 if (!outmf_safe) {
1398 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, outmf_tmp.nGrowVect());
1399 }
1400 }
1401 else
1402 {
1403 if (!m_cz.empty()) { // (x,y,z) -> m_cz's order (z,x,y)
1404 RotateFwd dtos{};
1405 MultiBlockCommMetaData cmd
1406 (m_cz, m_spectral_domain_z, inmf, IntVect(0), dtos);
1407 ParallelCopy(m_cz, inmf, cmd, incomp, 0, ncomp, dtos);
1408 } else if (!m_cy.empty()) { // (x,y,z) -> m_cy's ordering (y,x,z)
1409 MultiBlockCommMetaData cmd
1410 (m_cy, m_spectral_domain_y, inmf, IntVect(0), m_dtos_x2y);
1411 ParallelCopy(m_cy, inmf, cmd, incomp, 0, ncomp, m_dtos_x2y);
1412 } else {
1413 m_cx.ParallelCopy(inmf, incomp, 0, ncomp);
1414 }
1415 backward_doit(outmf, ngout, period, outcomp);
1416 }
1417}
1418
1419template <typename T, Direction D, bool C>
1420std::pair<BoxArray,DistributionMapping>
1422{
1423#if (AMREX_SPACEDIM > 1)
1424 if (m_r2c_sub) {
1425 auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
1426 return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
1427 }
1428#endif
1429
1430#if (AMREX_SPACEDIM == 3)
1431 if (!m_cz.empty()) {
1432 BoxList bl = m_cz.boxArray().boxList();
1433 for (auto& b : bl) {
1434 auto lo = b.smallEnd();
1435 auto hi = b.bigEnd();
1436 std::swap(lo[0], lo[1]);
1437 std::swap(lo[1], lo[2]);
1438 std::swap(hi[0], hi[1]);
1439 std::swap(hi[1], hi[2]);
1440 b.setSmall(lo);
1441 b.setBig(hi);
1442 }
1443 return std::make_pair(BoxArray(std::move(bl)), m_cz.DistributionMap());
1444 } else
1445#endif
1446#if (AMREX_SPACEDIM >= 2)
1447 if (!m_cy.empty()) {
1448 BoxList bl = m_cy.boxArray().boxList();
1449 for (auto& b : bl) {
1450 auto lo = b.smallEnd();
1451 auto hi = b.bigEnd();
1452 std::swap(lo[0], lo[1]);
1453 std::swap(hi[0], hi[1]);
1454 b.setSmall(lo);
1455 b.setBig(hi);
1456 }
1457 return std::make_pair(BoxArray(std::move(bl)), m_cy.DistributionMap());
1458 } else
1459#endif
1460 {
1461 return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
1462 }
1463}
1464
1466template <typename T = Real, FFT::Direction D = FFT::Direction::both>
1468
1469}
1470
1471#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:564
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
BoxList & shift(int dir, int nzones)
Applies Box::shift(int,int) to each Box in the BoxList.
Definition AMReX_BoxList.cpp:565
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:124
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:364
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:155
__host__ __device__ int shortside(int &dir) const noexcept
Return length of shortest side. dir is modified to give direction with shortest side: 0....
Definition AMReX_Box.H:445
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:148
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:670
__host__ __device__ IndexTypeND< dim > ixType() const noexcept
Return the indexing type.
Definition AMReX_Box.H:136
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:681
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:112
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Long size() const noexcept
Length of the underlying processor map.
Definition AMReX_DistributionMapping.H:129
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:26
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:188
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:67
Parallel Discrete Fourier Transform.
Definition AMReX_FFT_R2C.H:47
std::conditional_t< C, cMF, std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > > MF
Definition AMReX_FFT_R2C.H:52
R2C & operator=(R2C const &)=delete
~R2C()
Definition AMReX_FFT_R2C.H:737
void setLocalDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local domain.
Definition AMReX_FFT_R2C.H:788
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:489
R2C(R2C &&)=delete
void post_forward_doit_1(F const &post_forward)
CUDA-visible helper that redistributes and applies post_forward for the batched layout.
Definition AMReX_FFT_R2C.H:1207
T scalingFactor() const
Scaling factor. If the data goes through forward and then backward, the result multiplied by the scal...
Definition AMReX_FFT_R2C.H:1249
std::pair< cMF *, IntVect > getSpectralData() const
Get the internal spectral data.
void post_forward_doit_0(F const &post_forward)
CUDA-visible hook that walks internal spectral data and applies post_forward.
Definition AMReX_FFT_R2C.H:1152
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalDomain() const
Get local domain.
Definition AMReX_FFT_R2C.H:797
R2C(R2C const &)=delete
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition AMReX_FFT_R2C.H:1421
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward, int incomp=0, int outcomp=0)
Forward and then backward transform.
Definition AMReX_FFT_R2C.H:202
R2C(std::array< int, 3 > const &domain_size, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:732
FabArray< BaseFab< GpuComplex< T > > > cMF
Definition AMReX_FFT_R2C.H:49
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalSpectralDomain() const
Get local spectral domain.
Definition AMReX_FFT_R2C.H:823
void setLocalSpectralDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local spectral domain.
Definition AMReX_FFT_R2C.H:814
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
Box fabbox(int K) const noexcept
Return the Kth FABs Box in the FabArray. That is, the region the Kth fab is actually defined on.
Definition AMReX_FabArrayBase.cpp:217
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
void setFab(int boxno, std::unique_ptr< FAB > elem)
Explicitly set the Kth FAB in the FabArray to point to elem.
Definition AMReX_FabArray.H:2397
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:873
typename std::conditional_t< IsBaseFab< BaseFab< GpuComplex< T > > >::value, BaseFab< GpuComplex< T > >, FABType >::value_type value_type
Definition AMReX_FabArray.H:355
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition AMReX_FabArray.H:2213
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition AMReX_FabArray.H:1989
A collection (stored as an array) of FArrayBox objects.
Definition AMReX_MultiFab.H:40
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:327
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
Definition AMReX_FFT_Helper.H:52
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:449
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int local_to_global_rank(int rank) noexcept
translate between local rank and global rank
Definition AMReX_ParallelContext.H:95
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
bool is_aligned(const void *p, std::size_t alignment) noexcept
Return whether the address p is aligned to alignment bytes.
Definition AMReX_Arena.H:39
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:1951
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1943
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:63
Definition AMReX_FFT_Helper.H:64
bool twod_mode
Definition AMReX_FFT_Helper.H:75
bool oned_mode
Definition AMReX_FFT_Helper.H:84
int batch_size
Batched FFT size. Only support in R2C, not R2X.
Definition AMReX_FFT_Helper.H:87
DomainStrategy domain_strategy
Domain composition strategy.
Definition AMReX_FFT_Helper.H:66
int nprocs
Max number of processes to use.
Definition AMReX_FFT_Helper.H:90
int pencil_threshold
Definition AMReX_FFT_Helper.H:70
Definition AMReX_FFT_Helper.H:194
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition AMReX_FFT_Helper.H:198
FabArray memory allocation information.
Definition AMReX_FabArray.H:68
MFInfo & SetAlloc(bool a) noexcept
Definition AMReX_FabArray.H:75
This class specializes behaviour on local copies and unpacking receive buffers.
Definition AMReX_NonLocalBC.H:642
This is the index mapping based on the DTOS MultiBlockDestToSrc.
Definition AMReX_NonLocalBC.H:223
Contains information about which components take part of the data transaction.
Definition AMReX_NonLocalBC.H:550
int n_components
Definition AMReX_NonLocalBC.H:553
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78