Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_FFT_R2C.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_R2C_H_
2#define AMREX_FFT_R2C_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_MultiFab.H>
6#include <AMReX_FFT_Helper.H>
7#include <algorithm>
8#include <map>
9#include <numeric>
10#include <tuple>
11
12namespace amrex::FFT
13{
14
21template <typename T> class OpenBCSolver;
22template <typename T> class Poisson;
23template <typename T> class PoissonHybrid;
24
46template <typename T = Real, FFT::Direction D = FFT::Direction::both, bool C = false>
47class R2C
48{
49public:
51 using MF = std::conditional_t
52 <C, cMF, std::conditional_t<std::is_same_v<T,Real>,
54
55 template <typename U> friend class OpenBCSolver;
56 template <typename U> friend class Poisson;
57 template <typename U> friend class PoissonHybrid;
58
65 explicit R2C (Box const& domain, Info const& info = Info{});
66
76 explicit R2C (std::array<int,AMREX_SPACEDIM> const& domain_size,
77 Info const& info = Info{});
78
79 ~R2C ();
80
81 R2C (R2C const&) = delete;
82 R2C (R2C &&) = delete;
83 R2C& operator= (R2C const&) = delete;
84 R2C& operator= (R2C &&) = delete;
85
109 void setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
110 std::array<int,AMREX_SPACEDIM> const& local_size);
111
125 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
127
156 void setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
157 std::array<int,AMREX_SPACEDIM> const& local_size);
158
177 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
179
201 template <typename F, Direction DIR=D>
202 requires (DIR == Direction::both)
203 void forwardThenBackward (MF const& inmf, MF& outmf, F const& post_forward,
204 int incomp = 0, int outcomp = 0)
205 {
207 !m_info.twod_mode && m_info.batch_size == 1,
208 "FFT::R2C::forwardThenBackward(post_forward) currently supports only !twod_mode and batch_size==1");
209 BL_PROFILE("FFT::R2C::forwardbackward");
210 this->forward(inmf, incomp);
211 this->post_forward_doit_0(post_forward);
212 this->backward(outmf, outcomp);
213 }
214
225 template <Direction DIR=D>
226 requires ((DIR == Direction::forward) || (DIR == Direction::both))
227 void forward (MF const& inmf, int incomp = 0);
228
240 template <Direction DIR=D>
241 requires ((DIR == Direction::forward) || (DIR == Direction::both))
242 void forward (MF const& inmf, cMF& outmf, int incomp = 0, int outcomp = 0);
243
260 template <typename RT, typename CT, Direction DIR=D, bool CP=C>
261 requires ((DIR == Direction::forward) || (DIR == Direction::both))
262 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
263 || ((sizeof(RT) == sizeof(CT)) && CP))
264 void forward (RT const* in, CT* out);
265
275 template <Direction DIR=D>
276 requires (DIR == Direction::both)
277 void backward (MF& outmf, int outcomp = 0);
278
290 template <Direction DIR=D>
291 requires ((DIR == Direction::backward) || (DIR == Direction::both))
292 void backward (cMF const& inmf, MF& outmf, int incomp = 0, int outcomp = 0);
293
310 template <typename CT, typename RT, Direction DIR=D, bool CP=C>
311 requires ((DIR == Direction::backward) || (DIR == Direction::both))
312 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
313 || ((sizeof(RT) == sizeof(CT)) && CP))
314 void backward (CT const* in, RT* out);
315
323 [[nodiscard]] T scalingFactor () const;
324
336 template <Direction DIR=D>
337 requires ((DIR == Direction::forward) || (DIR == Direction::both))
338 std::pair<cMF*,IntVect> getSpectralData () const;
339
340
350 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
351
357 template <typename F>
358 void post_forward_doit_0 (F const& post_forward);
359
360 template <typename F>
366 void post_forward_doit_1 (F const& post_forward);
367
368private:
369
370 void prepare_openbc ();
371
372 void backward_doit (MF& outmf, IntVect const& ngout = IntVect(0),
373 Periodicity const& period = Periodicity::NonPeriodic(),
374 int outcomp = 0);
375
376 void backward_doit (cMF const& inmf, MF& outmf,
377 IntVect const& ngout = IntVect(0),
378 Periodicity const& period = Periodicity::NonPeriodic(),
379 int incomp = 0, int outcomp = 0);
380
381 std::pair<Plan<T>,Plan<T>> make_c2c_plans (cMF& inout, int ndims) const;
382
383 [[nodiscard]] MF& make_m_rx_alias (IndexType const& ix_type);
384
385 [[nodiscard]] Periodicity make_m_rx_alias_periodicity (IndexType const& ix_type,
386 Periodicity const& period) const;
387
388 static Box make_domain_x (Box const& domain)
389 {
390 if constexpr (C) {
391 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)-1,
392 domain.length(1)-1,
393 domain.length(2)-1)),
394 domain.ixType());
395 } else {
396 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2,
397 domain.length(1)-1,
398 domain.length(2)-1)),
399 domain.ixType());
400 }
401 }
402
403 static Box make_domain_y (Box const& domain)
404 {
405 if constexpr (C) {
406 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
407 domain.length(0)-1,
408 domain.length(2)-1)),
409 domain.ixType());
410 } else {
411 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
412 domain.length(0)/2,
413 domain.length(2)-1)),
414 domain.ixType());
415 }
416 }
417
418 static Box make_domain_z (Box const& domain)
419 {
420 if constexpr (C) {
421 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
422 domain.length(0)-1,
423 domain.length(1)-1)),
424 domain.ixType());
425 } else {
426 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
427 domain.length(0)/2,
428 domain.length(1)-1)),
429 domain.ixType());
430 }
431 }
432
433 static std::pair<BoxArray,DistributionMapping>
434 make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
435 std::array<int,AMREX_SPACEDIM> const& local_size);
436
437 template <typename FA, typename RT>
438 std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
439 install_raw_ptr (FA& fa, RT const* p);
440
441 Plan<T> m_fft_fwd_x{};
442 Plan<T> m_fft_bwd_x{};
443 Plan<T> m_fft_fwd_y{};
444 Plan<T> m_fft_bwd_y{};
445 Plan<T> m_fft_fwd_z{};
446 Plan<T> m_fft_bwd_z{};
447 Plan<T> m_fft_fwd_x_half{};
448 Plan<T> m_fft_bwd_x_half{};
449
450 // Comm meta-data. In the forward phase, we start with (x,y,z),
451 // transpose to (y,x,z) and then (z,x,y). In the backward phase, we
452 // perform inverse transpose.
453 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y; // (x,y,z) -> (y,x,z)
454 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x; // (y,x,z) -> (x,y,z)
455 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z; // (y,x,z) -> (z,x,y)
456 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
457 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
458 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
459 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
460 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
461 Swap01 m_dtos_x2y{};
462 Swap01 m_dtos_y2x{};
463 Swap02 m_dtos_y2z{};
464 Swap02 m_dtos_z2y{};
465 RotateFwd m_dtos_x2z{};
466 RotateBwd m_dtos_z2x{};
467
468 MF m_rx;
469 cMF m_cx;
470 cMF m_cy;
471 cMF m_cz;
472
473 mutable MF m_raw_mf;
474 mutable cMF m_raw_cmf;
475
476 std::map<IntVect,MF> m_rx_aliases;
477
478 std::unique_ptr<char,DataDeleter> m_data_1;
479 std::unique_ptr<char,DataDeleter> m_data_2;
480
481 Box m_real_domain;
482 Box m_spectral_domain_x;
483 Box m_spectral_domain_y;
484 Box m_spectral_domain_z;
485
486 std::unique_ptr<R2C<T,D,C>> m_r2c_sub;
487 detail::SubHelper m_sub_helper;
488
489 Info m_info;
490
491 bool m_do_alld_fft = false;
492 bool m_slab_decomp = false;
493 bool m_openbc_half = false;
494};
495
496template <typename T, Direction D, bool C>
497R2C<T,D,C>::R2C (Box const& domain, Info const& info)
498 : m_real_domain(domain),
499 m_spectral_domain_x(make_domain_x(domain)),
500#if (AMREX_SPACEDIM >= 2)
501 m_spectral_domain_y(make_domain_y(domain)),
502#if (AMREX_SPACEDIM == 3)
503 m_spectral_domain_z(make_domain_z(domain)),
504#endif
505#endif
506 m_sub_helper(domain),
507 m_info(info)
508{
509 BL_PROFILE("FFT::R2C");
510
511 static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
512
513 AMREX_ALWAYS_ASSERT(m_real_domain.numPts() > 1);
514#if (AMREX_SPACEDIM == 2)
516#else
517 if (m_info.twod_mode) {
518 AMREX_ALWAYS_ASSERT((int(domain.length(0) > 1) +
519 int(domain.length(1) > 1) +
520 int(domain.length(2) > 1)) >= 2);
521 }
522#endif
523
524 {
525 Box subbox = m_sub_helper.make_box(m_real_domain);
526 if (subbox.size() != m_real_domain.size()) {
527 m_r2c_sub = std::make_unique<R2C<T,D,C>>(subbox, m_info);
528 return;
529 }
530 }
531
532 int myproc = ParallelContext::MyProcSub();
533 int nprocs = std::min(ParallelContext::NProcsSub(), m_info.nprocs);
534
535#if (AMREX_SPACEDIM == 3)
537 if (m_info.twod_mode) {
539 } else {
540 int shortside = m_real_domain.shortside();
541 if (shortside < m_info.pencil_threshold*nprocs) {
543 } else {
545 }
546 }
547 }
548
549 if (!m_info.oned_mode) {
550 if (m_info.twod_mode) {
551 m_slab_decomp = true;
552 } else if (m_info.domain_strategy == DomainStrategy::slab && (m_real_domain.length(1) > 1)) {
553 m_slab_decomp = true;
554 }
555 }
556#endif
557
558 auto const ncomp = m_info.batch_size;
559
560 auto bax = amrex::decompose(m_real_domain, nprocs,
561 {AMREX_D_DECL(false,!m_slab_decomp,m_real_domain.length(2)>1)}, true);
562
563 DistributionMapping dmx = detail::make_iota_distromap(bax.size());
564 m_rx.define(bax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
565
566 {
567 BoxList bl = bax.boxList();
568 for (auto & b : bl) {
569 b.shift(-m_real_domain.smallEnd());
570 b.setBig(0, m_spectral_domain_x.bigEnd(0));
571 }
572 BoxArray cbax(std::move(bl));
573 m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
574 }
575
576 m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) &&
577 (! m_info.twod_mode) && (! m_info.oned_mode);
578
579 if (!m_do_alld_fft) // do a series of 1d or 2d ffts
580 {
581 //
582 // make data containers
583 //
584
585#if (AMREX_SPACEDIM >= 2)
587 if ((m_real_domain.length(1) > 1) && !m_slab_decomp && !m_info.oned_mode)
588 {
589 auto cbay = amrex::decompose(m_spectral_domain_y, nprocs,
590 {AMREX_D_DECL(false,true,true)}, true);
591 if (cbay.size() == dmx.size()) {
592 cdmy = dmx;
593 } else {
594 cdmy = detail::make_iota_distromap(cbay.size());
595 }
596 m_cy.define(cbay, cdmy, ncomp, 0, MFInfo().SetAlloc(false));
597 }
598#endif
599
600#if (AMREX_SPACEDIM == 3)
601 if (!m_info.oned_mode && !m_info.twod_mode &&
602 m_real_domain.length(1) > 1 &&
603 m_real_domain.length(2) > 1)
604 {
605 auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
606 {false,true,true}, true);
608 if (cbaz.size() == dmx.size()) {
609 cdmz = dmx;
610 } else if (cbaz.size() == cdmy.size()) {
611 cdmz = cdmy;
612 } else {
613 cdmz = detail::make_iota_distromap(cbaz.size());
614 }
615 m_cz.define(cbaz, cdmz, ncomp, 0, MFInfo().SetAlloc(false));
616 }
617#endif
618
619 if constexpr (C) {
620 if (m_slab_decomp) {
621 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
622 m_data_2 = detail::make_mfs_share(m_cz, m_cz);
623 } else {
624 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
625 m_data_2 = detail::make_mfs_share(m_cy, m_cy);
626 // make m_cx an alias to m_rx
627 if (myproc < m_cx.size()) {
628 Box const& box = m_cx.fabbox(myproc);
629 using FAB = typename cMF::FABType::value_type;
630 m_cx.setFab(myproc, FAB(box, ncomp, m_rx[myproc].dataPtr()));
631 }
632 }
633 } else {
634 if (m_slab_decomp) {
635 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
636 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
637 } else {
638 m_data_1 = detail::make_mfs_share(m_rx, m_cy);
639 m_data_2 = detail::make_mfs_share(m_cx, m_cz);
640 }
641 }
642
643 //
644 // make copiers
645 //
646
647#if (AMREX_SPACEDIM >= 2)
648 if (! m_cy.empty()) {
649 // comm meta-data between x and y phases
650 m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
651 (m_cy, m_spectral_domain_y, m_cx, IntVect(0), m_dtos_x2y);
652 m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
653 (m_cx, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
654 }
655#endif
656#if (AMREX_SPACEDIM == 3)
657 if (! m_cz.empty() ) {
658 if (m_slab_decomp) {
659 // comm meta-data between xy and z phases
660 m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
661 (m_cz, m_spectral_domain_z, m_cx, IntVect(0), m_dtos_x2z);
662 m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
663 (m_cx, m_spectral_domain_x, m_cz, IntVect(0), m_dtos_z2x);
664 } else {
665 // comm meta-data between y and z phases
666 m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
667 (m_cz, m_spectral_domain_z, m_cy, IntVect(0), m_dtos_y2z);
668 m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
669 (m_cy, m_spectral_domain_y, m_cz, IntVect(0), m_dtos_z2y);
670 }
671 }
672#endif
673
674 //
675 // make plans
676 //
677
678 if (myproc < m_rx.size())
679 {
680 if constexpr (C) {
681 int ndims = m_slab_decomp ? 2 : 1;
682 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx, ndims);
683 } else {
684 Box const& box = m_rx.box(myproc);
685 auto* pr = m_rx[myproc].dataPtr();
686 auto* pc = (typename Plan<T>::VendorComplex *)m_cx[myproc].dataPtr();
687#ifdef AMREX_USE_SYCL
688 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
689 m_fft_bwd_x = m_fft_fwd_x;
690#else
691 if constexpr (D == Direction::both || D == Direction::forward) {
692 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
693 }
694 if constexpr (D == Direction::both || D == Direction::backward) {
695 m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp, ncomp);
696 }
697#endif
698 }
699 }
700
701#if (AMREX_SPACEDIM >= 2)
702 if (! m_cy.empty()) {
703 std::tie(m_fft_fwd_y, m_fft_bwd_y) = make_c2c_plans(m_cy,1);
704 }
705#endif
706#if (AMREX_SPACEDIM == 3)
707 if (! m_cz.empty()) {
708 std::tie(m_fft_fwd_z, m_fft_bwd_z) = make_c2c_plans(m_cz,1);
709 }
710#endif
711 }
712 else // do fft in all dimensions at the same time
713 {
714 if constexpr (C) {
715 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
716 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx,AMREX_SPACEDIM);
717 } else {
718 m_data_1 = detail::make_mfs_share(m_rx, m_rx);
719 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
720
721 auto const& len = m_real_domain.length();
722 auto* pr = (void*)m_rx[0].dataPtr();
723 auto* pc = (void*)m_cx[0].dataPtr();
724#ifdef AMREX_USE_SYCL
725 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
726 m_fft_bwd_x = m_fft_fwd_x;
727#else
728 if constexpr (D == Direction::both || D == Direction::forward) {
729 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
730 }
731 if constexpr (D == Direction::both || D == Direction::backward) {
732 m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc, false, ncomp);
733 }
734#endif
735 }
736 }
737}
738
739template <typename T, Direction D, bool C>
740R2C<T,D,C>::R2C (std::array<int,AMREX_SPACEDIM> const& domain_size, Info const& info)
741 : R2C<T,D,C>(Box(IntVect(0),IntVect(domain_size)-1), info)
742{}
743
744template <typename T, Direction D, bool C>
746{
747 if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
748 m_fft_bwd_x.destroy();
749 }
750 if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
751 m_fft_bwd_y.destroy();
752 }
753 if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
754 m_fft_bwd_z.destroy();
755 }
756 m_fft_fwd_x.destroy();
757 m_fft_fwd_y.destroy();
758 m_fft_fwd_z.destroy();
759 if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
760 m_fft_bwd_x_half.destroy();
761 }
762 m_fft_fwd_x_half.destroy();
763}
764
765template <typename T, Direction D, bool C>
766std::pair<BoxArray,DistributionMapping>
767R2C<T,D,C>::make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
768 std::array<int,AMREX_SPACEDIM> const& local_size)
769{
770 IntVect lo(local_start);
771 IntVect len(local_size);
772 Box bx(lo, lo+len-1);
773#ifdef AMREX_USE_MPI
775 MPI_Allgather(&bx, 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
776 allboxes.data(), 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
778 Vector<int> pmap;
779 pmap.reserve(allboxes.size());
780 for (int i = 0; i < allboxes.size(); ++i) {
781 if (allboxes[i].ok()) {
782 pmap.push_back(ParallelContext::local_to_global_rank(i));
783 }
784 }
785 std::erase_if(allboxes, [=] (Box const& b) { return b.isEmpty(); });
786 BoxList bl(std::move(allboxes));
787 return std::make_pair(BoxArray(std::move(bl)), DistributionMapping(std::move(pmap)));
788#else
789 return std::make_pair(BoxArray(bx), DistributionMapping(Vector<int>({0})));
790#endif
791}
792
793template <typename T, Direction D, bool C>
794void R2C<T,D,C>::setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
795 std::array<int,AMREX_SPACEDIM> const& local_size)
796{
797 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
798 m_raw_mf = MF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
799}
800
801template <typename T, Direction D, bool C>
802std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
804{
805 m_raw_mf = MF(m_rx.boxArray(), m_rx.DistributionMap(), m_rx.nComp(), 0,
806 MFInfo{}.SetAlloc(false));
807
808 auto const myproc = ParallelContext::MyProcSub();
809 if (myproc < m_rx.size()) {
810 Box const& box = m_rx.box(myproc);
811 return std::make_pair(box.smallEnd().toArray(),
812 box.length().toArray());
813 } else {
814 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
815 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
816 }
817}
818
819template <typename T, Direction D, bool C>
820void R2C<T,D,C>::setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
821 std::array<int,AMREX_SPACEDIM> const& local_size)
822{
823 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
824 m_raw_cmf = cMF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
825}
826
827template <typename T, Direction D, bool C>
828std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
830{
831 auto const ncomp = m_info.batch_size;
832 auto const& [ba, dm] = getSpectralDataLayout();
833
834 m_raw_cmf = cMF(ba, dm, ncomp, 0, MFInfo{}.SetAlloc(false));
835
836 auto const myproc = ParallelContext::MyProcSub();
837 if (myproc < m_raw_cmf.size()) {
838 Box const& box = m_raw_cmf.box(myproc);
839 return std::make_pair(box.smallEnd().toArray(), box.length().toArray());
840 } else {
841 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
842 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
843 }
844}
845
846template <typename T, Direction D, bool C>
848{
849 if (C || m_r2c_sub) { amrex::Abort("R2C: OpenBC not supported with reduced dimensions or complex inputs"); }
850
851#if (AMREX_SPACEDIM == 3)
852 if (m_do_alld_fft) { return; }
853
854 auto const ncomp = m_info.batch_size;
855
856 if (m_slab_decomp && ! m_fft_fwd_x_half.defined) {
857 auto* fab = detail::get_fab(m_rx);
858 if (fab) {
859 Box bottom_half = m_real_domain;
860 bottom_half.growHi(2,-m_real_domain.length(2)/2);
861 Box box = fab->box() & bottom_half;
862 if (box.ok()) {
863 auto* pr = fab->dataPtr();
864 auto* pc = (typename Plan<T>::VendorComplex *)
865 detail::get_fab(m_cx)->dataPtr();
866#ifdef AMREX_USE_SYCL
867 m_fft_fwd_x_half.template init_r2c<Direction::forward>
868 (box, pr, pc, m_slab_decomp, ncomp);
869 m_fft_bwd_x_half = m_fft_fwd_x_half;
870#else
871 if constexpr (D == Direction::both || D == Direction::forward) {
872 m_fft_fwd_x_half.template init_r2c<Direction::forward>
873 (box, pr, pc, m_slab_decomp, ncomp);
874 }
875 if constexpr (D == Direction::both || D == Direction::backward) {
876 m_fft_bwd_x_half.template init_r2c<Direction::backward>
877 (box, pr, pc, m_slab_decomp, ncomp);
878 }
879#endif
880 }
881 }
882 } // else todo
883
884 if (m_cmd_x2z && ! m_cmd_x2z_half) {
885 Box bottom_half = m_spectral_domain_z;
886 // Note that z-direction's index is 0 because we z is the
887 // unit-stride direction here.
888 bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
889 m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
890 (m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
891 }
892
893 if (m_cmd_z2x && ! m_cmd_z2x_half) {
894 Box bottom_half = m_spectral_domain_x;
895 bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
896 m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
897 (m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
898 }
899#endif
900}
901
902template <typename T, Direction D, bool C>
903typename R2C<T,D,C>::MF&
904R2C<T,D,C>::make_m_rx_alias (IndexType const& ix_type)
905{
906 auto const key = ix_type.ixType();
907 auto it = m_rx_aliases.find(key);
908 if (it != m_rx_aliases.end()) {
909 return it->second;
910 }
911
912 BoxList bl(ix_type);
913 for (auto b : m_rx.boxArray().boxList()) {
914 b.setType(ix_type);
915 bl.push_back(b);
916 }
917
918 MF alias(BoxArray(std::move(bl)), m_rx.DistributionMap(), m_rx.nComp(),
919 m_rx.nGrowVect(), MFInfo{}.SetAlloc(false));
920
921 using FAB = typename MF::fab_type;
922 for (MFIter mfi(alias, MFItInfo().DisableDeviceSync()); mfi.isValid(); ++mfi) {
923 alias.setFab(mfi, FAB(mfi.fabbox(), m_rx.nComp(), m_rx[mfi.index()].dataPtr()));
924 }
925
926 auto [alias_it, inserted] = m_rx_aliases.emplace(key, std::move(alias));
927 amrex::ignore_unused(inserted);
928 return alias_it->second;
929}
930
931template <typename T, Direction D, bool C>
932Periodicity
933R2C<T,D,C>::make_m_rx_alias_periodicity (IndexType const& ix_type,
934 Periodicity const& period) const
935{
936 IntVect per = period.intVect();
937 for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
938 if (ix_type.nodeCentered(idim) && per[idim] == 0) {
939 per[idim] = m_real_domain.length(idim);
940 }
941 }
942 return Periodicity(per);
943}
944
945template <typename T, Direction D, bool C>
946template <Direction DIR>
947requires ((DIR == Direction::forward) || (DIR == Direction::both))
948void R2C<T,D,C>::forward (MF const& inmf, int incomp)
949{
950 BL_PROFILE("FFT::R2C::forward(in)");
951
952 auto const ncomp = m_info.batch_size;
953
954 if (m_r2c_sub) {
955 if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
956 m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf), incomp);
957 } else {
958 MF tmp(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
959 tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
960 m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp),0);
961 }
962 return;
963 }
964
965 if (&m_rx != &inmf) {
966 if (m_rx.boxArray().ixType() == inmf.boxArray().ixType()) {
967 m_rx.ParallelCopy(inmf, incomp, 0, ncomp);
968 } else {
969 MF& rx_alias = make_m_rx_alias(inmf.boxArray().ixType());
970 rx_alias.ParallelCopy(inmf, incomp, 0, ncomp);
971 }
972 }
973
974 if (m_do_alld_fft) {
975 if constexpr (C) {
976 m_fft_fwd_x.template compute_c2c<Direction::forward>();
977 } else {
978 m_fft_fwd_x.template compute_r2c<Direction::forward>();
979 }
980 return;
981 }
982
983 auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
984 if constexpr (C) {
985 fft_x.template compute_c2c<Direction::forward>();
986 } else {
987 fft_x.template compute_r2c<Direction::forward>();
988 }
989
990 if ( m_cmd_x2y) {
991 ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, ncomp, m_dtos_x2y);
992 }
993 m_fft_fwd_y.template compute_c2c<Direction::forward>();
994
995 if ( m_cmd_y2z) {
996 ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, ncomp, m_dtos_y2z);
997 }
998#if (AMREX_SPACEDIM == 3)
999 else if ( m_cmd_x2z) {
1000 if (m_openbc_half) {
1001 NonLocalBC::PackComponents components{};
1002 components.n_components = ncomp;
1004 {components, m_dtos_x2z};
1005 auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
1006
1007 Box upper_half = m_spectral_domain_z;
1008 // Note that z-direction's index is 0 because we z is the
1009 // unit-stride direction here.
1010 upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
1011 m_cz.setVal(0, upper_half, 0, ncomp);
1012
1013 ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
1014 } else {
1015 ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, ncomp, m_dtos_x2z);
1016 }
1017 }
1018#endif
1019 m_fft_fwd_z.template compute_c2c<Direction::forward>();
1020}
1021
1022template <typename T, Direction D, bool C>
1023template <typename FA, typename RT>
1024std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
1025R2C<T,D,C>::install_raw_ptr (FA& fa, RT const* p)
1026{
1027 AMREX_ALWAYS_ASSERT(!fa.empty());
1028
1029 using FAB = typename FA::FABType::value_type;
1030 using T_FAB = typename FAB::value_type;
1031 static_assert(sizeof(T_FAB) == sizeof(RT));
1032
1033 auto const ncomp = m_info.batch_size;
1034 auto const& ia = fa.IndexArray();
1035
1036 T_FAB* pp = nullptr;
1037 std::size_t sz = 0;
1038
1039 if ( ! ia.empty() ) {
1040 int K = ia[0];
1041 Box const& box = fa.fabbox(K);
1042 if ((alignof(T_FAB) == alignof(RT)) || amrex::is_aligned(p,alignof(T_FAB))) {
1043 pp = (T_FAB*)p;
1044 } else {
1045 sz = sizeof(T_FAB) * box.numPts() * ncomp;
1046 pp = (T_FAB*) The_Arena()->alloc(sz);
1047 }
1048 fa.setFab(K, FAB(box,ncomp,pp));
1049 }
1050
1051 if (sz == 0) {
1052 return std::make_pair(std::unique_ptr<char,DataDeleter>{},std::size_t(0));
1053 } else {
1054 return std::make_pair(std::unique_ptr<char,DataDeleter>
1055 {(char*)pp,DataDeleter{The_Arena()}}, sz);
1056 }
1057}
1058
1059
1060template <typename T, Direction D, bool C>
1061template <typename RT, typename CT, Direction DIR, bool CP>
1062requires ((DIR == Direction::forward) || (DIR == Direction::both))
1063 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
1064 || ((sizeof(RT) == sizeof(CT)) && CP))
1065void R2C<T,D,C>::forward (RT const* in, CT* out)
1066{
1067 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, in);
1068 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, out);
1069
1070 if (rsz > 0) {
1071 Gpu::dtod_memcpy_async(rdata.get(),in,rsz);
1073 }
1074
1075 forward(m_raw_mf, m_raw_cmf);
1076
1077 if (csz) {
1078 Gpu::dtod_memcpy_async(out,cdata.get(),csz);
1080 }
1081}
1082
1083template <typename T, Direction D, bool C>
1084template <Direction DIR>
1085requires (DIR == Direction::both)
1086void R2C<T,D,C>::backward (MF& outmf, int outcomp)
1087{
1088 backward_doit(outmf, IntVect(0), Periodicity::NonPeriodic(), outcomp);
1089}
1090
1091template <typename T, Direction D, bool C>
1092void R2C<T,D,C>::backward_doit (MF& outmf, IntVect const& ngout,
1093 Periodicity const& period, int outcomp)
1094{
1095 BL_PROFILE("FFT::R2C::backward(out)");
1096
1097 auto const ncomp = m_info.batch_size;
1098
1099 if (m_r2c_sub) {
1100 if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
1101 MF submf = m_sub_helper.make_alias_mf(outmf);
1102 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1103 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1104 m_r2c_sub->backward_doit(submf, subngout, subperiod, outcomp);
1105 } else {
1106 MF tmp(outmf.boxArray(), outmf.DistributionMap(), ncomp,
1107 m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
1108 this->backward_doit(tmp, ngout, period, 0);
1109 outmf.LocalCopy(tmp, 0, outcomp, ncomp, tmp.nGrowVect());
1110 }
1111 return;
1112 }
1113
1114 if (m_do_alld_fft) {
1115 if constexpr (C) {
1116 m_fft_bwd_x.template compute_c2c<Direction::backward>();
1117 } else {
1118 m_fft_bwd_x.template compute_r2c<Direction::backward>();
1119 }
1120 auto const dst_nghost = amrex::elemwiseMin(ngout,outmf.nGrowVect());
1121 if (m_rx.boxArray().ixType() == outmf.boxArray().ixType()) {
1122 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1123 dst_nghost, period);
1124 } else {
1125 MF& rx_alias = make_m_rx_alias(outmf.boxArray().ixType());
1126 auto const copy_period = make_m_rx_alias_periodicity(outmf.boxArray().ixType(), period);
1127 outmf.ParallelCopy(rx_alias, 0, outcomp, ncomp, IntVect(0),
1128 dst_nghost, copy_period);
1129 }
1130 return;
1131 }
1132
1133 m_fft_bwd_z.template compute_c2c<Direction::backward>();
1134 if ( m_cmd_z2y) {
1135 ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, ncomp, m_dtos_z2y);
1136 }
1137#if (AMREX_SPACEDIM == 3)
1138 else if ( m_cmd_z2x) {
1139 auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
1140 ParallelCopy(m_cx, m_cz, *cmd, 0, 0, ncomp, m_dtos_z2x);
1141 }
1142#endif
1143
1144 m_fft_bwd_y.template compute_c2c<Direction::backward>();
1145 if ( m_cmd_y2x) {
1146 ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, ncomp, m_dtos_y2x);
1147 }
1148
1149 auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
1150 if constexpr (C) {
1151 fft_x.template compute_c2c<Direction::backward>();
1152 } else {
1153 fft_x.template compute_r2c<Direction::backward>();
1154 }
1155 auto const dst_nghost = amrex::elemwiseMin(ngout,outmf.nGrowVect());
1156 if (m_rx.boxArray().ixType() == outmf.boxArray().ixType()) {
1157 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1158 dst_nghost, period);
1159 } else {
1160 MF& rx_alias = make_m_rx_alias(outmf.boxArray().ixType());
1161 auto const copy_period = make_m_rx_alias_periodicity(outmf.boxArray().ixType(), period);
1162 outmf.ParallelCopy(rx_alias, 0, outcomp, ncomp, IntVect(0),
1163 dst_nghost, copy_period);
1164 }
1165}
1166
1167template <typename T, Direction D, bool C>
1168template <typename CT, typename RT, Direction DIR, bool CP>
1169requires ((DIR == Direction::backward) || (DIR == Direction::both))
1170 && (((sizeof(RT)*2 == sizeof(CT)) && (!CP))
1171 || ((sizeof(RT) == sizeof(CT)) && CP))
1172void R2C<T,D,C>::backward (CT const* in, RT* out)
1173{
1174 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, out);
1175 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, in);
1176
1177 if (csz) {
1178 Gpu::dtod_memcpy_async(cdata.get(),in,csz);
1180 }
1181
1182 backward(m_raw_cmf, m_raw_mf);
1183
1184 if (rsz > 0) {
1185 Gpu::dtod_memcpy_async(out,rdata.get(),rsz);
1187 }
1188}
1189
1190template <typename T, Direction D, bool C>
1191std::pair<Plan<T>, Plan<T>>
1192R2C<T,D,C>::make_c2c_plans (cMF& inout, int ndims) const
1193{
1194 Plan<T> fwd;
1195 Plan<T> bwd;
1196
1197 auto* fab = detail::get_fab(inout);
1198 if (!fab) { return {fwd, bwd};}
1199
1200 Box const& box = fab->box();
1201 auto* pio = (typename Plan<T>::VendorComplex *)fab->dataPtr();
1202
1203 auto const ncomp = m_info.batch_size;
1204
1205#ifdef AMREX_USE_SYCL
1206 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1207 bwd = fwd;
1208#else
1209 if constexpr (D == Direction::both || D == Direction::forward) {
1210 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1211 }
1212 if constexpr (D == Direction::both || D == Direction::backward) {
1213 bwd.template init_c2c<Direction::backward>(box, pio, ncomp, ndims);
1214 }
1215#endif
1216
1217 return {fwd, bwd};
1218}
1219
1220template <typename T, Direction D, bool C>
1221template <typename F>
1222void R2C<T,D,C>::post_forward_doit_0 (F const& post_forward)
1223{
1224 if (m_info.twod_mode || m_info.batch_size > 1) {
1225 amrex::Abort("xxxxx todo: post_forward");
1226#if (AMREX_SPACEDIM > 1)
1227 } else if (m_r2c_sub) {
1228 // We need to pass the originally ordered indices to post_forward.
1229#if (AMREX_SPACEDIM == 2)
1230 // The original domain is (1,ny). The sub domain is (ny,1).
1231 m_r2c_sub->post_forward_doit_1
1232 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1233 {
1234 post_forward(0, i, 0, sp);
1235 });
1236#else
1237 if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
1238 // Original domain: (1, 1, nz). Sub domain: (nz, 1, 1)
1239 m_r2c_sub->post_forward_doit_1
1240 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1241 {
1242 post_forward(0, 0, i, sp);
1243 });
1244 } else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
1245 // Original domain: (1, ny, 1). Sub domain: (ny, 1, 1)
1246 m_r2c_sub->post_forward_doit_1
1247 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1248 {
1249 post_forward(0, i, 0, sp);
1250 });
1251 } else if (m_real_domain.length(0) == 1) {
1252 // Original domain: (1, ny, nz). Sub domain: (ny, nz, 1)
1253 m_r2c_sub->post_forward_doit_1
1254 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1255 {
1256 post_forward(0, i, j, sp);
1257 });
1258 } else if (m_real_domain.length(1) == 1) {
1259 // Original domain: (nx, 1, nz). Sub domain: (nx, nz, 1)
1260 m_r2c_sub->post_forward_doit_1
1261 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1262 {
1263 post_forward(i, 0, j, sp);
1264 });
1265 } else {
1266 amrex::Abort("R2c::post_forward_doit_0: how did this happen?");
1267 }
1268#endif
1269#endif
1270 } else {
1271 this->post_forward_doit_1(post_forward);
1272 }
1273}
1274
1275template <typename T, Direction D, bool C>
1276template <typename F>
1277void R2C<T,D,C>::post_forward_doit_1 (F const& post_forward)
1278{
1279 if (m_info.twod_mode || m_info.batch_size > 1) {
1280 amrex::Abort("xxxxx todo: post_forward");
1281 } else if (m_r2c_sub) {
1282 amrex::Abort("R2C::post_forward_doit_1: How did this happen?");
1283 } else {
1284 if ( ! m_cz.empty()) {
1285 auto* spectral_fab = detail::get_fab(m_cz);
1286 if (spectral_fab) {
1287 auto const& a = spectral_fab->array(); // m_cz's ordering is z,x,y
1288 ParallelForOMP(spectral_fab->box(),
1289 [=] AMREX_GPU_DEVICE (int iz, int jx, int ky)
1290 {
1291 post_forward(jx,ky,iz,a(iz,jx,ky));
1292 });
1293 }
1294 } else if ( ! m_cy.empty()) {
1295 auto* spectral_fab = detail::get_fab(m_cy);
1296 if (spectral_fab) {
1297 auto const& a = spectral_fab->array(); // m_cy's ordering is y,x,z
1298 ParallelForOMP(spectral_fab->box(),
1299 [=] AMREX_GPU_DEVICE (int iy, int jx, int k)
1300 {
1301 post_forward(jx,iy,k,a(iy,jx,k));
1302 });
1303 }
1304 } else {
1305 auto* spectral_fab = detail::get_fab(m_cx);
1306 if (spectral_fab) {
1307 auto const& a = spectral_fab->array();
1308 ParallelForOMP(spectral_fab->box(),
1309 [=] AMREX_GPU_DEVICE (int i, int j, int k)
1310 {
1311 post_forward(i,j,k,a(i,j,k));
1312 });
1313 }
1314 }
1315 }
1316}
1317
1318template <typename T, Direction D, bool C>
1320{
1321#if (AMREX_SPACEDIM == 3)
1322 if (m_info.oned_mode && !m_info.twod_mode) {
1323 return T(1)/T(Long(m_real_domain.length(0)));
1324 } else if (m_info.twod_mode) {
1325 return T(1)/T(Long(m_real_domain.length(0)) *
1326 Long(m_real_domain.length(1)));
1327 } else
1328#elif (AMREX_SPACEDIM == 2)
1329 if (m_info.oned_mode) {
1330 return T(1)/T(Long(m_real_domain.length(0)));
1331 } else
1332#endif
1333 {
1334 return T(1)/T(m_real_domain.numPts());
1335 }
1336}
1337
1338template <typename T, Direction D, bool C>
1339template <Direction DIR>
1340requires ((DIR == Direction::forward) || (DIR == Direction::both))
1341std::pair<typename R2C<T,D,C>::cMF *, IntVect>
1343{
1344#if (AMREX_SPACEDIM > 1)
1345 if (m_r2c_sub) {
1346 auto [cmf, order] = m_r2c_sub->getSpectralData();
1347 return std::make_pair(cmf, m_sub_helper.inverse_order(order));
1348 } else
1349#endif
1350 if (!m_cz.empty()) {
1351 return std::make_pair(const_cast<cMF*>(&m_cz), IntVect{AMREX_D_DECL(2,0,1)});
1352 } else if (!m_cy.empty()) {
1353 return std::make_pair(const_cast<cMF*>(&m_cy), IntVect{AMREX_D_DECL(1,0,2)});
1354 } else {
1355 return std::make_pair(const_cast<cMF*>(&m_cx), IntVect{AMREX_D_DECL(0,1,2)});
1356 }
1357}
1358
1359template <typename T, Direction D, bool C>
1360template <Direction DIR>
1361requires ((DIR == Direction::forward) || (DIR == Direction::both))
1362void R2C<T,D,C>::forward (MF const& inmf, cMF& outmf, int incomp, int outcomp)
1363{
1364 BL_PROFILE("FFT::R2C::forward(inout)");
1365
1366 auto const ncomp = m_info.batch_size;
1367
1368 if (m_r2c_sub)
1369 {
1370 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1371 MF inmf_sub, inmf_tmp;
1372 int incomp_sub;
1373 if (inmf_safe) {
1374 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1375 incomp_sub = incomp;
1376 } else {
1377 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1378 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1379 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1380 incomp_sub = 0;
1381 }
1382
1383 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1384 cMF outmf_sub, outmf_tmp;
1385 int outcomp_sub;
1386 if (outmf_safe) {
1387 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1388 outcomp_sub = outcomp;
1389 } else {
1390 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, 0);
1391 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1392 outcomp_sub = 0;
1393 }
1394
1395 m_r2c_sub->forward(inmf_sub, outmf_sub, incomp_sub, outcomp_sub);
1396
1397 if (!outmf_safe) {
1398 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, IntVect(0));
1399 }
1400 }
1401 else
1402 {
1403 forward(inmf, incomp);
1404 if (!m_cz.empty()) { // m_cz's order (z,x,y) -> (x,y,z)
1405 RotateBwd dtos{};
1407 (outmf, m_spectral_domain_x, m_cz, IntVect(0), dtos);
1408 ParallelCopy(outmf, m_cz, cmd, 0, outcomp, ncomp, dtos);
1409 } else if (!m_cy.empty()) { // m_cy's order (y,x,z) -> (x,y,z)
1411 (outmf, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
1412 ParallelCopy(outmf, m_cy, cmd, 0, outcomp, ncomp, m_dtos_y2x);
1413 } else {
1414 outmf.ParallelCopy(m_cx, 0, outcomp, ncomp);
1415 }
1416 }
1417}
1418
1419template <typename T, Direction D, bool C>
1420template <Direction DIR>
1421requires ((DIR == Direction::backward) || (DIR == Direction::both))
1422void R2C<T,D,C>::backward (cMF const& inmf, MF& outmf, int incomp, int outcomp)
1423{
1424 backward_doit(inmf, outmf, IntVect(0), Periodicity::NonPeriodic(), incomp, outcomp);
1425}
1426
1427template <typename T, Direction D, bool C>
1428void R2C<T,D,C>::backward_doit (cMF const& inmf, MF& outmf, IntVect const& ngout,
1429 Periodicity const& period, int incomp, int outcomp)
1430{
1431 BL_PROFILE("FFT::R2C::backward(inout)");
1432
1433 auto const ncomp = m_info.batch_size;
1434
1435 if (m_r2c_sub)
1436 {
1437 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1438 cMF inmf_sub, inmf_tmp;
1439 int incomp_sub;
1440 if (inmf_safe) {
1441 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1442 incomp_sub = incomp;
1443 } else {
1444 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1445 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1446 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1447 incomp_sub = 0;
1448 }
1449
1450 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1451 MF outmf_sub, outmf_tmp;
1452 int outcomp_sub;
1453 if (outmf_safe) {
1454 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1455 outcomp_sub = outcomp;
1456 } else {
1457 IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
1458 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, ngtmp);
1459 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1460 outcomp_sub = 0;
1461 }
1462
1463 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1464 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1465 m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod, incomp_sub, outcomp_sub);
1466
1467 if (!outmf_safe) {
1468 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, outmf_tmp.nGrowVect());
1469 }
1470 }
1471 else
1472 {
1473 if (!m_cz.empty()) { // (x,y,z) -> m_cz's order (z,x,y)
1474 RotateFwd dtos{};
1475 MultiBlockCommMetaData cmd
1476 (m_cz, m_spectral_domain_z, inmf, IntVect(0), dtos);
1477 ParallelCopy(m_cz, inmf, cmd, incomp, 0, ncomp, dtos);
1478 } else if (!m_cy.empty()) { // (x,y,z) -> m_cy's ordering (y,x,z)
1479 MultiBlockCommMetaData cmd
1480 (m_cy, m_spectral_domain_y, inmf, IntVect(0), m_dtos_x2y);
1481 ParallelCopy(m_cy, inmf, cmd, incomp, 0, ncomp, m_dtos_x2y);
1482 } else {
1483 m_cx.ParallelCopy(inmf, incomp, 0, ncomp);
1484 }
1485 backward_doit(outmf, ngout, period, outcomp);
1486 }
1487}
1488
1489template <typename T, Direction D, bool C>
1490std::pair<BoxArray,DistributionMapping>
1492{
1493#if (AMREX_SPACEDIM > 1)
1494 if (m_r2c_sub) {
1495 auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
1496 return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
1497 }
1498#endif
1499
1500#if (AMREX_SPACEDIM == 3)
1501 if (!m_cz.empty()) {
1502 BoxList bl = m_cz.boxArray().boxList();
1503 for (auto& b : bl) {
1504 auto lo = b.smallEnd();
1505 auto hi = b.bigEnd();
1506 std::swap(lo[0], lo[1]);
1507 std::swap(lo[1], lo[2]);
1508 std::swap(hi[0], hi[1]);
1509 std::swap(hi[1], hi[2]);
1510 b.setSmall(lo);
1511 b.setBig(hi);
1512 }
1513 return std::make_pair(BoxArray(std::move(bl)), m_cz.DistributionMap());
1514 } else
1515#endif
1516#if (AMREX_SPACEDIM >= 2)
1517 if (!m_cy.empty()) {
1518 BoxList bl = m_cy.boxArray().boxList();
1519 for (auto& b : bl) {
1520 auto lo = b.smallEnd();
1521 auto hi = b.bigEnd();
1522 std::swap(lo[0], lo[1]);
1523 std::swap(hi[0], hi[1]);
1524 b.setSmall(lo);
1525 b.setBig(hi);
1526 }
1527 return std::make_pair(BoxArray(std::move(bl)), m_cy.DistributionMap());
1528 } else
1529#endif
1530 {
1531 return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
1532 }
1533}
1534
1536template <typename T = Real, FFT::Direction D = FFT::Direction::both>
1538
1539}
1540
1541#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:564
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
BoxList & shift(int dir, int nzones)
Applies Box::shift(int,int) to each Box in the BoxList.
Definition AMReX_BoxList.cpp:563
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:124
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:364
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:155
__host__ __device__ int shortside(int &dir) const noexcept
Return length of shortest side. dir is modified to give direction with shortest side: 0....
Definition AMReX_Box.H:445
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:148
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:670
__host__ __device__ IndexTypeND< dim > ixType() const noexcept
Return the indexing type.
Definition AMReX_Box.H:136
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:681
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:112
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Long size() const noexcept
Length of the underlying processor map.
Definition AMReX_DistributionMapping.H:129
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:26
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:164
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:32
Parallel Discrete Fourier Transform.
Definition AMReX_FFT_R2C.H:48
std::conditional_t< C, cMF, std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > > MF
Definition AMReX_FFT_R2C.H:53
R2C & operator=(R2C const &)=delete
~R2C()
Definition AMReX_FFT_R2C.H:745
void setLocalDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local domain.
Definition AMReX_FFT_R2C.H:794
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:497
R2C(R2C &&)=delete
void post_forward_doit_1(F const &post_forward)
CUDA-visible helper that redistributes and applies post_forward for the batched layout.
Definition AMReX_FFT_R2C.H:1277
T scalingFactor() const
Scaling factor. If the data goes through forward and then backward, the result multiplied by the scal...
Definition AMReX_FFT_R2C.H:1319
std::pair< cMF *, IntVect > getSpectralData() const
Get the internal spectral data.
void post_forward_doit_0(F const &post_forward)
CUDA-visible hook that walks internal spectral data and applies post_forward.
Definition AMReX_FFT_R2C.H:1222
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalDomain() const
Get local domain.
Definition AMReX_FFT_R2C.H:803
R2C(R2C const &)=delete
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition AMReX_FFT_R2C.H:1491
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward, int incomp=0, int outcomp=0)
Forward and then backward transform.
Definition AMReX_FFT_R2C.H:203
R2C(std::array< int, 3 > const &domain_size, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:740
FabArray< BaseFab< GpuComplex< T > > > cMF
Definition AMReX_FFT_R2C.H:50
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalSpectralDomain() const
Get local spectral domain.
Definition AMReX_FFT_R2C.H:829
void setLocalSpectralDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local spectral domain.
Definition AMReX_FFT_R2C.H:820
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:130
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
Box fabbox(int K) const noexcept
Return the Kth FABs Box in the FabArray. That is, the region the Kth fab is actually defined on.
Definition AMReX_FabArrayBase.cpp:217
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
void setFab(int boxno, std::unique_ptr< FAB > elem)
Explicitly set the Kth FAB in the FabArray to point to elem.
Definition AMReX_FabArray.H:2397
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:873
typename std::conditional_t< IsBaseFab< BaseFab< GpuComplex< T > > >::value, BaseFab< GpuComplex< T > >, FABType >::value_type value_type
Definition AMReX_FabArray.H:355
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition AMReX_FabArray.H:2213
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition AMReX_FabArray.H:1989
A collection (stored as an array) of FArrayBox objects.
Definition AMReX_MultiFab.H:40
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:328
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
Definition AMReX_FFT_Helper.H:53
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:449
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int local_to_global_rank(int rank) noexcept
translate between local rank and global rank
Definition AMReX_ParallelContext.H:95
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
bool is_aligned(const void *p, std::size_t alignment) noexcept
Return whether the address p is aligned to alignment bytes.
Definition AMReX_Arena.H:39
IndexTypeND< 3 > IndexType
IndexType is an alias for amrex::IndexTypeND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:36
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:1951
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1943
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:63
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2862
Definition AMReX_FFT_Helper.H:83
bool twod_mode
Definition AMReX_FFT_Helper.H:94
bool oned_mode
Definition AMReX_FFT_Helper.H:103
int batch_size
Batched FFT size. Only support in R2C, not R2X.
Definition AMReX_FFT_Helper.H:106
DomainStrategy domain_strategy
Domain composition strategy.
Definition AMReX_FFT_Helper.H:85
int nprocs
Max number of processes to use.
Definition AMReX_FFT_Helper.H:109
int pencil_threshold
Definition AMReX_FFT_Helper.H:89
Definition AMReX_FFT_Helper.H:360
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition AMReX_FFT_Helper.H:364
FabArray memory allocation information.
Definition AMReX_FabArray.H:68
MFInfo & SetAlloc(bool a) noexcept
Definition AMReX_FabArray.H:75
This class specializes behaviour on local copies and unpacking receive buffers.
Definition AMReX_NonLocalBC.H:642
This is the index mapping based on the DTOS MultiBlockDestToSrc.
Definition AMReX_NonLocalBC.H:223
Contains information about which components take part of the data transaction.
Definition AMReX_NonLocalBC.H:550
int n_components
Definition AMReX_NonLocalBC.H:553
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78