1#ifndef AMREX_FFT_R2C_H_
2#define AMREX_FFT_R2C_H_
3#include <AMReX_Config.H>
20template <
typename T>
class OpenBCSolver;
21template <
typename T>
class Poisson;
22template <
typename T>
class PoissonHybrid;
45template <
typename T = Real, FFT::Direction D = FFT::Direction::both,
bool C = false>
50 using MF = std::conditional_t
51 <
C,
cMF, std::conditional_t<std::is_same_v<T,Real>,
55 template <
typename U>
friend class Poisson;
75 explicit R2C (std::array<int,AMREX_SPACEDIM>
const& domain_size,
109 std::array<int,AMREX_SPACEDIM>
const& local_size);
124 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
156 std::array<int,AMREX_SPACEDIM>
const& local_size);
176 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
201 std::enable_if_t<DIR == Direction::both, int> = 0>
203 int incomp = 0,
int outcomp = 0)
238 void forward (
MF const& inmf,
cMF& outmf,
int incomp = 0,
int outcomp = 0);
256 template <
typename RT,
typename CT,
Direction DIR=D,
bool CP=
C,
259 && ((
sizeof(RT)*2 ==
sizeof(CT) && !CP) ||
260 (
sizeof(RT) ==
sizeof(CT) && CP)),
int> = 0>
272 template <Direction DIR=D, std::enable_if_t<DIR == Direction::both,
int> = 0>
288 void backward (
cMF const& inmf,
MF& outmf,
int incomp = 0,
int outcomp = 0);
306 template <
typename CT,
typename RT,
Direction DIR=D,
bool CP=
C,
309 && ((
sizeof(RT)*2 ==
sizeof(CT) && !CP) ||
310 (
sizeof(RT) ==
sizeof(CT) && CP)),
int> = 0>
353 template <
typename F>
356 template <
typename F>
366 void prepare_openbc ();
372 void backward_doit (
cMF const& inmf,
MF& outmf,
375 int incomp = 0,
int outcomp = 0);
377 std::pair<Plan<T>,
Plan<T>> make_c2c_plans (
cMF& inout,
int ndims)
const;
379 static Box make_domain_x (
Box const& domain)
394 static Box make_domain_y (
Box const& domain)
399 domain.length(2)-1)),
404 domain.length(2)-1)),
409 static Box make_domain_z (
Box const& domain)
414 domain.length(1)-1)),
419 domain.length(1)-1)),
424 static std::pair<BoxArray,DistributionMapping>
425 make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM>
const& local_start,
426 std::array<int,AMREX_SPACEDIM>
const& local_size);
428 template <
typename FA,
typename RT>
429 std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
430 install_raw_ptr (FA& fa, RT
const* p);
432 Plan<T> m_fft_fwd_x{};
433 Plan<T> m_fft_bwd_x{};
434 Plan<T> m_fft_fwd_y{};
435 Plan<T> m_fft_bwd_y{};
436 Plan<T> m_fft_fwd_z{};
437 Plan<T> m_fft_bwd_z{};
438 Plan<T> m_fft_fwd_x_half{};
439 Plan<T> m_fft_bwd_x_half{};
444 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y;
445 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x;
446 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z;
447 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y;
448 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z;
449 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x;
450 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half;
451 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half;
456 RotateFwd m_dtos_x2z{};
457 RotateBwd m_dtos_z2x{};
465 mutable cMF m_raw_cmf;
467 std::unique_ptr<char,DataDeleter> m_data_1;
468 std::unique_ptr<char,DataDeleter> m_data_2;
471 Box m_spectral_domain_x;
472 Box m_spectral_domain_y;
473 Box m_spectral_domain_z;
475 std::unique_ptr<R2C<T,D,C>> m_r2c_sub;
476 detail::SubHelper m_sub_helper;
480 bool m_do_alld_fft =
false;
481 bool m_slab_decomp =
false;
482 bool m_openbc_half =
false;
485template <
typename T, Direction D,
bool C>
487 : m_real_domain(domain),
488 m_spectral_domain_x(make_domain_x(domain)),
489#if (AMREX_SPACEDIM >= 2)
490 m_spectral_domain_y(make_domain_y(domain)),
491#if (AMREX_SPACEDIM == 3)
492 m_spectral_domain_z(make_domain_z(domain)),
495 m_sub_helper(domain),
500 static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
503#if (AMREX_SPACEDIM == 2)
508 int(domain.
length(1) > 1) +
509 int(domain.
length(2) > 1)) >= 2);
514 Box subbox = m_sub_helper.make_box(m_real_domain);
515 if (subbox.
size() != m_real_domain.
size()) {
516 m_r2c_sub = std::make_unique<R2C<T,D,C>>(subbox, m_info);
524#if (AMREX_SPACEDIM == 3)
529 int shortside = m_real_domain.
shortside();
540 m_slab_decomp =
true;
542 m_slab_decomp =
true;
554 m_rx.define(bax, dmx, ncomp, 0,
MFInfo().SetAlloc(
false));
558 for (
auto & b : bl) {
560 b.setBig(0, m_spectral_domain_x.
bigEnd(0));
563 m_cx.
define(cbax, dmx, ncomp, 0,
MFInfo().SetAlloc(
false));
574#if (AMREX_SPACEDIM >= 2)
576 if ((m_real_domain.
length(1) > 1) && !m_slab_decomp && !m_info.
oned_mode)
580 if (cbay.size() == dmx.size()) {
583 cdmy = detail::make_iota_distromap(cbay.size());
585 m_cy.
define(cbay, cdmy, ncomp, 0,
MFInfo().SetAlloc(
false));
589#if (AMREX_SPACEDIM == 3)
590 if (m_real_domain.
length(1) > 1 &&
594 {
false,
true,
true},
true);
596 if (cbaz.size() == dmx.size()) {
598 }
else if (cbaz.size() == cdmy.
size()) {
601 cdmz = detail::make_iota_distromap(cbaz.size());
603 m_cz.
define(cbaz, cdmz, ncomp, 0,
MFInfo().SetAlloc(
false));
609 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
610 m_data_2 = detail::make_mfs_share(m_cz, m_cz);
612 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
613 m_data_2 = detail::make_mfs_share(m_cy, m_cy);
615 if (myproc < m_cx.
size()) {
618 m_cx.
setFab(myproc, FAB(box, ncomp, m_rx[myproc].dataPtr()));
623 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
624 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
626 m_data_1 = detail::make_mfs_share(m_rx, m_cy);
627 m_data_2 = detail::make_mfs_share(m_cx, m_cz);
635#if (AMREX_SPACEDIM >= 2)
636 if (! m_cy.
empty()) {
638 m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
639 (m_cy, m_spectral_domain_y, m_cx,
IntVect(0), m_dtos_x2y);
640 m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
641 (m_cx, m_spectral_domain_x, m_cy,
IntVect(0), m_dtos_y2x);
644#if (AMREX_SPACEDIM == 3)
645 if (! m_cz.
empty() ) {
648 m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
649 (m_cz, m_spectral_domain_z, m_cx,
IntVect(0), m_dtos_x2z);
650 m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
651 (m_cx, m_spectral_domain_x, m_cz,
IntVect(0), m_dtos_z2x);
654 m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
655 (m_cz, m_spectral_domain_z, m_cy,
IntVect(0), m_dtos_y2z);
656 m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
657 (m_cy, m_spectral_domain_y, m_cz,
IntVect(0), m_dtos_z2y);
666 if (myproc < m_rx.size())
669 int ndims = m_slab_decomp ? 2 : 1;
670 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx, ndims);
672 Box const& box = m_rx.box(myproc);
673 auto* pr = m_rx[myproc].dataPtr();
676 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
677 m_fft_bwd_x = m_fft_fwd_x;
680 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
683 m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp, ncomp);
689#if (AMREX_SPACEDIM >= 2)
690 if (! m_cy.
empty()) {
691 std::tie(m_fft_fwd_y, m_fft_bwd_y) = make_c2c_plans(m_cy,1);
694#if (AMREX_SPACEDIM == 3)
695 if (! m_cz.
empty()) {
696 std::tie(m_fft_fwd_z, m_fft_bwd_z) = make_c2c_plans(m_cz,1);
703 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
704 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx,AMREX_SPACEDIM);
706 m_data_1 = detail::make_mfs_share(m_rx, m_rx);
707 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
709 auto const& len = m_real_domain.
length();
710 auto* pr = (
void*)m_rx[0].dataPtr();
711 auto* pc = (
void*)m_cx[0].dataPtr();
713 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc,
false, ncomp);
714 m_fft_bwd_x = m_fft_fwd_x;
717 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc,
false, ncomp);
720 m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc,
false, ncomp);
727template <
typename T, Direction D,
bool C>
732template <
typename T, Direction D,
bool C>
735 if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
736 m_fft_bwd_x.destroy();
738 if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
739 m_fft_bwd_y.destroy();
741 if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
742 m_fft_bwd_z.destroy();
744 m_fft_fwd_x.destroy();
745 m_fft_fwd_y.destroy();
746 m_fft_fwd_z.destroy();
747 if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
748 m_fft_bwd_x_half.destroy();
750 m_fft_fwd_x_half.destroy();
753template <
typename T, Direction D,
bool C>
754std::pair<BoxArray,DistributionMapping>
756 std::array<int,AMREX_SPACEDIM>
const& local_size)
760 Box bx(lo, lo+len-1);
767 pmap.reserve(allboxes.size());
768 for (
int i = 0; i < allboxes.size(); ++i) {
769 if (allboxes[i].ok()) {
773 allboxes.erase(std::remove_if(allboxes.begin(), allboxes.end(),
774 [=] (
Box const& b) { return b.isEmpty(); }),
776 BoxList bl(std::move(allboxes));
777 return std::make_pair(BoxArray(std::move(bl)), DistributionMapping(std::move(pmap)));
779 return std::make_pair(BoxArray(bx), DistributionMapping(Vector<int>({0})));
783template <
typename T, Direction D,
bool C>
785 std::array<int,AMREX_SPACEDIM>
const& local_size)
787 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
788 m_raw_mf =
MF(ba, dm, m_rx.nComp(), 0,
MFInfo().SetAlloc(
false));
791template <
typename T, Direction D,
bool C>
792std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
795 m_raw_mf =
MF(m_rx.boxArray(), m_rx.DistributionMap(), m_rx.nComp(), 0,
799 if (myproc < m_rx.size()) {
800 Box const& box = m_rx.box(myproc);
801 return std::make_pair(box.
smallEnd().toArray(),
804 return std::make_pair(std::array<int,AMREX_SPACEDIM>{
AMREX_D_DECL(0,0,0)},
809template <
typename T, Direction D,
bool C>
811 std::array<int,AMREX_SPACEDIM>
const& local_size)
813 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
814 m_raw_cmf =
cMF(ba, dm, m_rx.nComp(), 0,
MFInfo().SetAlloc(
false));
817template <
typename T, Direction D,
bool C>
818std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
821 auto const ncomp = m_info.batch_size;
822 auto const& [ba, dm] = getSpectralDataLayout();
827 if (myproc < m_raw_cmf.size()) {
828 Box const& box = m_raw_cmf.box(myproc);
829 return std::make_pair(box.
smallEnd().toArray(), box.
length().toArray());
831 return std::make_pair(std::array<int,AMREX_SPACEDIM>{
AMREX_D_DECL(0,0,0)},
836template <
typename T, Direction D,
bool C>
839 if (
C || m_r2c_sub) {
amrex::Abort(
"R2C: OpenBC not supported with reduced dimensions or complex inputs"); }
841#if (AMREX_SPACEDIM == 3)
842 if (m_do_alld_fft) {
return; }
844 auto const ncomp = m_info.batch_size;
846 if (m_slab_decomp && ! m_fft_fwd_x_half.defined) {
847 auto* fab = detail::get_fab(m_rx);
849 Box bottom_half = m_real_domain;
850 bottom_half.
growHi(2,-m_real_domain.length(2)/2);
851 Box box = fab->box() & bottom_half;
853 auto* pr = fab->dataPtr();
855 detail::get_fab(m_cx)->dataPtr();
857 m_fft_fwd_x_half.template init_r2c<Direction::forward>
858 (box, pr, pc, m_slab_decomp, ncomp);
859 m_fft_bwd_x_half = m_fft_fwd_x_half;
862 m_fft_fwd_x_half.template init_r2c<Direction::forward>
863 (box, pr, pc, m_slab_decomp, ncomp);
866 m_fft_bwd_x_half.template init_r2c<Direction::backward>
867 (box, pr, pc, m_slab_decomp, ncomp);
874 if (m_cmd_x2z && ! m_cmd_x2z_half) {
875 Box bottom_half = m_spectral_domain_z;
878 bottom_half.
growHi(0,-m_spectral_domain_z.length(0)/2);
879 m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
880 (m_cz, bottom_half, m_cx,
IntVect(0), m_dtos_x2z);
883 if (m_cmd_z2x && ! m_cmd_z2x_half) {
884 Box bottom_half = m_spectral_domain_x;
885 bottom_half.
growHi(2,-m_spectral_domain_x.length(2)/2);
886 m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
887 (m_cx, bottom_half, m_cz,
IntVect(0), m_dtos_z2x);
892template <
typename T, Direction D,
bool C>
899 auto const ncomp = m_info.batch_size;
902 if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
903 m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf), incomp);
905 MF tmp(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
906 tmp.LocalCopy(inmf, incomp, 0, ncomp,
IntVect(0));
907 m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp),0);
912 if (&m_rx != &inmf) {
913 m_rx.ParallelCopy(inmf, incomp, 0, ncomp);
918 m_fft_fwd_x.template compute_c2c<Direction::forward>();
920 m_fft_fwd_x.template compute_r2c<Direction::forward>();
925 auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
927 fft_x.template compute_c2c<Direction::forward>();
929 fft_x.template compute_r2c<Direction::forward>();
933 ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, ncomp, m_dtos_x2y);
935 m_fft_fwd_y.template compute_c2c<Direction::forward>();
938 ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, ncomp, m_dtos_y2z);
940#if (AMREX_SPACEDIM == 3)
941 else if ( m_cmd_x2z) {
946 {components, m_dtos_x2z};
947 auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
949 Box upper_half = m_spectral_domain_z;
952 upper_half.
growLo (0,-m_spectral_domain_z.length(0)/2);
953 m_cz.setVal(0, upper_half, 0, ncomp);
955 ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
957 ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, ncomp, m_dtos_x2z);
961 m_fft_fwd_z.template compute_c2c<Direction::forward>();
964template <
typename T, Direction D,
bool C>
965template <
typename FA,
typename RT>
966std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
971 using FAB =
typename FA::FABType::value_type;
972 using T_FAB =
typename FAB::value_type;
973 static_assert(
sizeof(T_FAB) ==
sizeof(RT));
975 auto const ncomp = m_info.batch_size;
976 auto const& ia = fa.IndexArray();
981 if ( ! ia.empty() ) {
983 Box const& box = fa.fabbox(K);
987 sz =
sizeof(T_FAB) * box.
numPts() * ncomp;
990 fa.setFab(K, FAB(box,ncomp,
pp));
994 return std::make_pair(std::unique_ptr<char,DataDeleter>{},std::size_t(0));
996 return std::make_pair(std::unique_ptr<char,DataDeleter>
1002template <
typename T, Direction D,
bool C>
1003template <
typename RT,
typename CT,
Direction DIR,
bool CP,
1006 && ((
sizeof(RT)*2 ==
sizeof(CT) && !CP) ||
1007 (
sizeof(RT) ==
sizeof(CT) && CP)),
int> >
1010 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, in);
1011 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, out);
1026template <
typename T, Direction D,
bool C>
1027template <Direction DIR, std::enable_if_t<DIR == Direction::both,
int> >
1033template <
typename T, Direction D,
bool C>
1039 auto const ncomp = m_info.batch_size;
1042 if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
1043 MF submf = m_sub_helper.make_alias_mf(outmf);
1044 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1045 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1046 m_r2c_sub->backward_doit(submf, subngout, subperiod, outcomp);
1048 MF tmp(outmf.boxArray(), outmf.DistributionMap(), ncomp,
1049 m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
1050 this->backward_doit(tmp, ngout, period, 0);
1051 outmf.LocalCopy(tmp, 0, outcomp, ncomp, tmp.nGrowVect());
1056 if (m_do_alld_fft) {
1058 m_fft_bwd_x.template compute_c2c<Direction::backward>();
1060 m_fft_bwd_x.template compute_r2c<Direction::backward>();
1062 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp,
IntVect(0),
1067 m_fft_bwd_z.template compute_c2c<Direction::backward>();
1069 ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, ncomp, m_dtos_z2y);
1071#if (AMREX_SPACEDIM == 3)
1072 else if ( m_cmd_z2x) {
1073 auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
1074 ParallelCopy(m_cx, m_cz, *cmd, 0, 0, ncomp, m_dtos_z2x);
1078 m_fft_bwd_y.template compute_c2c<Direction::backward>();
1080 ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, ncomp, m_dtos_y2x);
1083 auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
1085 fft_x.template compute_c2c<Direction::backward>();
1087 fft_x.template compute_r2c<Direction::backward>();
1089 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp,
IntVect(0),
1093template <
typename T, Direction D,
bool C>
1094template <
typename CT,
typename RT,
Direction DIR,
bool CP,
1097 && ((
sizeof(RT)*2 ==
sizeof(CT) && !CP) ||
1098 (
sizeof(RT) ==
sizeof(CT) && CP)),
int> >
1101 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, out);
1102 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, in);
1117template <
typename T, Direction D,
bool C>
1124 auto* fab = detail::get_fab(inout);
1125 if (!fab) {
return {fwd, bwd};}
1127 Box const& box = fab->box();
1130 auto const ncomp = m_info.batch_size;
1132#ifdef AMREX_USE_SYCL
1133 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1137 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1140 bwd.template init_c2c<Direction::backward>(box, pio, ncomp, ndims);
1147template <
typename T, Direction D,
bool C>
1148template <
typename F>
1151 if (m_info.twod_mode || m_info.batch_size > 1) {
1153#if (AMREX_SPACEDIM > 1)
1154 }
else if (m_r2c_sub) {
1156#if (AMREX_SPACEDIM == 2)
1158 m_r2c_sub->post_forward_doit_1
1161 post_forward(0, i, 0, sp);
1164 if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
1166 m_r2c_sub->post_forward_doit_1
1169 post_forward(0, 0, i, sp);
1171 }
else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
1173 m_r2c_sub->post_forward_doit_1
1176 post_forward(0, i, 0, sp);
1178 }
else if (m_real_domain.length(0) == 1) {
1180 m_r2c_sub->post_forward_doit_1
1183 post_forward(0, i, j, sp);
1185 }
else if (m_real_domain.length(1) == 1) {
1187 m_r2c_sub->post_forward_doit_1
1190 post_forward(i, 0, j, sp);
1193 amrex::Abort(
"R2c::post_forward_doit_0: how did this happen?");
1198 this->post_forward_doit_1(post_forward);
1202template <
typename T, Direction D,
bool C>
1203template <
typename F>
1206 if (m_info.twod_mode || m_info.batch_size > 1) {
1208 }
else if (m_r2c_sub) {
1209 amrex::Abort(
"R2C::post_forward_doit_1: How did this happen?");
1211 if ( ! m_cz.empty()) {
1212 auto* spectral_fab = detail::get_fab(m_cz);
1214 auto const& a = spectral_fab->array();
1218 post_forward(jx,ky,iz,a(iz,jx,ky));
1221 }
else if ( ! m_cy.empty()) {
1222 auto* spectral_fab = detail::get_fab(m_cy);
1224 auto const& a = spectral_fab->array();
1228 post_forward(jx,iy,k,a(iy,jx,k));
1232 auto* spectral_fab = detail::get_fab(m_cx);
1234 auto const& a = spectral_fab->array();
1238 post_forward(i,j,k,a(i,j,k));
1245template <
typename T, Direction D,
bool C>
1248#if (AMREX_SPACEDIM == 3)
1249 if (m_info.twod_mode) {
1250 return T(1)/T(
Long(m_real_domain.length(0)) *
1251 Long(m_real_domain.length(1)));
1255 return T(1)/T(m_real_domain.numPts());
1259template <
typename T, Direction D,
bool C>
1262std::pair<typename R2C<T,D,C>::cMF *,
IntVect>
1265#if (AMREX_SPACEDIM > 1)
1267 auto [cmf, order] = m_r2c_sub->getSpectralData();
1268 return std::make_pair(cmf, m_sub_helper.inverse_order(order));
1271 if (!m_cz.empty()) {
1273 }
else if (!m_cy.empty()) {
1280template <
typename T, Direction D,
bool C>
1287 auto const ncomp = m_info.batch_size;
1291 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1292 MF inmf_sub, inmf_tmp;
1295 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1296 incomp_sub = incomp;
1298 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1299 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp,
IntVect(0));
1300 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1304 bool outmf_safe = m_sub_helper.ghost_safe(outmf.
nGrowVect());
1305 cMF outmf_sub, outmf_tmp;
1308 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1309 outcomp_sub = outcomp;
1312 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1316 m_r2c_sub->forward(inmf_sub, outmf_sub, incomp_sub, outcomp_sub);
1325 if (!m_cz.empty()) {
1328 (outmf, m_spectral_domain_x, m_cz,
IntVect(0), dtos);
1329 ParallelCopy(outmf, m_cz, cmd, 0, outcomp, ncomp, dtos);
1330 }
else if (!m_cy.empty()) {
1332 (outmf, m_spectral_domain_x, m_cy,
IntVect(0), m_dtos_y2x);
1333 ParallelCopy(outmf, m_cy, cmd, 0, outcomp, ncomp, m_dtos_y2x);
1340template <
typename T, Direction D,
bool C>
1348template <
typename T, Direction D,
bool C>
1350 Periodicity const& period,
int incomp,
int outcomp)
1354 auto const ncomp = m_info.batch_size;
1358 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1359 cMF inmf_sub, inmf_tmp;
1362 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1363 incomp_sub = incomp;
1365 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1366 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp,
IntVect(0));
1367 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1371 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1372 MF outmf_sub, outmf_tmp;
1375 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1376 outcomp_sub = outcomp;
1378 IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
1379 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, ngtmp);
1380 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1384 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1385 Periodicity
const& subperiod = m_sub_helper.make_periodicity(period);
1386 m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod, incomp_sub, outcomp_sub);
1389 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, outmf_tmp.nGrowVect());
1394 if (!m_cz.empty()) {
1396 MultiBlockCommMetaData cmd
1397 (m_cz, m_spectral_domain_z, inmf,
IntVect(0), dtos);
1399 }
else if (!m_cy.empty()) {
1400 MultiBlockCommMetaData cmd
1401 (m_cy, m_spectral_domain_y, inmf,
IntVect(0), m_dtos_x2y);
1402 ParallelCopy(m_cy, inmf, cmd, incomp, 0, ncomp, m_dtos_x2y);
1404 m_cx.ParallelCopy(inmf, incomp, 0, ncomp);
1406 backward_doit(outmf, ngout, period, outcomp);
1410template <
typename T, Direction D,
bool C>
1411std::pair<BoxArray,DistributionMapping>
1414#if (AMREX_SPACEDIM > 1)
1416 auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
1417 return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
1421#if (AMREX_SPACEDIM == 3)
1422 if (!m_cz.empty()) {
1423 BoxList bl = m_cz.boxArray().boxList();
1424 for (
auto& b : bl) {
1425 auto lo = b.smallEnd();
1426 auto hi = b.bigEnd();
1427 std::swap(lo[0], lo[1]);
1428 std::swap(lo[1], lo[2]);
1429 std::swap(hi[0], hi[1]);
1430 std::swap(hi[1], hi[2]);
1434 return std::make_pair(
BoxArray(std::move(bl)), m_cz.DistributionMap());
1437#if (AMREX_SPACEDIM >= 2)
1438 if (!m_cy.empty()) {
1439 BoxList bl = m_cy.boxArray().boxList();
1440 for (
auto& b : bl) {
1441 auto lo = b.smallEnd();
1442 auto hi = b.bigEnd();
1443 std::swap(lo[0], lo[1]);
1444 std::swap(hi[0], hi[1]);
1448 return std::make_pair(
BoxArray(std::move(bl)), m_cy.DistributionMap());
1452 return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
1457template <
typename T = Real, FFT::Direction D = FFT::Direction::both>
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:568
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
BoxList & shift(int dir, int nzones)
Applies Box::shift(int,int) to each Box in the BoxList.
Definition AMReX_BoxList.cpp:565
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:123
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:356
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:154
__host__ __device__ int shortside(int &dir) const noexcept
Return length of shortest side. dir is modified to give direction with shortest side: 0....
Definition AMReX_Box.H:437
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:147
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:662
__host__ __device__ IndexTypeND< dim > ixType() const noexcept
Return the indexing type.
Definition AMReX_Box.H:135
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:673
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:111
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Long size() const noexcept
Length of the underlying processor map.
Definition AMReX_DistributionMapping.H:129
Convolution-based solver for open boundary conditions using Green's functions.
Definition AMReX_FFT_OpenBCSolver.H:24
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:187
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:67
Parallel Discrete Fourier Transform.
Definition AMReX_FFT_R2C.H:47
std::conditional_t< C, cMF, std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > > MF
Definition AMReX_FFT_R2C.H:52
R2C & operator=(R2C const &)=delete
void forward(MF const &inmf, cMF &outmf, int incomp=0, int outcomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:1283
~R2C()
Definition AMReX_FFT_R2C.H:733
void setLocalDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local domain.
Definition AMReX_FFT_R2C.H:784
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:486
void backward(cMF const &inmf, MF &outmf, int incomp=0, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:1343
void backward(MF &outmf, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:1028
void post_forward_doit_1(F const &post_forward)
CUDA-visible helper that redistributes and applies post_forward for the batched layout.
Definition AMReX_FFT_R2C.H:1204
T scalingFactor() const
Scaling factor. If the data goes through forward and then backward, the result multiplied by the scal...
Definition AMReX_FFT_R2C.H:1246
void post_forward_doit_0(F const &post_forward)
CUDA-visible hook that walks internal spectral data and applies post_forward.
Definition AMReX_FFT_R2C.H:1149
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalDomain() const
Get local domain.
Definition AMReX_FFT_R2C.H:793
void forward(RT const *in, CT *out)
Forward transform.
Definition AMReX_FFT_R2C.H:1008
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition AMReX_FFT_R2C.H:1412
std::pair< cMF *, IntVect > getSpectralData() const
Get the internal spectral data.
R2C(std::array< int, 3 > const &domain_size, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:728
FabArray< BaseFab< GpuComplex< T > > > cMF
Definition AMReX_FFT_R2C.H:49
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalSpectralDomain() const
Get local spectral domain.
Definition AMReX_FFT_R2C.H:819
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward, int incomp=0, int outcomp=0)
Forward and then backward transform.
Definition AMReX_FFT_R2C.H:202
void setLocalSpectralDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local spectral domain.
Definition AMReX_FFT_R2C.H:810
void backward(CT const *in, RT *out)
Backward transform.
Definition AMReX_FFT_R2C.H:1099
void forward(MF const &inmf, int incomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:895
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
Box fabbox(int K) const noexcept
Return the Kth FABs Box in the FabArray. That is, the region the Kth fab is actually defined on.
Definition AMReX_FabArrayBase.cpp:217
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
void setFab(int boxno, std::unique_ptr< FAB > elem)
Explicitly set the Kth FAB in the FabArray to point to elem.
Definition AMReX_FabArray.H:2357
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:849
typename std::conditional_t< IsBaseFab< BaseFab< GpuComplex< T > > >::value, BaseFab< GpuComplex< T > >, FABType >::value_type value_type
Definition AMReX_FabArray.H:360
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition AMReX_FabArray.H:2173
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition AMReX_FabArray.H:1954
A collection (stored as an array) of FArrayBox objects.
Definition AMReX_MultiFab.H:40
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:319
Arena * The_Arena()
Definition AMReX_Arena.cpp:783
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
Definition AMReX_FFT_Helper.H:52
Direction
Definition AMReX_FFT_Helper.H:54
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:329
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
MPI_Comm Communicator() noexcept
Definition AMReX_ParallelDescriptor.H:223
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
bool is_aligned(const void *p, std::size_t alignment) noexcept
Return whether the address p is aligned to alignment bytes.
Definition AMReX_Arena.H:36
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1947
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:2019
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62
Definition AMReX_FFT_Helper.H:64
bool twod_mode
Definition AMReX_FFT_Helper.H:75
bool oned_mode
We might have a special twod_mode: nx or ny == 1 && nz > 1.
Definition AMReX_FFT_Helper.H:78
int batch_size
Batched FFT size. Only support in R2C, not R2X.
Definition AMReX_FFT_Helper.H:81
DomainStrategy domain_strategy
Domain composition strategy.
Definition AMReX_FFT_Helper.H:66
int nprocs
Max number of processes to use.
Definition AMReX_FFT_Helper.H:84
int pencil_threshold
Definition AMReX_FFT_Helper.H:70
Definition AMReX_FFT_Helper.H:180
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition AMReX_FFT_Helper.H:184
FabArray memory allocation information.
Definition AMReX_FabArray.H:66
MFInfo & SetAlloc(bool a) noexcept
Definition AMReX_FabArray.H:73
This class specializes behaviour on local copies and unpacking receive buffers.
Definition AMReX_NonLocalBC.H:626
Contains information about which components take part of the data transaction.
Definition AMReX_NonLocalBC.H:539
int n_components
Definition AMReX_NonLocalBC.H:542
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78