Block-Structured AMR Software Framework
 
Loading...
Searching...
No Matches
AMReX_FFT_R2C.H
Go to the documentation of this file.
1#ifndef AMREX_FFT_R2C_H_
2#define AMREX_FFT_R2C_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_MultiFab.H>
6#include <AMReX_FFT_Helper.H>
7#include <algorithm>
8#include <numeric>
9#include <tuple>
10
11namespace amrex::FFT
12{
13
14template <typename T> class OpenBCSolver;
15template <typename T> class Poisson;
16template <typename T> class PoissonHybrid;
17
39template <typename T = Real, FFT::Direction D = FFT::Direction::both, bool C = false>
40class R2C
41{
42public:
44 using MF = std::conditional_t
45 <C, cMF, std::conditional_t<std::is_same_v<T,Real>,
47
48 template <typename U> friend class OpenBCSolver;
49 template <typename U> friend class Poisson;
50 template <typename U> friend class PoissonHybrid;
51
58 explicit R2C (Box const& domain, Info const& info = Info{});
59
69 explicit R2C (std::array<int,AMREX_SPACEDIM> const& domain_size,
70 Info const& info = Info{});
71
72 ~R2C ();
73
74 R2C (R2C const&) = delete;
75 R2C (R2C &&) = delete;
76 R2C& operator= (R2C const&) = delete;
77 R2C& operator= (R2C &&) = delete;
78
102 void setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
103 std::array<int,AMREX_SPACEDIM> const& local_size);
104
116 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
118
147 void setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
148 std::array<int,AMREX_SPACEDIM> const& local_size);
149
167 std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
169
191 template <typename F, Direction DIR=D,
192 std::enable_if_t<DIR == Direction::both, int> = 0>
193 void forwardThenBackward (MF const& inmf, MF& outmf, F const& post_forward,
194 int incomp = 0, int outcomp = 0)
195 {
196 BL_PROFILE("FFT::R2C::forwardbackward");
197 this->forward(inmf, incomp);
198 this->post_forward_doit_0(post_forward);
199 this->backward(outmf, outcomp);
200 }
201
212 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
213 DIR == Direction::both, int> = 0>
214 void forward (MF const& inmf, int incomp = 0);
215
227 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
228 DIR == Direction::both, int> = 0>
229 void forward (MF const& inmf, cMF& outmf, int incomp = 0, int outcomp = 0);
230
244 template <typename RT, typename CT, Direction DIR=D, bool CP=C,
245 std::enable_if_t<(DIR == Direction::forward ||
246 DIR == Direction::both)
247 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
248 (sizeof(RT) == sizeof(CT) && CP)), int> = 0>
249 void forward (RT const* in, CT* out);
250
260 template <Direction DIR=D, std::enable_if_t<DIR == Direction::both, int> = 0>
261 void backward (MF& outmf, int outcomp = 0);
262
274 template <Direction DIR=D, std::enable_if_t<DIR == Direction::backward ||
275 DIR == Direction::both, int> = 0>
276 void backward (cMF const& inmf, MF& outmf, int incomp = 0, int outcomp = 0);
277
291 template <typename CT, typename RT, Direction DIR=D, bool CP=C,
292 std::enable_if_t<(DIR == Direction::backward ||
293 DIR == Direction::both)
294 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
295 (sizeof(RT) == sizeof(CT) && CP)), int> = 0>
296 void backward (CT const* in, RT* out);
297
301 [[nodiscard]] T scalingFactor () const;
302
312 template <Direction DIR=D, std::enable_if_t<DIR == Direction::forward ||
313 DIR == Direction::both, int> = 0>
314 std::pair<cMF*,IntVect> getSpectralData () const;
315
323 [[nodiscard]] std::pair<BoxArray,DistributionMapping> getSpectralDataLayout () const;
324
325 // This is a private function, but it's public for cuda.
326 template <typename F>
327 void post_forward_doit_0 (F const& post_forward);
328
329 template <typename F>
330 void post_forward_doit_1 (F const& post_forward);
331
332private:
333
334 void prepare_openbc ();
335
336 void backward_doit (MF& outmf, IntVect const& ngout = IntVect(0),
337 Periodicity const& period = Periodicity::NonPeriodic(),
338 int outcomp = 0);
339
340 void backward_doit (cMF const& inmf, MF& outmf,
341 IntVect const& ngout = IntVect(0),
342 Periodicity const& period = Periodicity::NonPeriodic(),
343 int incomp = 0, int outcomp = 0);
344
345 std::pair<Plan<T>,Plan<T>> make_c2c_plans (cMF& inout, int ndims) const;
346
347 static Box make_domain_x (Box const& domain)
348 {
349 if constexpr (C) {
350 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)-1,
351 domain.length(1)-1,
352 domain.length(2)-1)),
353 domain.ixType());
354 } else {
355 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2,
356 domain.length(1)-1,
357 domain.length(2)-1)),
358 domain.ixType());
359 }
360 }
361
362 static Box make_domain_y (Box const& domain)
363 {
364 if constexpr (C) {
365 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
366 domain.length(0)-1,
367 domain.length(2)-1)),
368 domain.ixType());
369 } else {
370 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(1)-1,
371 domain.length(0)/2,
372 domain.length(2)-1)),
373 domain.ixType());
374 }
375 }
376
377 static Box make_domain_z (Box const& domain)
378 {
379 if constexpr (C) {
380 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
381 domain.length(0)-1,
382 domain.length(1)-1)),
383 domain.ixType());
384 } else {
385 return Box(IntVect(0), IntVect(AMREX_D_DECL(domain.length(2)-1,
386 domain.length(0)/2,
387 domain.length(1)-1)),
388 domain.ixType());
389 }
390 }
391
392 static std::pair<BoxArray,DistributionMapping>
393 make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
394 std::array<int,AMREX_SPACEDIM> const& local_size);
395
396 template <typename FA, typename RT>
397 std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
398 install_raw_ptr (FA& fa, RT const* p);
399
400 Plan<T> m_fft_fwd_x{};
401 Plan<T> m_fft_bwd_x{};
402 Plan<T> m_fft_fwd_y{};
403 Plan<T> m_fft_bwd_y{};
404 Plan<T> m_fft_fwd_z{};
405 Plan<T> m_fft_bwd_z{};
406 Plan<T> m_fft_fwd_x_half{};
407 Plan<T> m_fft_bwd_x_half{};
408
409 // Comm meta-data. In the forward phase, we start with (x,y,z),
410 // transpose to (y,x,z) and then (z,x,y). In the backward phase, we
411 // perform inverse transpose.
412 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2y; // (x,y,z) -> (y,x,z)
413 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2x; // (y,x,z) -> (x,y,z)
414 std::unique_ptr<MultiBlockCommMetaData> m_cmd_y2z; // (y,x,z) -> (z,x,y)
415 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2y; // (z,x,y) -> (y,x,z)
416 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z; // (x,y,z) -> (z,x,y)
417 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x; // (z,x,y) -> (x,y,z)
418 std::unique_ptr<MultiBlockCommMetaData> m_cmd_x2z_half; // for openbc
419 std::unique_ptr<MultiBlockCommMetaData> m_cmd_z2x_half; // for openbc
420 Swap01 m_dtos_x2y{};
421 Swap01 m_dtos_y2x{};
422 Swap02 m_dtos_y2z{};
423 Swap02 m_dtos_z2y{};
424 RotateFwd m_dtos_x2z{};
425 RotateBwd m_dtos_z2x{};
426
427 MF m_rx;
428 cMF m_cx;
429 cMF m_cy;
430 cMF m_cz;
431
432 mutable MF m_raw_mf;
433 mutable cMF m_raw_cmf;
434
435 std::unique_ptr<char,DataDeleter> m_data_1;
436 std::unique_ptr<char,DataDeleter> m_data_2;
437
438 Box m_real_domain;
439 Box m_spectral_domain_x;
440 Box m_spectral_domain_y;
441 Box m_spectral_domain_z;
442
443 std::unique_ptr<R2C<T,D,C>> m_r2c_sub;
444 detail::SubHelper m_sub_helper;
445
446 Info m_info;
447
448 bool m_do_alld_fft = false;
449 bool m_slab_decomp = false;
450 bool m_openbc_half = false;
451};
452
453template <typename T, Direction D, bool C>
454R2C<T,D,C>::R2C (Box const& domain, Info const& info)
455 : m_real_domain(domain),
456 m_spectral_domain_x(make_domain_x(domain)),
457#if (AMREX_SPACEDIM >= 2)
458 m_spectral_domain_y(make_domain_y(domain)),
459#if (AMREX_SPACEDIM == 3)
460 m_spectral_domain_z(make_domain_z(domain)),
461#endif
462#endif
463 m_sub_helper(domain),
464 m_info(info)
465{
466 BL_PROFILE("FFT::R2C");
467
468 static_assert(std::is_same_v<float,T> || std::is_same_v<double,T>);
469
470 AMREX_ALWAYS_ASSERT(m_real_domain.numPts() > 1);
471#if (AMREX_SPACEDIM == 2)
473#else
474 if (m_info.twod_mode) {
475 AMREX_ALWAYS_ASSERT((int(domain.length(0) > 1) +
476 int(domain.length(1) > 1) +
477 int(domain.length(2) > 1)) >= 2);
478 }
479#endif
480
481 {
482 Box subbox = m_sub_helper.make_box(m_real_domain);
483 if (subbox.size() != m_real_domain.size()) {
484 m_r2c_sub = std::make_unique<R2C<T,D,C>>(subbox, m_info);
485 return;
486 }
487 }
488
489 int myproc = ParallelContext::MyProcSub();
490 int nprocs = std::min(ParallelContext::NProcsSub(), m_info.nprocs);
491
492#if (AMREX_SPACEDIM == 3)
494 if (m_info.twod_mode) {
496 } else {
497 int shortside = m_real_domain.shortside();
498 if (shortside < m_info.pencil_threshold*nprocs) {
500 } else {
502 }
503 }
504 }
505
506 if (!m_info.oned_mode) {
507 if (m_info.twod_mode) {
508 m_slab_decomp = true;
509 } else if (m_info.domain_strategy == DomainStrategy::slab && (m_real_domain.length(1) > 1)) {
510 m_slab_decomp = true;
511 }
512 }
513
514#endif
515
516 auto const ncomp = m_info.batch_size;
517
518 auto bax = amrex::decompose(m_real_domain, nprocs,
519 {AMREX_D_DECL(false,!m_slab_decomp,m_real_domain.length(2)>1)}, true);
520
521 DistributionMapping dmx = detail::make_iota_distromap(bax.size());
522 m_rx.define(bax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
523
524 {
525 BoxList bl = bax.boxList();
526 for (auto & b : bl) {
527 b.shift(-m_real_domain.smallEnd());
528 b.setBig(0, m_spectral_domain_x.bigEnd(0));
529 }
530 BoxArray cbax(std::move(bl));
531 m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
532 }
533
534 m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && (! m_info.twod_mode);
535
536 if (!m_do_alld_fft) // do a series of 1d or 2d ffts
537 {
538 //
539 // make data containers
540 //
541
542#if (AMREX_SPACEDIM >= 2)
544 if ((m_real_domain.length(1) > 1) && !m_slab_decomp && !m_info.oned_mode)
545 {
546 auto cbay = amrex::decompose(m_spectral_domain_y, nprocs,
547 {AMREX_D_DECL(false,true,true)}, true);
548 if (cbay.size() == dmx.size()) {
549 cdmy = dmx;
550 } else {
551 cdmy = detail::make_iota_distromap(cbay.size());
552 }
553 m_cy.define(cbay, cdmy, ncomp, 0, MFInfo().SetAlloc(false));
554 }
555#endif
556
557#if (AMREX_SPACEDIM == 3)
558 if (m_real_domain.length(1) > 1 &&
559 (! m_info.twod_mode && m_real_domain.length(2) > 1))
560 {
561 auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
562 {false,true,true}, true);
564 if (cbaz.size() == dmx.size()) {
565 cdmz = dmx;
566 } else if (cbaz.size() == cdmy.size()) {
567 cdmz = cdmy;
568 } else {
569 cdmz = detail::make_iota_distromap(cbaz.size());
570 }
571 m_cz.define(cbaz, cdmz, ncomp, 0, MFInfo().SetAlloc(false));
572 }
573#endif
574
575 if constexpr (C) {
576 if (m_slab_decomp) {
577 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
578 m_data_2 = detail::make_mfs_share(m_cz, m_cz);
579 } else {
580 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
581 m_data_2 = detail::make_mfs_share(m_cy, m_cy);
582 // make m_cx an alias to m_rx
583 if (myproc < m_cx.size()) {
584 Box const& box = m_cx.fabbox(myproc);
585 using FAB = typename cMF::FABType::value_type;
586 m_cx.setFab(myproc, FAB(box, ncomp, m_rx[myproc].dataPtr()));
587 }
588 }
589 } else {
590 if (m_slab_decomp) {
591 m_data_1 = detail::make_mfs_share(m_rx, m_cz);
592 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
593 } else {
594 m_data_1 = detail::make_mfs_share(m_rx, m_cy);
595 m_data_2 = detail::make_mfs_share(m_cx, m_cz);
596 }
597 }
598
599 //
600 // make copiers
601 //
602
603#if (AMREX_SPACEDIM >= 2)
604 if (! m_cy.empty()) {
605 // comm meta-data between x and y phases
606 m_cmd_x2y = std::make_unique<MultiBlockCommMetaData>
607 (m_cy, m_spectral_domain_y, m_cx, IntVect(0), m_dtos_x2y);
608 m_cmd_y2x = std::make_unique<MultiBlockCommMetaData>
609 (m_cx, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
610 }
611#endif
612#if (AMREX_SPACEDIM == 3)
613 if (! m_cz.empty() ) {
614 if (m_slab_decomp) {
615 // comm meta-data between xy and z phases
616 m_cmd_x2z = std::make_unique<MultiBlockCommMetaData>
617 (m_cz, m_spectral_domain_z, m_cx, IntVect(0), m_dtos_x2z);
618 m_cmd_z2x = std::make_unique<MultiBlockCommMetaData>
619 (m_cx, m_spectral_domain_x, m_cz, IntVect(0), m_dtos_z2x);
620 } else {
621 // comm meta-data between y and z phases
622 m_cmd_y2z = std::make_unique<MultiBlockCommMetaData>
623 (m_cz, m_spectral_domain_z, m_cy, IntVect(0), m_dtos_y2z);
624 m_cmd_z2y = std::make_unique<MultiBlockCommMetaData>
625 (m_cy, m_spectral_domain_y, m_cz, IntVect(0), m_dtos_z2y);
626 }
627 }
628#endif
629
630 //
631 // make plans
632 //
633
634 if (myproc < m_rx.size())
635 {
636 if constexpr (C) {
637 int ndims = m_slab_decomp ? 2 : 1;
638 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx, ndims);
639 } else {
640 Box const& box = m_rx.box(myproc);
641 auto* pr = m_rx[myproc].dataPtr();
642 auto* pc = (typename Plan<T>::VendorComplex *)m_cx[myproc].dataPtr();
643#ifdef AMREX_USE_SYCL
644 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
645 m_fft_bwd_x = m_fft_fwd_x;
646#else
647 if constexpr (D == Direction::both || D == Direction::forward) {
648 m_fft_fwd_x.template init_r2c<Direction::forward>(box, pr, pc, m_slab_decomp, ncomp);
649 }
650 if constexpr (D == Direction::both || D == Direction::backward) {
651 m_fft_bwd_x.template init_r2c<Direction::backward>(box, pr, pc, m_slab_decomp, ncomp);
652 }
653#endif
654 }
655 }
656
657#if (AMREX_SPACEDIM >= 2)
658 if (! m_cy.empty()) {
659 std::tie(m_fft_fwd_y, m_fft_bwd_y) = make_c2c_plans(m_cy,1);
660 }
661#endif
662#if (AMREX_SPACEDIM == 3)
663 if (! m_cz.empty()) {
664 std::tie(m_fft_fwd_z, m_fft_bwd_z) = make_c2c_plans(m_cz,1);
665 }
666#endif
667 }
668 else // do fft in all dimensions at the same time
669 {
670 if constexpr (C) {
671 m_data_1 = detail::make_mfs_share(m_rx, m_cx);
672 std::tie(m_fft_fwd_x, m_fft_bwd_x) = make_c2c_plans(m_cx,AMREX_SPACEDIM);
673 } else {
674 m_data_1 = detail::make_mfs_share(m_rx, m_rx);
675 m_data_2 = detail::make_mfs_share(m_cx, m_cx);
676
677 auto const& len = m_real_domain.length();
678 auto* pr = (void*)m_rx[0].dataPtr();
679 auto* pc = (void*)m_cx[0].dataPtr();
680#ifdef AMREX_USE_SYCL
681 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
682 m_fft_bwd_x = m_fft_fwd_x;
683#else
684 if constexpr (D == Direction::both || D == Direction::forward) {
685 m_fft_fwd_x.template init_r2c<Direction::forward>(len, pr, pc, false, ncomp);
686 }
687 if constexpr (D == Direction::both || D == Direction::backward) {
688 m_fft_bwd_x.template init_r2c<Direction::backward>(len, pr, pc, false, ncomp);
689 }
690#endif
691 }
692 }
693}
694
695template <typename T, Direction D, bool C>
696R2C<T,D,C>::R2C (std::array<int,AMREX_SPACEDIM> const& domain_size, Info const& info)
697 : R2C<T,D,C>(Box(IntVect(0),IntVect(domain_size)-1), info)
698{}
699
700template <typename T, Direction D, bool C>
702{
703 if (m_fft_bwd_x.plan != m_fft_fwd_x.plan) {
704 m_fft_bwd_x.destroy();
705 }
706 if (m_fft_bwd_y.plan != m_fft_fwd_y.plan) {
707 m_fft_bwd_y.destroy();
708 }
709 if (m_fft_bwd_z.plan != m_fft_fwd_z.plan) {
710 m_fft_bwd_z.destroy();
711 }
712 m_fft_fwd_x.destroy();
713 m_fft_fwd_y.destroy();
714 m_fft_fwd_z.destroy();
715 if (m_fft_bwd_x_half.plan != m_fft_fwd_x_half.plan) {
716 m_fft_bwd_x_half.destroy();
717 }
718 m_fft_fwd_x_half.destroy();
719}
720
721template <typename T, Direction D, bool C>
722std::pair<BoxArray,DistributionMapping>
723R2C<T,D,C>::make_layout_from_local_domain (std::array<int,AMREX_SPACEDIM> const& local_start,
724 std::array<int,AMREX_SPACEDIM> const& local_size)
725{
726 IntVect lo(local_start);
727 IntVect len(local_size);
728 Box bx(lo, lo+len-1);
729#ifdef AMREX_USE_MPI
731 MPI_Allgather(&bx, 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
732 allboxes.data(), 1, ParallelDescriptor::Mpi_typemap<Box>::type(),
734 Vector<int> pmap;
735 pmap.reserve(allboxes.size());
736 for (int i = 0; i < allboxes.size(); ++i) {
737 if (allboxes[i].ok()) {
738 pmap.push_back(i);
739 }
740 }
741 allboxes.erase(std::remove_if(allboxes.begin(), allboxes.end(),
742 [=] (Box const& b) { return b.isEmpty(); }),
743 allboxes.end());
744 BoxList bl(std::move(allboxes));
745 return std::make_pair(BoxArray(std::move(bl)), DistributionMapping(std::move(pmap)));
746#else
747 return std::make_pair(BoxArray(bx), DistributionMapping(Vector<int>({0})));
748#endif
749}
750
751template <typename T, Direction D, bool C>
752void R2C<T,D,C>::setLocalDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
753 std::array<int,AMREX_SPACEDIM> const& local_size)
754{
755 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
756 m_raw_mf = MF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
757}
758
759template <typename T, Direction D, bool C>
760std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
762{
763 m_raw_mf = MF(m_rx.boxArray(), m_rx.DistributionMap(), m_rx.nComp(), 0,
764 MFInfo{}.SetAlloc(false));
765
766 auto const myproc = ParallelDescriptor::MyProc();
767 if (myproc < m_rx.size()) {
768 Box const& box = m_rx.box(myproc);
769 return std::make_pair(box.smallEnd().toArray(),
770 box.length().toArray());
771 } else {
772 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
773 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
774 }
775}
776
777template <typename T, Direction D, bool C>
778void R2C<T,D,C>::setLocalSpectralDomain (std::array<int,AMREX_SPACEDIM> const& local_start,
779 std::array<int,AMREX_SPACEDIM> const& local_size)
780{
781 auto const& [ba, dm] = make_layout_from_local_domain(local_start, local_size);
782 m_raw_cmf = cMF(ba, dm, m_rx.nComp(), 0, MFInfo().SetAlloc(false));
783}
784
785template <typename T, Direction D, bool C>
786std::pair<std::array<int,AMREX_SPACEDIM>,std::array<int,AMREX_SPACEDIM>>
788{
789 auto const ncomp = m_info.batch_size;
790 auto const& [ba, dm] = getSpectralDataLayout();
791
792 m_raw_cmf = cMF(ba, dm, ncomp, 0, MFInfo{}.SetAlloc(false));
793
794 auto const myproc = ParallelDescriptor::MyProc();
795 if (myproc < m_raw_cmf.size()) {
796 Box const& box = m_raw_cmf.box(myproc);
797 return std::make_pair(box.smallEnd().toArray(), box.length().toArray());
798 } else {
799 return std::make_pair(std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)},
800 std::array<int,AMREX_SPACEDIM>{AMREX_D_DECL(0,0,0)});
801 }
802}
803
804template <typename T, Direction D, bool C>
806{
807 if (C || m_r2c_sub) { amrex::Abort("R2C: OpenBC not supported with reduced dimensions or complex inputs"); }
808
809#if (AMREX_SPACEDIM == 3)
810 if (m_do_alld_fft) { return; }
811
812 auto const ncomp = m_info.batch_size;
813
814 if (m_slab_decomp && ! m_fft_fwd_x_half.defined) {
815 auto* fab = detail::get_fab(m_rx);
816 if (fab) {
817 Box bottom_half = m_real_domain;
818 bottom_half.growHi(2,-m_real_domain.length(2)/2);
819 Box box = fab->box() & bottom_half;
820 if (box.ok()) {
821 auto* pr = fab->dataPtr();
822 auto* pc = (typename Plan<T>::VendorComplex *)
823 detail::get_fab(m_cx)->dataPtr();
824#ifdef AMREX_USE_SYCL
825 m_fft_fwd_x_half.template init_r2c<Direction::forward>
826 (box, pr, pc, m_slab_decomp, ncomp);
827 m_fft_bwd_x_half = m_fft_fwd_x_half;
828#else
829 if constexpr (D == Direction::both || D == Direction::forward) {
830 m_fft_fwd_x_half.template init_r2c<Direction::forward>
831 (box, pr, pc, m_slab_decomp, ncomp);
832 }
833 if constexpr (D == Direction::both || D == Direction::backward) {
834 m_fft_bwd_x_half.template init_r2c<Direction::backward>
835 (box, pr, pc, m_slab_decomp, ncomp);
836 }
837#endif
838 }
839 }
840 } // else todo
841
842 if (m_cmd_x2z && ! m_cmd_x2z_half) {
843 Box bottom_half = m_spectral_domain_z;
844 // Note that z-direction's index is 0 because we z is the
845 // unit-stride direction here.
846 bottom_half.growHi(0,-m_spectral_domain_z.length(0)/2);
847 m_cmd_x2z_half = std::make_unique<MultiBlockCommMetaData>
848 (m_cz, bottom_half, m_cx, IntVect(0), m_dtos_x2z);
849 }
850
851 if (m_cmd_z2x && ! m_cmd_z2x_half) {
852 Box bottom_half = m_spectral_domain_x;
853 bottom_half.growHi(2,-m_spectral_domain_x.length(2)/2);
854 m_cmd_z2x_half = std::make_unique<MultiBlockCommMetaData>
855 (m_cx, bottom_half, m_cz, IntVect(0), m_dtos_z2x);
856 }
857#endif
858}
859
860template <typename T, Direction D, bool C>
861template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
862 DIR == Direction::both, int> >
863void R2C<T,D,C>::forward (MF const& inmf, int incomp)
864{
865 BL_PROFILE("FFT::R2C::forward(in)");
866
867 auto const ncomp = m_info.batch_size;
868
869 if (m_r2c_sub) {
870 if (m_sub_helper.ghost_safe(inmf.nGrowVect())) {
871 m_r2c_sub->forward(m_sub_helper.make_alias_mf(inmf), incomp);
872 } else {
873 MF tmp(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
874 tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
875 m_r2c_sub->forward(m_sub_helper.make_alias_mf(tmp),0);
876 }
877 return;
878 }
879
880 if (&m_rx != &inmf) {
881 m_rx.ParallelCopy(inmf, incomp, 0, ncomp);
882 }
883
884 if (m_do_alld_fft) {
885 if constexpr (C) {
886 m_fft_fwd_x.template compute_c2c<Direction::forward>();
887 } else {
888 m_fft_fwd_x.template compute_r2c<Direction::forward>();
889 }
890 return;
891 }
892
893 auto& fft_x = m_openbc_half ? m_fft_fwd_x_half : m_fft_fwd_x;
894 if constexpr (C) {
895 fft_x.template compute_c2c<Direction::forward>();
896 } else {
897 fft_x.template compute_r2c<Direction::forward>();
898 }
899
900 if ( m_cmd_x2y) {
901 ParallelCopy(m_cy, m_cx, *m_cmd_x2y, 0, 0, ncomp, m_dtos_x2y);
902 }
903 m_fft_fwd_y.template compute_c2c<Direction::forward>();
904
905 if ( m_cmd_y2z) {
906 ParallelCopy(m_cz, m_cy, *m_cmd_y2z, 0, 0, ncomp, m_dtos_y2z);
907 }
908#if (AMREX_SPACEDIM == 3)
909 else if ( m_cmd_x2z) {
910 if (m_openbc_half) {
911 NonLocalBC::PackComponents components{};
912 components.n_components = ncomp;
914 {components, m_dtos_x2z};
915 auto handler = ParallelCopy_nowait(m_cz, m_cx, *m_cmd_x2z_half, packing);
916
917 Box upper_half = m_spectral_domain_z;
918 // Note that z-direction's index is 0 because we z is the
919 // unit-stride direction here.
920 upper_half.growLo (0,-m_spectral_domain_z.length(0)/2);
921 m_cz.setVal(0, upper_half, 0, ncomp);
922
923 ParallelCopy_finish(m_cz, std::move(handler), *m_cmd_x2z_half, packing);
924 } else {
925 ParallelCopy(m_cz, m_cx, *m_cmd_x2z, 0, 0, ncomp, m_dtos_x2z);
926 }
927 }
928#endif
929 m_fft_fwd_z.template compute_c2c<Direction::forward>();
930}
931
932template <typename T, Direction D, bool C>
933template <typename FA, typename RT>
934std::pair<std::unique_ptr<char,DataDeleter>,std::size_t>
935R2C<T,D,C>::install_raw_ptr (FA& fa, RT const* p)
936{
937 AMREX_ALWAYS_ASSERT(!fa.empty());
938
939 using FAB = typename FA::FABType::value_type;
940 using T_FAB = typename FAB::value_type;
941 static_assert(sizeof(T_FAB) == sizeof(RT));
942
943 auto const ncomp = m_info.batch_size;
944 auto const& ia = fa.IndexArray();
945
946 T_FAB* pp = nullptr;
947 std::size_t sz = 0;
948
949 if ( ! ia.empty() ) {
950 int K = ia[0];
951 Box const& box = fa.fabbox(K);
952 if ((alignof(T_FAB) == alignof(RT)) || amrex::is_aligned(p,alignof(T_FAB))) {
953 pp = (T_FAB*)p;
954 } else {
955 sz = sizeof(T_FAB) * box.numPts() * ncomp;
956 pp = (T_FAB*) The_Arena()->alloc(sz);
957 }
958 fa.setFab(K, FAB(box,ncomp,pp));
959 }
960
961 if (sz == 0) {
962 return std::make_pair(std::unique_ptr<char,DataDeleter>{},std::size_t(0));
963 } else {
964 return std::make_pair(std::unique_ptr<char,DataDeleter>
965 {(char*)pp,DataDeleter{The_Arena()}}, sz);
966 }
967}
968
969
970template <typename T, Direction D, bool C>
971template <typename RT, typename CT, Direction DIR, bool CP,
972 std::enable_if_t<(DIR == Direction::forward ||
973 DIR == Direction::both)
974 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
975 (sizeof(RT) == sizeof(CT) && CP)), int> >
976void R2C<T,D,C>::forward (RT const* in, CT* out)
977{
978 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, in);
979 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, out);
980
981 if (rsz > 0) {
982 Gpu::dtod_memcpy_async(rdata.get(),in,rsz);
984 }
985
986 forward(m_raw_mf, m_raw_cmf);
987
988 if (csz) {
989 Gpu::dtod_memcpy_async(out,cdata.get(),csz);
991 }
992}
993
994template <typename T, Direction D, bool C>
995template <Direction DIR, std::enable_if_t<DIR == Direction::both, int> >
996void R2C<T,D,C>::backward (MF& outmf, int outcomp)
997{
998 backward_doit(outmf, IntVect(0), Periodicity::NonPeriodic(), outcomp);
999}
1000
1001template <typename T, Direction D, bool C>
1002void R2C<T,D,C>::backward_doit (MF& outmf, IntVect const& ngout,
1003 Periodicity const& period, int outcomp)
1004{
1005 BL_PROFILE("FFT::R2C::backward(out)");
1006
1007 auto const ncomp = m_info.batch_size;
1008
1009 if (m_r2c_sub) {
1010 if (m_sub_helper.ghost_safe(outmf.nGrowVect())) {
1011 MF submf = m_sub_helper.make_alias_mf(outmf);
1012 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1013 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1014 m_r2c_sub->backward_doit(submf, subngout, subperiod, outcomp);
1015 } else {
1016 MF tmp(outmf.boxArray(), outmf.DistributionMap(), ncomp,
1017 m_sub_helper.make_safe_ghost(outmf.nGrowVect()));
1018 this->backward_doit(tmp, ngout, period, 0);
1019 outmf.LocalCopy(tmp, 0, outcomp, ncomp, tmp.nGrowVect());
1020 }
1021 return;
1022 }
1023
1024 if (m_do_alld_fft) {
1025 if constexpr (C) {
1026 m_fft_bwd_x.template compute_c2c<Direction::backward>();
1027 } else {
1028 m_fft_bwd_x.template compute_r2c<Direction::backward>();
1029 }
1030 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1031 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1032 return;
1033 }
1034
1035 m_fft_bwd_z.template compute_c2c<Direction::backward>();
1036 if ( m_cmd_z2y) {
1037 ParallelCopy(m_cy, m_cz, *m_cmd_z2y, 0, 0, ncomp, m_dtos_z2y);
1038 }
1039#if (AMREX_SPACEDIM == 3)
1040 else if ( m_cmd_z2x) {
1041 auto const& cmd = m_openbc_half ? m_cmd_z2x_half : m_cmd_z2x;
1042 ParallelCopy(m_cx, m_cz, *cmd, 0, 0, ncomp, m_dtos_z2x);
1043 }
1044#endif
1045
1046 m_fft_bwd_y.template compute_c2c<Direction::backward>();
1047 if ( m_cmd_y2x) {
1048 ParallelCopy(m_cx, m_cy, *m_cmd_y2x, 0, 0, ncomp, m_dtos_y2x);
1049 }
1050
1051 auto& fft_x = m_openbc_half ? m_fft_bwd_x_half : m_fft_bwd_x;
1052 if constexpr (C) {
1053 fft_x.template compute_c2c<Direction::backward>();
1054 } else {
1055 fft_x.template compute_r2c<Direction::backward>();
1056 }
1057 outmf.ParallelCopy(m_rx, 0, outcomp, ncomp, IntVect(0),
1058 amrex::elemwiseMin(ngout,outmf.nGrowVect()), period);
1059}
1060
1061template <typename T, Direction D, bool C>
1062template <typename CT, typename RT, Direction DIR, bool CP,
1063 std::enable_if_t<(DIR == Direction::backward ||
1064 DIR == Direction::both)
1065 && ((sizeof(RT)*2 == sizeof(CT) && !CP) ||
1066 (sizeof(RT) == sizeof(CT) && CP)), int> >
1067void R2C<T,D,C>::backward (CT const* in, RT* out)
1068{
1069 auto [rdata, rsz] = install_raw_ptr(m_raw_mf, out);
1070 auto [cdata, csz] = install_raw_ptr(m_raw_cmf, in);
1071
1072 if (csz) {
1073 Gpu::dtod_memcpy_async(cdata.get(),in,csz);
1075 }
1076
1077 backward(m_raw_cmf, m_raw_mf);
1078
1079 if (rsz > 0) {
1080 Gpu::dtod_memcpy_async(out,rdata.get(),rsz);
1082 }
1083}
1084
1085template <typename T, Direction D, bool C>
1086std::pair<Plan<T>, Plan<T>>
1087R2C<T,D,C>::make_c2c_plans (cMF& inout, int ndims) const
1088{
1089 Plan<T> fwd;
1090 Plan<T> bwd;
1091
1092 auto* fab = detail::get_fab(inout);
1093 if (!fab) { return {fwd, bwd};}
1094
1095 Box const& box = fab->box();
1096 auto* pio = (typename Plan<T>::VendorComplex *)fab->dataPtr();
1097
1098 auto const ncomp = m_info.batch_size;
1099
1100#ifdef AMREX_USE_SYCL
1101 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1102 bwd = fwd;
1103#else
1104 if constexpr (D == Direction::both || D == Direction::forward) {
1105 fwd.template init_c2c<Direction::forward>(box, pio, ncomp, ndims);
1106 }
1107 if constexpr (D == Direction::both || D == Direction::backward) {
1108 bwd.template init_c2c<Direction::backward>(box, pio, ncomp, ndims);
1109 }
1110#endif
1111
1112 return {fwd, bwd};
1113}
1114
1115template <typename T, Direction D, bool C>
1116template <typename F>
1117void R2C<T,D,C>::post_forward_doit_0 (F const& post_forward)
1118{
1119 if (m_info.twod_mode || m_info.batch_size > 1) {
1120 amrex::Abort("xxxxx todo: post_forward");
1121#if (AMREX_SPACEDIM > 1)
1122 } else if (m_r2c_sub) {
1123 // We need to pass the originally ordered indices to post_forward.
1124#if (AMREX_SPACEDIM == 2)
1125 // The original domain is (1,ny). The sub domain is (ny,1).
1126 m_r2c_sub->post_forward_doit_1
1127 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1128 {
1129 post_forward(0, i, 0, sp);
1130 });
1131#else
1132 if (m_real_domain.length(0) == 1 && m_real_domain.length(1) == 1) {
1133 // Original domain: (1, 1, nz). Sub domain: (nz, 1, 1)
1134 m_r2c_sub->post_forward_doit_1
1135 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1136 {
1137 post_forward(0, 0, i, sp);
1138 });
1139 } else if (m_real_domain.length(0) == 1 && m_real_domain.length(2) == 1) {
1140 // Original domain: (1, ny, 1). Sub domain: (ny, 1, 1)
1141 m_r2c_sub->post_forward_doit_1
1142 ([=] AMREX_GPU_DEVICE (int i, int, int, auto& sp)
1143 {
1144 post_forward(0, i, 0, sp);
1145 });
1146 } else if (m_real_domain.length(0) == 1) {
1147 // Original domain: (1, ny, nz). Sub domain: (ny, nz, 1)
1148 m_r2c_sub->post_forward_doit_1
1149 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1150 {
1151 post_forward(0, i, j, sp);
1152 });
1153 } else if (m_real_domain.length(1) == 1) {
1154 // Original domain: (nx, 1, nz). Sub domain: (nx, nz, 1)
1155 m_r2c_sub->post_forward_doit_1
1156 ([=] AMREX_GPU_DEVICE (int i, int j, int, auto& sp)
1157 {
1158 post_forward(i, 0, j, sp);
1159 });
1160 } else {
1161 amrex::Abort("R2c::post_forward_doit_0: how did this happen?");
1162 }
1163#endif
1164#endif
1165 } else {
1166 this->post_forward_doit_1(post_forward);
1167 }
1168}
1169
1170template <typename T, Direction D, bool C>
1171template <typename F>
1172void R2C<T,D,C>::post_forward_doit_1 (F const& post_forward)
1173{
1174 if (m_info.twod_mode || m_info.batch_size > 1) {
1175 amrex::Abort("xxxxx todo: post_forward");
1176 } else if (m_r2c_sub) {
1177 amrex::Abort("R2C::post_forward_doit_1: How did this happen?");
1178 } else {
1179 if ( ! m_cz.empty()) {
1180 auto* spectral_fab = detail::get_fab(m_cz);
1181 if (spectral_fab) {
1182 auto const& a = spectral_fab->array(); // m_cz's ordering is z,x,y
1183 ParallelForOMP(spectral_fab->box(),
1184 [=] AMREX_GPU_DEVICE (int iz, int jx, int ky)
1185 {
1186 post_forward(jx,ky,iz,a(iz,jx,ky));
1187 });
1188 }
1189 } else if ( ! m_cy.empty()) {
1190 auto* spectral_fab = detail::get_fab(m_cy);
1191 if (spectral_fab) {
1192 auto const& a = spectral_fab->array(); // m_cy's ordering is y,x,z
1193 ParallelForOMP(spectral_fab->box(),
1194 [=] AMREX_GPU_DEVICE (int iy, int jx, int k)
1195 {
1196 post_forward(jx,iy,k,a(iy,jx,k));
1197 });
1198 }
1199 } else {
1200 auto* spectral_fab = detail::get_fab(m_cx);
1201 if (spectral_fab) {
1202 auto const& a = spectral_fab->array();
1203 ParallelForOMP(spectral_fab->box(),
1204 [=] AMREX_GPU_DEVICE (int i, int j, int k)
1205 {
1206 post_forward(i,j,k,a(i,j,k));
1207 });
1208 }
1209 }
1210 }
1211}
1212
1213template <typename T, Direction D, bool C>
1215{
1216#if (AMREX_SPACEDIM == 3)
1217 if (m_info.twod_mode) {
1218 return T(1)/T(Long(m_real_domain.length(0)) *
1219 Long(m_real_domain.length(1)));
1220 } else
1221#endif
1222 {
1223 return T(1)/T(m_real_domain.numPts());
1224 }
1225}
1226
1227template <typename T, Direction D, bool C>
1228template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
1229 DIR == Direction::both, int> >
1230std::pair<typename R2C<T,D,C>::cMF *, IntVect>
1232{
1233#if (AMREX_SPACEDIM > 1)
1234 if (m_r2c_sub) {
1235 auto [cmf, order] = m_r2c_sub->getSpectralData();
1236 return std::make_pair(cmf, m_sub_helper.inverse_order(order));
1237 } else
1238#endif
1239 if (!m_cz.empty()) {
1240 return std::make_pair(const_cast<cMF*>(&m_cz), IntVect{AMREX_D_DECL(2,0,1)});
1241 } else if (!m_cy.empty()) {
1242 return std::make_pair(const_cast<cMF*>(&m_cy), IntVect{AMREX_D_DECL(1,0,2)});
1243 } else {
1244 return std::make_pair(const_cast<cMF*>(&m_cx), IntVect{AMREX_D_DECL(0,1,2)});
1245 }
1246}
1247
1248template <typename T, Direction D, bool C>
1249template <Direction DIR, std::enable_if_t<DIR == Direction::forward ||
1250 DIR == Direction::both, int> >
1251void R2C<T,D,C>::forward (MF const& inmf, cMF& outmf, int incomp, int outcomp)
1252{
1253 BL_PROFILE("FFT::R2C::forward(inout)");
1254
1255 auto const ncomp = m_info.batch_size;
1256
1257 if (m_r2c_sub)
1258 {
1259 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1260 MF inmf_sub, inmf_tmp;
1261 int incomp_sub;
1262 if (inmf_safe) {
1263 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1264 incomp_sub = incomp;
1265 } else {
1266 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1267 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1268 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1269 incomp_sub = 0;
1270 }
1271
1272 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1273 cMF outmf_sub, outmf_tmp;
1274 int outcomp_sub;
1275 if (outmf_safe) {
1276 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1277 outcomp_sub = outcomp;
1278 } else {
1279 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, 0);
1280 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1281 outcomp_sub = 0;
1282 }
1283
1284 m_r2c_sub->forward(inmf_sub, outmf_sub, incomp_sub, outcomp_sub);
1285
1286 if (!outmf_safe) {
1287 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, IntVect(0));
1288 }
1289 }
1290 else
1291 {
1292 forward(inmf, incomp);
1293 if (!m_cz.empty()) { // m_cz's order (z,x,y) -> (x,y,z)
1294 RotateBwd dtos{};
1296 (outmf, m_spectral_domain_x, m_cz, IntVect(0), dtos);
1297 ParallelCopy(outmf, m_cz, cmd, 0, outcomp, ncomp, dtos);
1298 } else if (!m_cy.empty()) { // m_cy's order (y,x,z) -> (x,y,z)
1300 (outmf, m_spectral_domain_x, m_cy, IntVect(0), m_dtos_y2x);
1301 ParallelCopy(outmf, m_cy, cmd, 0, outcomp, ncomp, m_dtos_y2x);
1302 } else {
1303 outmf.ParallelCopy(m_cx, 0, outcomp, ncomp);
1304 }
1305 }
1306}
1307
1308template <typename T, Direction D, bool C>
1309template <Direction DIR, std::enable_if_t<DIR == Direction::backward ||
1310 DIR == Direction::both, int> >
1311void R2C<T,D,C>::backward (cMF const& inmf, MF& outmf, int incomp, int outcomp)
1312{
1313 backward_doit(inmf, outmf, IntVect(0), Periodicity::NonPeriodic(), incomp, outcomp);
1314}
1315
1316template <typename T, Direction D, bool C>
1317void R2C<T,D,C>::backward_doit (cMF const& inmf, MF& outmf, IntVect const& ngout,
1318 Periodicity const& period, int incomp, int outcomp)
1319{
1320 BL_PROFILE("FFT::R2C::backward(inout)");
1321
1322 auto const ncomp = m_info.batch_size;
1323
1324 if (m_r2c_sub)
1325 {
1326 bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
1327 cMF inmf_sub, inmf_tmp;
1328 int incomp_sub;
1329 if (inmf_safe) {
1330 inmf_sub = m_sub_helper.make_alias_mf(inmf);
1331 incomp_sub = incomp;
1332 } else {
1333 inmf_tmp.define(inmf.boxArray(), inmf.DistributionMap(), ncomp, 0);
1334 inmf_tmp.LocalCopy(inmf, incomp, 0, ncomp, IntVect(0));
1335 inmf_sub = m_sub_helper.make_alias_mf(inmf_tmp);
1336 incomp_sub = 0;
1337 }
1338
1339 bool outmf_safe = m_sub_helper.ghost_safe(outmf.nGrowVect());
1340 MF outmf_sub, outmf_tmp;
1341 int outcomp_sub;
1342 if (outmf_safe) {
1343 outmf_sub = m_sub_helper.make_alias_mf(outmf);
1344 outcomp_sub = outcomp;
1345 } else {
1346 IntVect const& ngtmp = m_sub_helper.make_safe_ghost(outmf.nGrowVect());
1347 outmf_tmp.define(outmf.boxArray(), outmf.DistributionMap(), ncomp, ngtmp);
1348 outmf_sub = m_sub_helper.make_alias_mf(outmf_tmp);
1349 outcomp_sub = 0;
1350 }
1351
1352 IntVect const& subngout = m_sub_helper.make_iv(ngout);
1353 Periodicity const& subperiod = m_sub_helper.make_periodicity(period);
1354 m_r2c_sub->backward_doit(inmf_sub, outmf_sub, subngout, subperiod, incomp_sub, outcomp_sub);
1355
1356 if (!outmf_safe) {
1357 outmf.LocalCopy(outmf_tmp, 0, outcomp, ncomp, outmf_tmp.nGrowVect());
1358 }
1359 }
1360 else
1361 {
1362 if (!m_cz.empty()) { // (x,y,z) -> m_cz's order (z,x,y)
1363 RotateFwd dtos{};
1364 MultiBlockCommMetaData cmd
1365 (m_cz, m_spectral_domain_z, inmf, IntVect(0), dtos);
1366 ParallelCopy(m_cz, inmf, cmd, incomp, 0, ncomp, dtos);
1367 } else if (!m_cy.empty()) { // (x,y,z) -> m_cy's ordering (y,x,z)
1368 MultiBlockCommMetaData cmd
1369 (m_cy, m_spectral_domain_y, inmf, IntVect(0), m_dtos_x2y);
1370 ParallelCopy(m_cy, inmf, cmd, incomp, 0, ncomp, m_dtos_x2y);
1371 } else {
1372 m_cx.ParallelCopy(inmf, incomp, 0, ncomp);
1373 }
1374 backward_doit(outmf, ngout, period, outcomp);
1375 }
1376}
1377
1378template <typename T, Direction D, bool C>
1379std::pair<BoxArray,DistributionMapping>
1381{
1382#if (AMREX_SPACEDIM > 1)
1383 if (m_r2c_sub) {
1384 auto const& [ba, dm] = m_r2c_sub->getSpectralDataLayout();
1385 return std::make_pair(m_sub_helper.inverse_boxarray(ba), dm);
1386 }
1387#endif
1388
1389#if (AMREX_SPACEDIM == 3)
1390 if (!m_cz.empty()) {
1391 BoxList bl = m_cz.boxArray().boxList();
1392 for (auto& b : bl) {
1393 auto lo = b.smallEnd();
1394 auto hi = b.bigEnd();
1395 std::swap(lo[0], lo[1]);
1396 std::swap(lo[1], lo[2]);
1397 std::swap(hi[0], hi[1]);
1398 std::swap(hi[1], hi[2]);
1399 b.setSmall(lo);
1400 b.setBig(hi);
1401 }
1402 return std::make_pair(BoxArray(std::move(bl)), m_cz.DistributionMap());
1403 } else
1404#endif
1405#if (AMREX_SPACEDIM >= 2)
1406 if (!m_cy.empty()) {
1407 BoxList bl = m_cy.boxArray().boxList();
1408 for (auto& b : bl) {
1409 auto lo = b.smallEnd();
1410 auto hi = b.bigEnd();
1411 std::swap(lo[0], lo[1]);
1412 std::swap(hi[0], hi[1]);
1413 b.setSmall(lo);
1414 b.setBig(hi);
1415 }
1416 return std::make_pair(BoxArray(std::move(bl)), m_cy.DistributionMap());
1417 } else
1418#endif
1419 {
1420 return std::make_pair(m_cx.boxArray(), m_cx.DistributionMap());
1421 }
1422}
1423
1425template <typename T = Real, FFT::Direction D = FFT::Direction::both>
1427
1428}
1429
1430#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
#define AMREX_D_DECL(a, b, c)
Definition AMReX_SPACE.H:171
virtual void * alloc(std::size_t sz)=0
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:567
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
BoxList & shift(int dir, int nzones)
Applies Box::shift(int,int) to each Box in the BoxList.
Definition AMReX_BoxList.cpp:565
__host__ __device__ const IntVectND< dim > & bigEnd() const &noexcept
Return the inclusive upper bound of the box.
Definition AMReX_Box.H:123
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:356
__host__ __device__ IntVectND< dim > length() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:154
__host__ __device__ int shortside(int &dir) const noexcept
Return length of shortest side. dir is modified to give direction with shortest side: 0....
Definition AMReX_Box.H:437
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:147
__host__ __device__ BoxND & growLo(int idir, int n_cell=1) noexcept
Grow the BoxND on the low end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the Bo...
Definition AMReX_Box.H:662
__host__ __device__ IndexTypeND< dim > ixType() const noexcept
Return the indexing type.
Definition AMReX_Box.H:135
__host__ __device__ BoxND & growHi(int idir, int n_cell=1) noexcept
Grow the BoxND on the high end by n_cell cells in direction idir. NOTE: n_cell negative shrinks the B...
Definition AMReX_Box.H:673
__host__ __device__ const IntVectND< dim > & smallEnd() const &noexcept
Return the inclusive lower bound of the box.
Definition AMReX_Box.H:111
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Long size() const noexcept
Length of the underlying processor map.
Definition AMReX_DistributionMapping.H:129
Definition AMReX_FFT_OpenBCSolver.H:11
3D Poisson solver for periodic, Dirichlet & Neumann boundaries in the first two dimensions,...
Definition AMReX_FFT_Poisson.H:151
Poisson solver for periodic, Dirichlet & Neumann boundaries using FFT.
Definition AMReX_FFT_Poisson.H:61
Parallel Discrete Fourier Transform.
Definition AMReX_FFT_R2C.H:41
std::conditional_t< C, cMF, std::conditional_t< std::is_same_v< T, Real >, MultiFab, FabArray< BaseFab< T > > > > MF
Definition AMReX_FFT_R2C.H:46
R2C & operator=(R2C const &)=delete
void forward(MF const &inmf, cMF &outmf, int incomp=0, int outcomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:1251
~R2C()
Definition AMReX_FFT_R2C.H:701
void setLocalDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local domain.
Definition AMReX_FFT_R2C.H:752
R2C(Box const &domain, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:454
void backward(cMF const &inmf, MF &outmf, int incomp=0, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:1311
R2C(R2C &&)=delete
void backward(MF &outmf, int outcomp=0)
Backward transform.
Definition AMReX_FFT_R2C.H:996
void post_forward_doit_1(F const &post_forward)
Definition AMReX_FFT_R2C.H:1172
T scalingFactor() const
Definition AMReX_FFT_R2C.H:1214
void post_forward_doit_0(F const &post_forward)
Definition AMReX_FFT_R2C.H:1117
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalDomain() const
Get local domain.
Definition AMReX_FFT_R2C.H:761
void forward(RT const *in, CT *out)
Forward transform.
Definition AMReX_FFT_R2C.H:976
R2C(R2C const &)=delete
std::pair< BoxArray, DistributionMapping > getSpectralDataLayout() const
Get BoxArray and DistributionMapping for spectral data.
Definition AMReX_FFT_R2C.H:1380
std::pair< cMF *, IntVect > getSpectralData() const
Get the internal spectral data.
R2C(std::array< int, 3 > const &domain_size, Info const &info=Info{})
Constructor.
Definition AMReX_FFT_R2C.H:696
FabArray< BaseFab< GpuComplex< T > > > cMF
Definition AMReX_FFT_R2C.H:43
std::pair< std::array< int, 3 >, std::array< int, 3 > > getLocalSpectralDomain() const
Get local spectral domain.
Definition AMReX_FFT_R2C.H:787
void forwardThenBackward(MF const &inmf, MF &outmf, F const &post_forward, int incomp=0, int outcomp=0)
Forward and then backward transform.
Definition AMReX_FFT_R2C.H:193
void setLocalSpectralDomain(std::array< int, 3 > const &local_start, std::array< int, 3 > const &local_size)
Set local spectral domain.
Definition AMReX_FFT_R2C.H:778
void backward(CT const *in, RT *out)
Backward transform.
Definition AMReX_FFT_R2C.H:1067
void forward(MF const &inmf, int incomp=0)
Forward transform.
Definition AMReX_FFT_R2C.H:863
IntVect nGrowVect() const noexcept
Definition AMReX_FabArrayBase.H:80
int size() const noexcept
Return the number of FABs in the FabArray.
Definition AMReX_FabArrayBase.H:110
const DistributionMapping & DistributionMap() const noexcept
Return constant reference to associated DistributionMapping.
Definition AMReX_FabArrayBase.H:131
bool empty() const noexcept
Definition AMReX_FabArrayBase.H:89
Box fabbox(int K) const noexcept
Return the Kth FABs Box in the FabArray. That is, the region the Kth fab is actually defined on.
Definition AMReX_FabArrayBase.cpp:220
const BoxArray & boxArray() const noexcept
Return a constant reference to the BoxArray that defines the valid region associated with this FabArr...
Definition AMReX_FabArrayBase.H:95
void setFab(int boxno, std::unique_ptr< FAB > elem)
Explicitly set the Kth FAB in the FabArray to point to elem.
Definition AMReX_FabArray.H:2355
void ParallelCopy(const FabArray< FAB > &src, const Periodicity &period=Periodicity::NonPeriodic(), CpOp op=FabArrayBase::COPY)
Definition AMReX_FabArray.H:847
typename std::conditional_t< IsBaseFab< BaseFab< GpuComplex< T > > >::value, BaseFab< GpuComplex< T > >, FABType >::value_type value_type
Definition AMReX_FabArray.H:358
void define(const BoxArray &bxs, const DistributionMapping &dm, int nvar, int ngrow, const MFInfo &info=MFInfo(), const FabFactory< FAB > &factory=DefaultFabFactory< FAB >())
Define this FabArray identically to that performed by the constructor having an analogous function si...
Definition AMReX_FabArray.H:2171
void LocalCopy(FabArray< SFAB > const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
Perform local copy of FabArray data.
Definition AMReX_FabArray.H:1952
A collection (stored as an array) of FArrayBox objects.
Definition AMReX_MultiFab.H:40
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
static const Periodicity & NonPeriodic() noexcept
Definition AMReX_Periodicity.cpp:52
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
amrex_long Long
Definition AMReX_INT.H:30
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:243
int MyProc() noexcept
Definition AMReX_ParallelDescriptor.H:128
int NProcs() noexcept
Definition AMReX_ParallelDescriptor.H:255
Definition AMReX_FFT_Helper.H:46
Direction
Definition AMReX_FFT_Helper.H:48
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:329
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
MPI_Comm Communicator() noexcept
Definition AMReX_ParallelDescriptor.H:223
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:27
bool is_aligned(const void *p, std::size_t alignment) noexcept
Definition AMReX_Arena.H:35
BoxArray decompose(Box const &domain, int nboxes, Array< bool, 3 > const &decomp, bool no_overlap)
Decompose domain box into BoxArray.
Definition AMReX_BoxArray.cpp:1940
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
void ParallelCopy(MF &dst, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &ng_src=IntVect(0), IntVect const &ng_dst=IntVect(0), Periodicity const &period=Periodicity::NonPeriodic())
dst = src w/ MPI communication
Definition AMReX_FabArrayUtility.H:2019
Arena * The_Arena()
Definition AMReX_Arena.cpp:783
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Definition AMReX_Algorithm.H:49
Definition AMReX_FFT_Helper.H:58
bool twod_mode
Definition AMReX_FFT_Helper.H:69
bool oned_mode
We might have a special twod_mode: nx or ny == 1 && nz > 1.
Definition AMReX_FFT_Helper.H:72
int batch_size
Batched FFT size. Only support in R2C, not R2X.
Definition AMReX_FFT_Helper.H:75
DomainStrategy domain_strategy
Domain composition strategy.
Definition AMReX_FFT_Helper.H:60
int nprocs
Max number of processes to use.
Definition AMReX_FFT_Helper.H:78
int pencil_threshold
Definition AMReX_FFT_Helper.H:64
Definition AMReX_FFT_Helper.H:134
std::conditional_t< std::is_same_v< float, T >, cuComplex, cuDoubleComplex > VendorComplex
Definition AMReX_FFT_Helper.H:138
FabArray memory allocation information.
Definition AMReX_FabArray.H:66
MFInfo & SetAlloc(bool a) noexcept
Definition AMReX_FabArray.H:73
This class specializes behaviour on local copies and unpacking receive buffers.
Definition AMReX_NonLocalBC.H:626
This is the index mapping based on the DTOS MultiBlockDestToSrc.
Definition AMReX_NonLocalBC.H:216
Contains information about which components take part of the data transaction.
Definition AMReX_NonLocalBC.H:539
int n_components
Definition AMReX_NonLocalBC.H:542
Communication datatype (note: this structure also works without MPI)
Definition AMReX_ccse-mpi.H:78