Box const& box = aos_fab.box();
auto const& aos = aos_fab.array();
using T = typename STRUCT::value_type;
constexpr int STRUCTSIZE = sizeof(STRUCT)/sizeof(T);
static_assert(sizeof(STRUCT) == sizeof(T)*STRUCTSIZE,
              "amrex::fill: sizeof(STRUCT) != sizeof(T)*STRUCTSIZE");
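// ...
// GPU branch: the launch geometry below assigns one GPU thread to each cell
// of the box, with enough shared memory for every thread in a block to stage
// one STRUCT.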
const auto ntotcells = std::uint64_t(box.numPts());
constexpr int nthreads_per_block = (STRUCTSIZE <= 8) ? 256 : 128;
std::uint64_t nblocks_long = (ntotcells+nthreads_per_block-1)/nthreads_per_block;
AMREX_ASSERT(nblocks_long <= std::uint64_t(std::numeric_limits<int>::max()));
auto nblocks = int(nblocks_long);
std::size_t shared_mem_bytes = nthreads_per_block * sizeof(STRUCT);
// ...
amrex::launch<nthreads_per_block>(nblocks, shared_mem_bytes, Gpu::gpuStream(),
// ...
auto const icell = std::uint64_t(handler.globalIdx());
std::uint64_t const blockDimx = handler.blockDim();
std::uint64_t const threadIdxx = handler.threadIdx();
std::uint64_t const blockIdxx = handler.blockIdx();
auto const shared = (T*)handler.sharedMemory();
if (icell < indexer.numPts()) {
    auto ga = new(shared+threadIdxx*STRUCTSIZE) STRUCT;
    auto [i, j, k] = indexer(icell);
    // ...
}
handler.sharedBarrier();
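// After the barrier, the whole thread block copies its shared-memory staging
// buffer to global memory element by element (in units of T), so consecutive
// threads write consecutive elements rather than strided whole STRUCTs.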
for (std::uint64_t m = threadIdxx,
     mend = amrex::min<std::uint64_t>(blockDimx,
                indexer.numPts()-blockDimx*blockIdxx) * STRUCTSIZE;
     m < mend; m += blockDimx) {
    p[blockDimx*blockIdxx*STRUCTSIZE+m] = shared[m];
}
// ...
amrex::launch<nthreads_per_block>(nblocks, shared_mem_bytes, Gpu::gpuStream(),
// ...
std::uint64_t const icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x;
// ...
T* const shared = gsm.dataPtr();
if (icell < indexer.numPts()) {
    auto ga = new(shared+std::uint64_t(threadIdx.x)*STRUCTSIZE) STRUCT;
    auto [i, j, k] = indexer(icell);
    // ...
}
// ...
for (std::uint64_t m = threadIdx.x,
     mend = amrex::min<std::uint64_t>(blockDim.x,
                indexer.numPts()-std::uint64_t(blockDim.x)*blockIdx.x) * STRUCTSIZE;
     m < mend; m += blockDim.x) {
    p[std::uint64_t(blockDim.x)*blockIdx.x*STRUCTSIZE+m] = shared[m];
}
// CPU fallback: apply f directly to each cell of the array-of-structs.
f(aos(i,j,k), i, j, k);
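// --- Usage sketch (not part of the header above) ---
// amrex::fill invokes the user callback once per cell of the FAB's box and
// stores the resulting STRUCT at that cell. CellData below is a made-up
// example type; any sufficiently small, trivially copyable struct with a
// value_type typedef works.
#include <AMReX_BaseFabUtility.H>

struct CellData {
    using value_type = amrex::Real;
    amrex::Real rho, vx, vy, vz;
};

void init_cell_data (amrex::BaseFab<CellData>& fab)
{
    amrex::fill(fab, [=] AMREX_GPU_HOST_DEVICE (CellData& c, int i, int j, int k)
    {
        c.rho = amrex::Real(i + j + k); // any per-cell initialization
        c.vx = c.vy = c.vz = amrex::Real(0.0);
    });
}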
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
// ...
constexpr int tile_dim = 32;
constexpr int block_rows = 16;
constexpr int nthreads = tile_dim*block_rows;
// ...
dim3 block{unsigned(tile_dim), unsigned(block_rows), 1};
dim3 grid{unsigned((nx+tile_dim-1)/tile_dim),
          unsigned((nz+tile_dim-1)/tile_dim), unsigned(ny)};
// ...
__shared__ T tile[tile_dim][tile_dim+1];
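// The extra column (tile_dim+1 instead of tile_dim) pads the shared-memory
// tile so that the column-wise accesses during write-back do not all map to
// the same shared-memory bank.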
int k = blockIdx.y * tile_dim + threadIdx.x;
int i = blockIdx.x * tile_dim + threadIdx.y;
// ...
for (int it = 0; it < tile_dim; it += block_rows, i += block_rows) {
    // ...
    tile[threadIdx.y+it][threadIdx.x] = pi[k + (j+i*std::size_t(ny))*nz];
}
// ...
i = blockIdx.x * tile_dim + threadIdx.x;
k = blockIdx.y * tile_dim + threadIdx.y;
// ...
for (int it = 0; it < tile_dim; it += block_rows, k += block_rows) {
    // ...
    po[i + (j+k*std::size_t(ny))*nx] = tile[threadIdx.x][threadIdx.y+it];
}
#elif defined(AMREX_USE_SYCL)
// ...
constexpr int tile_dim = 32;
constexpr int block_rows = 8;
// ...
sycl::range<3> block{std::size_t(1), std::size_t(block_rows), std::size_t(tile_dim)};
sycl::range<3> grid{std::size_t(ny), std::size_t((nz+tile_dim-1)/tile_dim),
                    std::size_t((nx+tile_dim-1)/tile_dim)};
sycl::range<3> global_size{grid[0]*block[0], grid[1]*block[1], grid[2]*block[2]};
// ...
q.submit([&] (sycl::handler& h)
{
    auto tile = sycl::local_accessor<T,2>(sycl::range<2>(tile_dim,tile_dim+1),h);
    // ...
    h.parallel_for(sycl::nd_range<3>(global_size, block),
                   [=] (sycl::nd_item<3> item)
    {
        auto group = item.get_group();
        dim3 blockIdx{unsigned(group.get_group_id(2)),
                      unsigned(group.get_group_id(1)),
                      unsigned(group.get_group_id(0))};
        dim3 threadIdx{unsigned(item.get_local_id(2)),
                       unsigned(item.get_local_id(1)),
                       unsigned(item.get_local_id(0))};
        // ...
        int k = blockIdx.y * tile_dim + threadIdx.x;
        int i = blockIdx.x * tile_dim + threadIdx.y;
        // ...
        for (int it = 0; it < tile_dim; it += block_rows, i += block_rows) {
            // ...
            tile[threadIdx.y+it][threadIdx.x] = pi[k + (j+i*std::size_t(ny))*nz];
        }
        // ...
        item.barrier(sycl::access::fence_space::local_space);
        i = blockIdx.x * tile_dim + threadIdx.x;
        k = blockIdx.y * tile_dim + threadIdx.y;
        // ...
        for (int it = 0; it < tile_dim; it += block_rows, k += block_rows) {
            // ...
            po[i + (j+k*std::size_t(ny))*nx] = tile[threadIdx.x][threadIdx.y+it];
        }
    // ...
} catch (sycl::exception const& ex) {
    amrex::Abort(std::string("transposeCtoF: ")+ex.what()+"!!!!!");
}
constexpr int bx = 32;
constexpr int bz = 32;
// ...
std::size_t nxy = std::size_t(nx) * ny;
std::size_t nyz = std::size_t(ny) * nz;
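// The i/k loops below are blocked into bx-by-bz tiles so that both the
// strided reads from psrc and the strided writes to pdst stay cache-resident
// within a tile.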
#pragma omp parallel for collapse(3)
for (int j = 0; j < ny; ++j) {
    for (int k0 = 0; k0 < nz; k0 += bz) {
        for (int i0 = 0; i0 < nx; i0 += bx) {
            int imax = std::min(i0+bx, nx);
            int kmax = std::min(k0+bz, nz);
            // ...
            for (int i = i0; i < imax; ++i) {
                // ...
                for (int k = k0; k < kmax; ++k) {
                    pdst[i + k*nxy] = psrc[k + i*nyz];
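// --- Reference check (not part of the header above) ---
// A naive single-threaded version of the same C-to-Fortran transpose, useful
// for validating the tiled kernels. Index convention as in the listing: the
// source is laid out as src[k + (j + i*ny)*nz] (x varying slowest) and the
// destination as dst[i + (j + k*ny)*nx] (x varying fastest). The function
// name is ours, not part of AMReX.
#include <cstddef>

template <typename T>
void transpose_ctof_reference (T* dst, T const* src, int nx, int ny, int nz)
{
    for (int k = 0; k < nz; ++k) {
        for (int j = 0; j < ny; ++j) {
            for (int i = 0; i < nx; ++i) {
                dst[i + (j + std::size_t(k)*ny)*nx] = src[k + (j + std::size_t(i)*ny)*nz];
            }
        }
    }
}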