1 #ifndef AMREX_BASEFAB_UTILITY_H_
2 #define AMREX_BASEFAB_UTILITY_H_
3 #include <AMReX_Config.H>
10 template <
class Tto,
class Tfrom>
16 auto const& tdata = tofab.array();
17 auto const& fdata = fromfab.const_array();
20 tdata(i,j,k,n+dcomp.i) = static_cast<Tto>(fdata(i,j,k,n+scomp.i));
24 template <
typename STRUCT,
typename F,
25 std::enable_if_t<(
sizeof(STRUCT)<=36*8) &&
27 std::is_trivially_destructible_v<STRUCT>,
31 Box const& box = aos_fab.
box();
32 auto const& aos = aos_fab.
array();
33 using T =
typename STRUCT::value_type;
34 constexpr
int STRUCTSIZE =
sizeof(STRUCT)/
sizeof(T);
35 static_assert(
sizeof(STRUCT) ==
sizeof(T)*STRUCTSIZE,
36 "amrex::fill: sizeof(STRUCT) != sizeof(T)*STRUCTSIZE");
40 const auto ntotcells = std::uint64_t(box.
numPts());
41 int nthreads_per_block = (STRUCTSIZE <= 8) ? 256 : 128;
42 std::uint64_t nblocks_long = (ntotcells+nthreads_per_block-1)/nthreads_per_block;
44 auto nblocks =
int(nblocks_long);
45 std::size_t shared_mem_bytes = nthreads_per_block *
sizeof(STRUCT);
51 auto const icell = std::uint64_t(handler.globalIdx());
52 std::uint64_t
const blockDimx = handler.blockDim();
53 std::uint64_t
const threadIdxx = handler.threadIdx();
54 std::uint64_t
const blockIdxx = handler.blockIdx();
55 auto const shared = (T*)handler.sharedMemory();
56 if (icell < indexer.
numPts()) {
57 auto ga =
new(shared+threadIdxx*STRUCTSIZE) STRUCT;
58 auto [i, j, k] = indexer(icell);
61 handler.sharedBarrier();
62 for (std::uint64_t m = threadIdxx,
63 mend = amrex::min<std::uint64_t>(blockDimx, indexer.
numPts()-blockDimx*blockIdxx) * STRUCTSIZE;
64 m < mend; m += blockDimx) {
65 p[blockDimx*blockIdxx*STRUCTSIZE+m] = shared[m];
72 std::uint64_t
const icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x;
74 T*
const shared = gsm.
dataPtr();
75 if (icell < indexer.
numPts()) {
76 auto ga =
new(shared+std::uint64_t(threadIdx.x)*STRUCTSIZE) STRUCT;
77 auto [i, j, k] = indexer(icell);
81 for (std::uint64_t m = threadIdx.x,
82 mend = amrex::min<std::uint64_t>(blockDim.x, indexer.
numPts()-std::uint64_t(blockDim.x)*blockIdx.x) * STRUCTSIZE;
83 m < mend; m += blockDim.x) {
84 p[std::uint64_t(blockDim.x)*blockIdx.x*STRUCTSIZE+m] = shared[m];
93 f(aos(i,j,k), i, j, k);
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
#define AMREX_IS_TRIVIALLY_COPYABLE(T)
Definition: AMReX_TypeTraits.H:10
A FortranArrayBox(FAB)-like object.
Definition: AMReX_BaseFab.H:183
T * dataPtr(int n=0) noexcept
Returns a pointer to an object of type T that is the value of the Nth component associated with the cell at the low end of the domain.
Definition: AMReX_BaseFab.H:352
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
Definition: AMReX_BaseFab.H:379
const Box & box() const noexcept
Returns the domain (box) where the array is defined.
Definition: AMReX_BaseFab.H:291
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition: AMReX_Box.H:346
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
@ max
Definition: AMReX_ParallelReduce.H:17
Definition: AMReX_Amr.cpp:49
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:355
void launch(T const &n, L &&f) noexcept
Definition: AMReX_GpuLaunchFunctsC.H:120
void fill(BaseFab< STRUCT > &aos_fab, F const &f)
Definition: AMReX_BaseFabUtility.H:29
AMREX_GPU_HOST_DEVICE void cast(BaseFab< Tto > &tofab, BaseFab< Tfrom > const &fromfab, Box const &bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
Definition: AMReX_BaseFabUtility.H:13
AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrent(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:150
const int[]
Definition: AMReX_BLProfiler.cpp:1664
Definition: AMReX_Box.H:2027
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t numPts() const
Definition: AMReX_Box.H:2068
Definition: AMReX_BaseFab.H:72
Definition: AMReX_GpuTypes.H:86
Definition: AMReX_GpuMemory.H:125
AMREX_GPU_DEVICE T * dataPtr() noexcept
Definition: AMReX_GpuMemory.H:126
Definition: AMReX_BaseFab.H:78
Definition: AMReX_BaseFab.H:66