Block-Structured AMR Software Framework
AMReX_BaseFabUtility.H
Go to the documentation of this file.
1 #ifndef AMREX_BASEFAB_UTILITY_H_
2 #define AMREX_BASEFAB_UTILITY_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_BaseFab.H>
6 #include <AMReX_TypeTraits.H>
7 
8 namespace amrex {
9 
10 template <class Tto, class Tfrom>
12 void
13 cast (BaseFab<Tto>& tofab, BaseFab<Tfrom> const& fromfab,
14  Box const& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
15 {
16  auto const& tdata = tofab.array();
17  auto const& fdata = fromfab.const_array();
18  amrex::LoopConcurrent(bx, ncomp.n, [=] (int i, int j, int k, int n) noexcept
19  {
20  tdata(i,j,k,n+dcomp.i) = static_cast<Tto>(fdata(i,j,k,n+scomp.i));
21  });
22 }
23 
24 template <typename STRUCT, typename F,
25  std::enable_if_t<(sizeof(STRUCT)<=36*8) &&
27  std::is_trivially_destructible_v<STRUCT>,
28  int>FOO = 0>
29 void fill (BaseFab<STRUCT>& aos_fab, F const& f)
30 {
31  Box const& box = aos_fab.box();
32  auto const& aos = aos_fab.array();
33  using T = typename STRUCT::value_type;
34  constexpr int STRUCTSIZE = sizeof(STRUCT)/sizeof(T);
35  static_assert(sizeof(STRUCT) == sizeof(T)*STRUCTSIZE,
36  "amrex::fill: sizeof(STRUCT) != sizeof(T)*STRUCTSIZE");
37 #ifdef AMREX_USE_GPU
38  if (Gpu::inLaunchRegion()) {
39  BoxIndexer indexer(box);
40  const auto ntotcells = std::uint64_t(box.numPts());
41  int nthreads_per_block = (STRUCTSIZE <= 8) ? 256 : 128;
42  std::uint64_t nblocks_long = (ntotcells+nthreads_per_block-1)/nthreads_per_block;
43  AMREX_ASSERT(nblocks_long <= std::uint64_t(std::numeric_limits<int>::max()));
44  auto nblocks = int(nblocks_long);
45  std::size_t shared_mem_bytes = nthreads_per_block * sizeof(STRUCT);
46  T* p = (T*)aos_fab.dataPtr();
47 #ifdef AMREX_USE_SYCL
48  amrex::launch(nblocks, nthreads_per_block, shared_mem_bytes, Gpu::gpuStream(),
49  [=] AMREX_GPU_DEVICE (Gpu::Handler const& handler) noexcept
50  {
51  auto const icell = std::uint64_t(handler.globalIdx());
52  std::uint64_t const blockDimx = handler.blockDim();
53  std::uint64_t const threadIdxx = handler.threadIdx();
54  std::uint64_t const blockIdxx = handler.blockIdx();
55  auto const shared = (T*)handler.sharedMemory();
56  if (icell < indexer.numPts()) {
57  auto ga = new(shared+threadIdxx*STRUCTSIZE) STRUCT;
58  auto [i, j, k] = indexer(icell);
59  f(*ga, i, j, k);
60  }
61  handler.sharedBarrier();
62  for (std::uint64_t m = threadIdxx,
63  mend = amrex::min<std::uint64_t>(blockDimx, indexer.numPts()-blockDimx*blockIdxx) * STRUCTSIZE;
64  m < mend; m += blockDimx) {
65  p[blockDimx*blockIdxx*STRUCTSIZE+m] = shared[m];
66  }
67  });
68 #else
69  amrex::launch(nblocks, nthreads_per_block, shared_mem_bytes, Gpu::gpuStream(),
70  [=] AMREX_GPU_DEVICE () noexcept
71  {
72  std::uint64_t const icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x;
74  T* const shared = gsm.dataPtr();
75  if (icell < indexer.numPts()) {
76  auto ga = new(shared+std::uint64_t(threadIdx.x)*STRUCTSIZE) STRUCT;
77  auto [i, j, k] = indexer(icell);
78  f(*ga, i, j, k);
79  }
80  __syncthreads();
81  for (std::uint64_t m = threadIdx.x,
82  mend = amrex::min<std::uint64_t>(blockDim.x, indexer.numPts()-std::uint64_t(blockDim.x)*blockIdx.x) * STRUCTSIZE;
83  m < mend; m += blockDim.x) {
84  p[std::uint64_t(blockDim.x)*blockIdx.x*STRUCTSIZE+m] = shared[m];
85  }
86  });
87 #endif
88  } else
89 #endif
90  {
91  amrex::LoopOnCpu(box, [&] (int i, int j, int k) noexcept
92  {
93  f(aos(i,j,k), i, j, k);
94  });
95  }
96 }
97 
98 }
99 
100 #endif
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
#define AMREX_IS_TRIVIALLY_COPYABLE(T)
Definition: AMReX_TypeTraits.H:10
A FortranArrayBox(FAB)-like object.
Definition: AMReX_BaseFab.H:183
T * dataPtr(int n=0) noexcept
Returns a pointer to an object of type T that is the value of the Nth component associated with the c...
Definition: AMReX_BaseFab.H:352
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
Definition: AMReX_BaseFab.H:379
const Box & box() const noexcept
Returns the domain (box) where the array is defined.
Definition: AMReX_BaseFab.H:291
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition: AMReX_Box.H:346
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
@ max
Definition: AMReX_ParallelReduce.H:17
Definition: AMReX_Amr.cpp:49
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:354
void launch(T const &n, L &&f) noexcept
Definition: AMReX_GpuLaunchFunctsC.H:120
void fill(BaseFab< STRUCT > &aos_fab, F const &f)
Definition: AMReX_BaseFabUtility.H:29
AMREX_GPU_HOST_DEVICE void cast(BaseFab< Tto > &tofab, BaseFab< Tfrom > const &fromfab, Box const &bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
Definition: AMReX_BaseFabUtility.H:13
AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrent(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:149
const int[]
Definition: AMReX_BLProfiler.cpp:1664
Definition: AMReX_Box.H:2027
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t numPts() const
Definition: AMReX_Box.H:2068
Definition: AMReX_BaseFab.H:72
Definition: AMReX_GpuTypes.H:86
Definition: AMReX_GpuMemory.H:125
AMREX_GPU_DEVICE T * dataPtr() noexcept
Definition: AMReX_GpuMemory.H:126
Definition: AMReX_BaseFab.H:78
Definition: AMReX_BaseFab.H:66