1#ifndef AMREX_MF_PARALLEL_FOR_G_H_
2#define AMREX_MF_PARALLEL_FOR_G_H_
3#include <AMReX_Config.H>
12namespace amrex::detail {
15void build_par_for_boxes (
char*& hp, BoxIndexer*& pboxes, Vector<Box>
const& boxes)
17 if (boxes.empty()) {
return; }
18 const int nboxes = boxes.size();
19 const std::size_t nbytes = nboxes*
sizeof(
BoxIndexer);
22 for (
int i = 0; i < nboxes; ++i) {
27 Gpu::htod_memcpy_async(dp, hp, nbytes);
32void destroy_par_for_boxes (
char* hp,
char* dp)
38namespace parfor_mf_detail {
41 auto call_f (F
const& f,
int b,
int i,
int j,
int k,
int)
noexcept
42 ->
decltype(f(0,0,0,0))
49 auto call_f (F
const& f,
int b,
int i,
int j,
int k,
int ncomp)
noexcept
50 ->
decltype(f(0,0,0,0,0))
52 for (
int n = 0; n < ncomp; ++n) {
58template <
int MT,
typename MF,
typename F>
59std::enable_if_t<IsFabArray<MF>::value>
60ParallelFor_doit (MF
const& mf, IntVect
const& nghost,
int ncomp, IntVect
const&,
bool, F
const& f)
62 const auto& index_array = mf.IndexArray();
63 const int nboxes = index_array.size();
67 }
else if (nboxes == 1) {
71 parfor_mf_detail::call_f(f, 0, i, j, k, ncomp);
74 auto const& parforinfo = mf.getParForInfo(nghost);
75 auto nblocks_per_box = parforinfo.getNBlocksPerBox(MT);
77 const int nblocks = nblocks_per_box * nboxes;
78 const BoxIndexer* dp_boxes = parforinfo.getBoxes();
80#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
82 amrex::launch_global<MT>
83 <<<nblocks, MT, 0, Gpu::gpuStream()>>>
86 int ibox = int(blockIdx.x) / nblocks_per_box;
87 auto icell = std::uint64_t(blockIdx.x-ibox*nblocks_per_box)*MT + threadIdx.x;
89#elif defined(AMREX_USE_SYCL)
91 amrex::launch<MT>(nblocks, Gpu::gpuStream(),
94 int blockIdxx = item.get_group_linear_id();
95 int threadIdxx = item.get_local_linear_id();
96 int ibox = int(blockIdxx) / nblocks_per_box;
97 auto icell = std::uint64_t(blockIdxx-ibox*nblocks_per_box)*MT + threadIdxx;
100 if (icell < indexer.numPts()) {
101 auto [i, j, k] = indexer(icell);
102 parfor_mf_detail::call_f(f, ibox, i, j, k, ncomp);
109template <
typename MF,
typename F>
110std::enable_if_t<IsFabArray<MF>::value>
111ParallelFor_doit (MF
const& mf, IntVect
const& nghost,
int ncomp, IntVect
const& ts,
bool dynamic, F&& f)
114 constexpr int MT = 128;
116 constexpr int MT = AMREX_GPU_MAX_THREADS;
118 ParallelFor_doit<MT>(mf, nghost, ncomp, ts, dynamic, std::forward<F>(f));
121template <
typename MF,
typename F>
122std::enable_if_t<IsFabArray<MF>::value>
123ParallelFor_doit (MF
const& mf, IntVect
const& nghost, IntVect
const& ts,
bool dynamic, F&& f)
126 constexpr int MT = 128;
128 constexpr int MT = AMREX_GPU_MAX_THREADS;
130 ParallelFor_doit<MT>(mf, nghost, 1, ts, dynamic, std::forward<F>(f));
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_ERROR_CHECK()
Definition AMReX_GpuError.H:133
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1280
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:27
BoxIndexerND< 3 > BoxIndexer
Definition AMReX_Box.H:2224
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:823
Arena * The_Arena()
Definition AMReX_Arena.cpp:783