1#ifndef AMREX_MF_PARALLEL_FOR_G_H_
2#define AMREX_MF_PARALLEL_FOR_G_H_
3#include <AMReX_Config.H>
13namespace amrex::detail {
16void build_par_for_boxes (
char*& hp,
BoxIndexer*& pboxes, Vector<Box>
const& boxes)
18 if (boxes.empty()) {
return; }
19 const int nboxes = boxes.size();
20 const std::size_t nbytes = nboxes*
sizeof(
BoxIndexer);
23 for (
int i = 0; i < nboxes; ++i) {
33void destroy_par_for_boxes (
char* hp,
char* dp)
39namespace parfor_mf_detail {
42 auto call_f (F
const& f,
int b,
int i,
int j,
int k,
int)
noexcept
43 ->
decltype(f(0,0,0,0))
50 auto call_f (F
const& f,
int b,
int i,
int j,
int k,
int ncomp)
noexcept
51 ->
decltype(f(0,0,0,0,0))
53 for (
int n = 0; n < ncomp; ++n) {
59template <
int MT, FabArrayType MF,
typename F>
61ParallelFor_doit (MF
const& mf,
IntVect const& nghost,
int ncomp,
IntVect const&,
bool,
F const& f)
63 const auto& index_array = mf.IndexArray();
64 const int nboxes = index_array.size();
68 }
else if (nboxes == 1) {
72 parfor_mf_detail::call_f(f, 0, i, j, k, ncomp);
75 auto const& parforinfo = mf.getParForInfo(nghost);
76 auto nblocks_per_box = parforinfo.getNBlocksPerBox(MT);
78 const int nblocks = nblocks_per_box * nboxes;
79 const BoxIndexer* dp_boxes = parforinfo.getBoxes();
81#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
83 amrex::launch_global<MT>
87 int ibox =
int(blockIdx.x) / nblocks_per_box;
88 auto icell = std::uint64_t(blockIdx.x-ibox*nblocks_per_box)*MT + threadIdx.x;
90#elif defined(AMREX_USE_SYCL)
95 int blockIdxx = item.get_group_linear_id();
96 int threadIdxx = item.get_local_linear_id();
97 int ibox =
int(blockIdxx) / nblocks_per_box;
98 auto icell = std::uint64_t(blockIdxx-ibox*nblocks_per_box)*MT + threadIdxx;
101 if (icell < indexer.numPts()) {
102 auto [i, j, k] = indexer(icell);
103 parfor_mf_detail::call_f(f, ibox, i, j, k, ncomp);
110template <FabArrayType MF,
typename F>
112ParallelFor_doit (MF
const& mf,
IntVect const& nghost,
int ncomp,
IntVect const& ts,
bool dynamic,
F&& f)
115 constexpr int MT = 128;
117 constexpr int MT = AMREX_GPU_MAX_THREADS;
119 ParallelFor_doit<MT>(mf, nghost, ncomp, ts, dynamic, std::forward<F>(f));
122template <FabArrayType MF,
typename F>
124ParallelFor_doit (MF
const& mf,
IntVect const& nghost,
IntVect const& ts,
bool dynamic,
F&& f)
127 constexpr int MT = 128;
129 constexpr int MT = AMREX_GPU_MAX_THREADS;
131 ParallelFor_doit<MT>(mf, nghost, 1, ts, dynamic, std::forward<F>(f));
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_ERROR_CHECK()
Definition AMReX_GpuError.H:151
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1289
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:291
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
BoxIndexerND< 3 > BoxIndexer
Definition AMReX_Box.H:2242
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
const int[]
Definition AMReX_BLProfiler.cpp:1664