1#ifndef AMREX_TAG_PARALLELFOR_H_
2#define AMREX_TAG_PARALLELFOR_H_
3#include <AMReX_Config.H>
25template <
class T0,
class T1=T0>
36template <
class T0,
class T1=T0>
109std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>,
Box>::value,
113 AMREX_ASSERT(tag.box().numPts() < Long(std::numeric_limits<int>::max()));
114 return static_cast<int>(tag.box().numPts());
118std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value,
122 AMREX_ASSERT(tag.size() < Long(std::numeric_limits<int>::max()));
126template <
typename T,
typename F>
128std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>,
Box>::value,
void>
131 sycl::nd_item<1>
const& item,
133 int icell, T
const& tag,
F&& f)
noexcept
135 int ncells = tag.box().
numPts();
138 int k = icell / (len.x*len.y);
139 int j = (icell - k*(len.x*len.y)) / len.x;
140 int i = (icell - k*(len.x*len.y)) - j*len.x;
145 f(item, icell, ncells, i, j, k, tag);
147 f( icell, ncells, i, j, k, tag);
151template <
typename T,
typename F>
153std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value,
void>
156 sycl::nd_item<1>
const& item,
158 int i, T
const& tag,
F&& f)
noexcept
168template <
class TagType,
class F>
172 const int ntags = tags.
size();
173 if (ntags == 0) {
return; }
175 Long l_ntotwarps = 0;
178 nwarps.reserve(ntags+1);
179 for (
int i = 0; i < ntags; ++i)
182 nwarps.push_back(ntotwarps);
185 ntotwarps +=
static_cast<int>(nw);
187 nwarps.push_back(ntotwarps);
189 std::size_t sizeof_tags = ntags*
sizeof(TagType);
191 std::size_t sizeof_nwarps = (ntags+1)*
sizeof(
int);
192 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
197 std::memcpy(h_buffer, tags.data(), sizeof_tags);
198 std::memcpy(h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
201 auto d_tags =
reinterpret_cast<TagType*
>(d_buffer);
202 auto d_nwarps =
reinterpret_cast<int*
>(d_buffer+offset_nwarps);
204 constexpr int nthreads = 256;
206 int nblocks = (ntotwarps + nwarps_per_block-1) / nwarps_per_block;
209 AMREX_ASSERT(l_ntotwarps+nwarps_per_block-1 < Long(std::numeric_limits<int>::max()));
214 [[sycl::reqd_work_group_size(nthreads)]]
221 std::size_t g_tid = item.get_global_id(0);
223 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
226 if (g_wid >= ntotwarps) {
return; }
230 int b_wid = g_wid - d_nwarps[tag_id];
252template <
class TagType,
class F>
253std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>,
260 sycl::nd_item<1>
const& ,
262 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
264 if (icell < ncells) {
265 for (
int n = 0; n < ncomp; ++n) {
272template <
class TagType,
class F>
273std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>::value,
void>
279 sycl::nd_item<1>
const& ,
281 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
283 if (icell < ncells) {
289template <
class TagType,
class F>
290std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<TagType>().
size())> >::value,
void>
291ParallelFor (Vector<TagType>
const& tags,
F && f)
296 sycl::nd_item<1>
const& ,
298 int icell,
int ncells, TagType
const& tag)
noexcept
300 if (icell < ncells) {
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will a...
Definition AMReX_Arena.cpp:143
AMREX_GPU_HOST_DEVICE IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:139
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition AMReX_Box.H:346
static AMREX_EXPORT constexpr int warp_size
Definition AMReX_GpuDevice.H:173
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:237
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:251
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:218
void ParallelFor_doit(Vector< TagType > const &tags, F &&f)
Definition AMReX_TagParallelFor.H:170
Long get_tag_size(T const &tag) noexcept
Definition AMReX_TagParallelFor.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void tagparfor_call_f(int icell, T const &tag, F &&f) noexcept
Definition AMReX_TagParallelFor.H:129
Definition AMReX_Amr.cpp:49
BoxND< AMREX_SPACEDIM > Box
Definition AMReX_BaseFwd.H:27
void launch(T const &n, L &&f) noexcept
Definition AMReX_GpuLaunchFunctsC.H:120
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 length(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:322
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:656
const int[]
Definition AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition AMReX_Arena.cpp:616
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_TagParallelFor.H:86
Box bx
Definition AMReX_TagParallelFor.H:88
Array4< T > fab
Definition AMReX_TagParallelFor.H:87
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:92
Dim3 offset
Definition AMReX_TagParallelFor.H:89
Definition AMReX_TagParallelFor.H:76
Array4< T > fab
Definition AMReX_TagParallelFor.H:77
Box bx
Definition AMReX_TagParallelFor.H:78
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:82
Orientation face
Definition AMReX_TagParallelFor.H:79
Definition AMReX_TagParallelFor.H:57
Array4< T > dfab
Definition AMReX_TagParallelFor.H:58
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:62
Box dbox
Definition AMReX_TagParallelFor.H:59
Definition AMReX_TagParallelFor.H:66
T val
Definition AMReX_TagParallelFor.H:69
Array4< T > dfab
Definition AMReX_TagParallelFor.H:67
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:72
Box dbox
Definition AMReX_TagParallelFor.H:68
Definition AMReX_TagParallelFor.H:26
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:33
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:28
Dim3 offset
Definition AMReX_TagParallelFor.H:30
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:27
Box dbox
Definition AMReX_TagParallelFor.H:29
Definition AMReX_TagParallelFor.H:37
Dim3 offset
Definition AMReX_TagParallelFor.H:42
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:45
Box dbox
Definition AMReX_TagParallelFor.H:41
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:38
Array4< int > mask
Definition AMReX_TagParallelFor.H:40
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:39
Definition AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition AMReX_TagParallelFor.H:17
Box dbox
Definition AMReX_TagParallelFor.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:22
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:18
Definition AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition AMReX_TagParallelFor.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box box() const noexcept
Definition AMReX_TagParallelFor.H:53
Definition AMReX_Array4.H:61
Definition AMReX_Dim3.H:12
Definition AMReX_TagParallelFor.H:96
T * p
Definition AMReX_TagParallelFor.H:97
Long m_size
Definition AMReX_TagParallelFor.H:98
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long size() const noexcept
Definition AMReX_TagParallelFor.H:101