1 #ifndef AMREX_TAG_PARALLELFOR_H_
2 #define AMREX_TAG_PARALLELFOR_H_
3 #include <AMReX_Config.H>
25 template <
class T0,
class T1=T0>
36 template <
class T0,
class T1=T0>
108 template <
typename T>
109 std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>,
Box>::value,
114 return static_cast<int>(tag.box().numPts());
117 template <
typename T>
118 std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value,
126 template <
typename T,
typename F>
128 std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>,
Box>::value>
130 #ifdef AMREX_USE_SYCL
131 sycl::nd_item<1>
const& item,
133 int icell, T
const& tag, F&&
f) noexcept
135 int ncells = tag.box().numPts();
138 int k = icell / (len.x*len.y);
139 int j = (icell - k*(len.x*len.y)) / len.x;
140 int i = (icell - k*(len.x*len.y)) - j*len.x;
144 #ifdef AMREX_USE_SYCL
145 f(item, icell, ncells, i, j, k, tag);
147 f( icell, ncells, i, j, k, tag);
151 template <
typename T,
typename F>
153 std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value>
155 #ifdef AMREX_USE_SYCL
156 sycl::nd_item<1>
const& item,
158 int i, T
const& tag, F&&
f) noexcept
161 #ifdef AMREX_USE_SYCL
168 template <
class TagType,
class F>
172 const int ntags = tags.
size();
173 if (ntags == 0) {
return; }
175 Long l_ntotwarps = 0;
178 nwarps.reserve(ntags+1);
179 for (
int i = 0; i < ntags; ++i)
182 nwarps.push_back(ntotwarps);
185 ntotwarps +=
static_cast<int>(nw);
187 nwarps.push_back(ntotwarps);
189 std::size_t sizeof_tags = ntags*
sizeof(TagType);
191 std::size_t sizeof_nwarps = (ntags+1)*
sizeof(
int);
192 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
198 std::memcpy(h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
201 auto d_tags =
reinterpret_cast<TagType*
>(d_buffer);
202 auto d_nwarps =
reinterpret_cast<int*
>(d_buffer+offset_nwarps);
204 constexpr
int nthreads = 256;
206 int nblocks = (ntotwarps + nwarps_per_block-1) / nwarps_per_block;
212 #ifdef AMREX_USE_SYCL
214 [[sycl::reqd_work_group_size(nthreads)]]
220 #ifdef AMREX_USE_SYCL
221 std::size_t g_tid = item.get_global_id(0);
223 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
226 if (g_wid >= ntotwarps) {
return; }
230 int b_wid = g_wid - d_nwarps[tag_id];
231 #ifdef AMREX_USE_SYCL
238 #ifdef AMREX_USE_SYCL
252 template <
class TagType,
class F>
253 std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>,
259 #ifdef AMREX_USE_SYCL
260 sycl::nd_item<1>
const& ,
262 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag) noexcept
264 if (icell < ncells) {
265 for (
int n = 0; n < ncomp; ++n) {
272 template <
class TagType,
class F>
273 std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>::value>
278 #ifdef AMREX_USE_SYCL
279 sycl::nd_item<1>
const& ,
281 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag) noexcept
283 if (icell < ncells) {
289 template <
class TagType,
class F>
290 std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<TagType>().size())> >::value>
295 #ifdef AMREX_USE_SYCL
296 sycl::nd_item<1>
const& ,
298 int icell,
int ncells, TagType
const& tag) noexcept
300 if (icell < ncells) {
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will a...
Definition: AMReX_Arena.cpp:143
virtual void * alloc(std::size_t sz)=0
static constexpr AMREX_EXPORT int warp_size
Definition: AMReX_GpuDevice.H:173
Encapsulation of the Orientation of the Faces of a Box.
Definition: AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
@ max
Definition: AMReX_ParallelReduce.H:17
void ParallelFor_doit(Vector< TagType > const &tags, F &&f)
Definition: AMReX_TagParallelFor.H:170
std::enable_if_t< std::is_same< std::decay_t< decltype(std::declval< T >().box())>, Box >::value, Long > get_tag_size(T const &tag) noexcept
Definition: AMReX_TagParallelFor.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::enable_if_t< std::is_same< std::decay_t< decltype(std::declval< T >().box())>, Box >::value > tagparfor_call_f(int icell, T const &tag, F &&f) noexcept
Definition: AMReX_TagParallelFor.H:129
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
BoxND< AMREX_SPACEDIM > Box
Definition: AMReX_BaseFwd.H:27
void launch(T const &n, L &&f) noexcept
Definition: AMReX_GpuLaunchFunctsC.H:120
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition: AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 length(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:322
Arena * The_Pinned_Arena()
Definition: AMReX_Arena.cpp:649
const int[]
Definition: AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition: AMReX_Arena.cpp:609
Definition: AMReX_FabArrayCommI.H:896
Definition: AMReX_TagParallelFor.H:86
Box bx
Definition: AMReX_TagParallelFor.H:88
Array4< T > fab
Definition: AMReX_TagParallelFor.H:87
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:92
Dim3 offset
Definition: AMReX_TagParallelFor.H:89
Definition: AMReX_TagParallelFor.H:76
Array4< T > fab
Definition: AMReX_TagParallelFor.H:77
Box bx
Definition: AMReX_TagParallelFor.H:78
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:82
Orientation face
Definition: AMReX_TagParallelFor.H:79
Definition: AMReX_TagParallelFor.H:57
Array4< T > dfab
Definition: AMReX_TagParallelFor.H:58
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:62
Box dbox
Definition: AMReX_TagParallelFor.H:59
Definition: AMReX_TagParallelFor.H:66
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:72
T val
Definition: AMReX_TagParallelFor.H:69
Array4< T > dfab
Definition: AMReX_TagParallelFor.H:67
Box dbox
Definition: AMReX_TagParallelFor.H:68
Definition: AMReX_TagParallelFor.H:26
Array4< T1 const > sfab
Definition: AMReX_TagParallelFor.H:28
Dim3 offset
Definition: AMReX_TagParallelFor.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:33
Array4< T0 > dfab
Definition: AMReX_TagParallelFor.H:27
Box dbox
Definition: AMReX_TagParallelFor.H:29
Definition: AMReX_TagParallelFor.H:37
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:45
Dim3 offset
Definition: AMReX_TagParallelFor.H:42
Box dbox
Definition: AMReX_TagParallelFor.H:41
Array4< T0 > dfab
Definition: AMReX_TagParallelFor.H:38
Array4< int > mask
Definition: AMReX_TagParallelFor.H:40
Array4< T1 const > sfab
Definition: AMReX_TagParallelFor.H:39
Definition: AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition: AMReX_TagParallelFor.H:17
Box dbox
Definition: AMReX_TagParallelFor.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition: AMReX_TagParallelFor.H:22
Array4< T const > sfab
Definition: AMReX_TagParallelFor.H:18
Definition: AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition: AMReX_TagParallelFor.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box box() const noexcept
Definition: AMReX_TagParallelFor.H:53
Definition: AMReX_Array4.H:61
Definition: AMReX_Dim3.H:12
Definition: AMReX_TagParallelFor.H:96
T * p
Definition: AMReX_TagParallelFor.H:97
Long m_size
Definition: AMReX_TagParallelFor.H:98
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long size() const noexcept
Definition: AMReX_TagParallelFor.H:101