1#ifndef AMREX_TAG_PARALLELFOR_H_
2#define AMREX_TAG_PARALLELFOR_H_
3#include <AMReX_Config.H>
25template <
class T0,
class T1=T0>
36template <
class T0,
class T1=T0>
106 template <
typename T>
107 std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>,
Box>, Long>
110 AMREX_ASSERT(tag.box().numPts() < Long(std::numeric_limits<int>::max()));
111 return static_cast<int>(tag.box().numPts());
114 template <
typename T>
115 std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >, Long>
118 AMREX_ASSERT(tag.size() < Long(std::numeric_limits<int>::max()));
122 template <
typename T>
124 std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>,
Box>,
bool>
127 template <
typename T>
129 std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().
size())> >,
bool>
134template <
class TagType>
171 other.h_buffer =
nullptr;
172 other.d_buffer =
nullptr;
173 other.d_tags =
nullptr;
174 other.d_nwarps =
nullptr;
178 other.defined =
false;
181 if (
this == &other) {
186 other.h_buffer =
nullptr;
188 other.d_buffer =
nullptr;
190 other.d_tags =
nullptr;
192 other.d_nwarps =
nullptr;
200 other.defined =
false;
218 Long l_ntotwarps = 0;
221 nwarps.reserve(
ntags+1);
222 for (
int i = 0; i <
ntags; ++i)
233 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
235 std::size_t sizeof_nwarps = (
ntags+1)*
sizeof(
int);
236 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
241 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
242 std::memcpy(
h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
254 AMREX_ALWAYS_ASSERT(l_ntotwarps+nwarps_per_block-1 < Long(std::numeric_limits<int>::max()));
256 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
259 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
288template <
typename T,
typename F>
290std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>,
Box>,
void>
293 sycl::nd_item<1>
const& item,
295 int icell, T
const& tag,
F&& f)
noexcept
297 int ncells = tag.box().
numPts();
300 int k = icell / (len.x*len.y);
301 int j = (icell - k*(len.x*len.y)) / len.x;
302 int i = (icell - k*(len.x*len.y)) - j*len.x;
307 f(item, icell, ncells, i, j, k, tag);
309 f( icell, ncells, i, j, k, tag);
313template <
typename T,
typename F>
315std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >,
void>
318 sycl::nd_item<1>
const& item,
320 int i, T
const& tag,
F&& f)
noexcept
330template <
class TagType,
class F>
336 if (tv.
ntags == 0) {
return; }
338 const auto d_tags = tv.
d_tags;
340 const auto ntags = tv.
ntags;
347 [[sycl::reqd_work_group_size(nthreads)]]
354 std::size_t g_tid = item.get_global_id(0);
356 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
359 if (g_wid >= ntotwarps) {
return; }
363 int b_wid = g_wid - d_nwarps[tag_id];
381template <
class TagType,
class F>
390 if (tv.
ntags == 0) {
return; }
392 const auto d_tags = tv.
d_tags;
393 const auto ntags = tv.
ntags;
396#pragma omp parallel for
398 for (
int itag = 0; itag < ntags; ++itag) {
400 const auto& t = d_tags[itag];
406 for (
int k = lo.z; k <= hi.z; ++k) {
407 for (
int j = lo.y; j <= hi.y; ++j) {
409 for (
int i = lo.x; i <= hi.x; ++i) {
415 const auto size = t.size();
418 for (
int i = 0; i <
size; ++i) {
429template <
class TagType,
class F>
430std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>>
436 sycl::nd_item<1>
const& ,
438 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
440 if (icell < ncells) {
441 for (
int n = 0; n < ncomp; ++n) {
448template <
class TagType,
class F>
449std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>,
void>
455 sycl::nd_item<1>
const& ,
457 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
459 if (icell < ncells) {
465template <
class TagType,
class F>
466std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().
size())> >,
void>
467ParallelFor (TagVector<TagType>
const& tv,
F const& f)
472 sycl::nd_item<1>
const& ,
474 int icell,
int ncells, TagType
const& tag)
noexcept
476 if (icell < ncells) {
482template <
class TagType,
class F>
483std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>>
490template <
class TagType,
class F>
491std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>,
void>
498template <
class TagType,
class F>
499std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().size())> >,
void>
500ParallelFor (Vector<TagType>
const& tags,
F && f)
502 TagVector<TagType> tv{tags};
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will align to align_size bytes.
Definition AMReX_Arena.cpp:143
AMREX_GPU_HOST_DEVICE IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:139
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition AMReX_Box.H:346
static AMREX_EXPORT constexpr int warp_size
Definition AMReX_GpuDevice.H:173
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound checking when compiled with DEBUG=TRUE.
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition AMReX_GpuRange.H:26
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:237
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:251
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:218
Long get_tag_size(T const &tag) noexcept
Definition AMReX_TagParallelFor.H:108
void ParallelFor_doit(TagVector< TagType > const &tv, F const &f)
Definition AMReX_TagParallelFor.H:332
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void tagparfor_call_f(int icell, T const &tag, F &&f) noexcept
Definition AMReX_TagParallelFor.H:291
constexpr bool is_box_tag(T const &)
Definition AMReX_TagParallelFor.H:125
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
BoxND< AMREX_SPACEDIM > Box
Definition AMReX_BaseFwd.H:27
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:319
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:312
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 length(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:326
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:656
const int[]
Definition AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition AMReX_Arena.cpp:616
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_TagParallelFor.H:86
Box bx
Definition AMReX_TagParallelFor.H:88
Array4< T > fab
Definition AMReX_TagParallelFor.H:87
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:92
Dim3 offset
Definition AMReX_TagParallelFor.H:89
Definition AMReX_TagParallelFor.H:76
Array4< T > fab
Definition AMReX_TagParallelFor.H:77
Box bx
Definition AMReX_TagParallelFor.H:78
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:82
Orientation face
Definition AMReX_TagParallelFor.H:79
Definition AMReX_TagParallelFor.H:57
Array4< T > dfab
Definition AMReX_TagParallelFor.H:58
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:62
Box dbox
Definition AMReX_TagParallelFor.H:59
Definition AMReX_TagParallelFor.H:66
T val
Definition AMReX_TagParallelFor.H:69
Array4< T > dfab
Definition AMReX_TagParallelFor.H:67
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:72
Box dbox
Definition AMReX_TagParallelFor.H:68
Definition AMReX_TagParallelFor.H:26
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:33
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:28
Dim3 offset
Definition AMReX_TagParallelFor.H:30
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:27
Box dbox
Definition AMReX_TagParallelFor.H:29
Definition AMReX_TagParallelFor.H:37
Dim3 offset
Definition AMReX_TagParallelFor.H:42
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:45
Box dbox
Definition AMReX_TagParallelFor.H:41
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:38
Array4< int > mask
Definition AMReX_TagParallelFor.H:40
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:39
Definition AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition AMReX_TagParallelFor.H:17
Box dbox
Definition AMReX_TagParallelFor.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:22
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:18
Definition AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition AMReX_TagParallelFor.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box box() const noexcept
Definition AMReX_TagParallelFor.H:53
Definition AMReX_Array4.H:61
Definition AMReX_Dim3.H:12
Definition AMReX_TagParallelFor.H:135
TagVector(TagVector &&other) noexcept
Definition AMReX_TagParallelFor.H:161
~TagVector()
Definition AMReX_TagParallelFor.H:153
char * h_buffer
Definition AMReX_TagParallelFor.H:137
TagVector(const TagVector &other)=delete
TagType * d_tags
Definition AMReX_TagParallelFor.H:139
bool defined
Definition AMReX_TagParallelFor.H:144
int * d_nwarps
Definition AMReX_TagParallelFor.H:140
TagVector(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:149
bool is_defined() const
Definition AMReX_TagParallelFor.H:204
int ntotwarps
Definition AMReX_TagParallelFor.H:142
char * d_buffer
Definition AMReX_TagParallelFor.H:138
int ntags
Definition AMReX_TagParallelFor.H:141
TagVector & operator=(const TagVector &other)=delete
int nblocks
Definition AMReX_TagParallelFor.H:143
static constexpr int nthreads
Definition AMReX_TagParallelFor.H:145
void undefine()
Definition AMReX_TagParallelFor.H:267
void define(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:206
Definition AMReX_TagParallelFor.H:96
T * p
Definition AMReX_TagParallelFor.H:97
Long m_size
Definition AMReX_TagParallelFor.H:98
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long size() const noexcept
Definition AMReX_TagParallelFor.H:101