1#ifndef AMREX_TAG_PARALLELFOR_H_
2#define AMREX_TAG_PARALLELFOR_H_
3#include <AMReX_Config.H>
25template <
class T0,
class T1=T0>
37template <
class T0,
class T1=T0>
128 template <
typename T>
129 requires (std::same_as<std::decay_t<decltype(std::declval<T>().box())>,
Box>)
130 Long get_tag_size (T
const& tag)
noexcept
133 return static_cast<int>(tag.box().numPts());
136 template <
typename T>
137 requires (std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >)
138 Long get_tag_size (T
const& tag)
noexcept
144 template <
typename T>
145 requires (std::same_as<std::decay_t<decltype(std::declval<T>().box())>,
Box>)
146 constexpr bool is_box_tag (T
const&) {
return true; }
148 template <
typename T>
149 requires (std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >)
150 constexpr bool is_box_tag (T
const&) {
return false; }
155template <
class TagType>
192 other.h_buffer =
nullptr;
193 other.d_buffer =
nullptr;
194 other.d_tags =
nullptr;
195 other.d_nwarps =
nullptr;
199 other.defined =
false;
202 if (
this == &other) {
207 other.h_buffer =
nullptr;
209 other.d_buffer =
nullptr;
211 other.d_tags =
nullptr;
213 other.d_nwarps =
nullptr;
221 other.defined =
false;
239 Long l_ntotwarps = 0;
242 nwarps.reserve(
ntags+1);
243 for (
int i = 0; i <
ntags; ++i)
254 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
256 std::size_t sizeof_nwarps = (
ntags+1)*
sizeof(
int);
257 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
262 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
263 std::memcpy(
h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
277 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
280 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
310template <
typename T,
typename F>
311requires (std::same_as<std::decay_t<decltype(std::declval<T>().box())>,
Box>)
313void tagparfor_call_f (
int icell, T
const& tag,
F&& f)
noexcept
315 int ncells = tag.box().
numPts();
318 int k = icell / (len.x*len.y);
319 int j = (icell - k*(len.x*len.y)) / len.x;
320 int i = (icell - k*(len.x*len.y)) - j*len.x;
324 f(icell, ncells, i, j, k, tag);
327template <
typename T,
typename F>
328requires (std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >)
330void tagparfor_call_f (
int i, T
const& tag,
F&& f)
noexcept
336template <
class TagType,
class F>
338ParallelFor_doit (TagVector<TagType>
const& tv,
F const& f)
342 if (tv.ntags == 0 || tv.ntotwarps == 0) {
return; }
344 const auto d_tags = tv.d_tags;
345 const auto d_nwarps = tv.d_nwarps;
346 const auto ntags = tv.ntags;
347 const auto ntotwarps = tv.ntotwarps;
353 [[sycl::reqd_work_group_size(nthreads)]]
360 std::size_t g_tid = item.get_global_id(0);
362 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
365 if (g_wid >= ntotwarps) {
return; }
369 int b_wid = g_wid - d_nwarps[tag_id];
377 tagparfor_call_f(icell, d_tags[tag_id], f);
383template <
class TagType,
class F>
385ParallelFor_doit (TagVector<TagType>
const& tv,
F const& f)
392 constexpr bool tag_type = is_box_tag(TagType{});
394 if (tv.ntags == 0) {
return; }
396 const auto d_tags = tv.d_tags;
397 const auto ntags = tv.ntags;
400#pragma omp parallel for
402 for (
int itag = 0; itag < ntags; ++itag) {
404 const auto& t = d_tags[itag];
406 if constexpr (tag_type) {
410 for (
int k = lo.z; k <= hi.z; ++k) {
411 for (
int j = lo.y; j <= hi.y; ++j) {
413 for (
int i = lo.x; i <= hi.x; ++i) {
419 const auto size = t.size();
422 for (
int i = 0; i < size; ++i) {
434template <
class TagType,
class F>
435requires (std::same_as<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>)
438 detail::ParallelFor_doit(tv,
440 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
442 if (icell < ncells) {
443 for (
int n = 0; n < ncomp; ++n) {
450template <
class TagType,
class F>
451requires (std::same_as<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>)
454 detail::ParallelFor_doit(tv,
456 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
458 if (icell < ncells) {
464template <
class TagType,
class F>
465requires (std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().size())> >)
468 detail::ParallelFor_doit(tv,
470 int icell,
int ncells, TagType
const& tag)
noexcept
472 if (icell < ncells) {
478template <
class TagType,
class F>
479requires (std::same_as<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>)
486template <
class TagType,
class F>
487requires (std::same_as<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>)
494template <
class TagType,
class F>
495requires (std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().size())> >)
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will a...
Definition AMReX_Arena.cpp:167
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:364
static constexpr int warp_size
Definition AMReX_GpuDevice.H:236
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
Long size() const noexcept
Definition AMReX_Vector.H:54
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1359
__host__ __device__ Dim3 length(Array4< T > const &a) noexcept
Return the spatial extents of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1373
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1345
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:291
Definition AMReX_Amr.cpp:50
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:151
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
const int[]
Definition AMReX_BLProfiler.cpp:1664
Definition AMReX_TagParallelFor.H:87
Box bx
Definition AMReX_TagParallelFor.H:89
Array4< T > fab
Definition AMReX_TagParallelFor.H:88
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:93
Dim3 offset
Definition AMReX_TagParallelFor.H:90
Definition AMReX_TagParallelFor.H:77
Array4< T > fab
Definition AMReX_TagParallelFor.H:78
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:83
Box bx
Definition AMReX_TagParallelFor.H:79
Orientation face
Definition AMReX_TagParallelFor.H:80
Definition AMReX_TagParallelFor.H:58
Array4< T > dfab
Definition AMReX_TagParallelFor.H:59
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:63
Box dbox
Definition AMReX_TagParallelFor.H:60
Definition AMReX_TagParallelFor.H:67
T val
Definition AMReX_TagParallelFor.H:70
Array4< T > dfab
Definition AMReX_TagParallelFor.H:68
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:73
Box dbox
Definition AMReX_TagParallelFor.H:69
Definition AMReX_TagParallelFor.H:26
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:29
Dim3 offset
Definition AMReX_TagParallelFor.H:31
int dindex
Definition AMReX_TagParallelFor.H:28
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:34
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:27
Box dbox
Definition AMReX_TagParallelFor.H:30
Definition AMReX_TagParallelFor.H:38
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:46
Dim3 offset
Definition AMReX_TagParallelFor.H:43
Box dbox
Definition AMReX_TagParallelFor.H:42
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:39
Array4< int > mask
Definition AMReX_TagParallelFor.H:41
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:40
Definition AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition AMReX_TagParallelFor.H:17
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:22
Box dbox
Definition AMReX_TagParallelFor.H:19
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:18
Definition AMReX_TagParallelFor.H:50
Array4< T > dfab
Definition AMReX_TagParallelFor.H:51
__host__ __device__ Box box() const noexcept
Definition AMReX_TagParallelFor.H:54
A multidimensional array accessor.
Definition AMReX_Array4.H:285
Definition AMReX_TagParallelFor.H:106
Box bx
Definition AMReX_TagParallelFor.H:109
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:112
std::ptrdiff_t poff
Definition AMReX_TagParallelFor.H:108
Array4< T > dfab
Definition AMReX_TagParallelFor.H:107
Definition AMReX_TagParallelFor.H:116
std::ptrdiff_t poff
Definition AMReX_TagParallelFor.H:118
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:122
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:117
Box bx
Definition AMReX_TagParallelFor.H:119
Definition AMReX_Dim3.H:13
Definition AMReX_TagParallelFor.H:156
TagVector(TagVector &&other) noexcept
Definition AMReX_TagParallelFor.H:182
~TagVector()
Definition AMReX_TagParallelFor.H:174
char * h_buffer
Definition AMReX_TagParallelFor.H:158
TagVector(const TagVector &other)=delete
TagType * d_tags
Definition AMReX_TagParallelFor.H:160
bool defined
Definition AMReX_TagParallelFor.H:165
int * d_nwarps
Definition AMReX_TagParallelFor.H:161
TagVector(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:170
bool is_defined() const
Definition AMReX_TagParallelFor.H:225
int ntotwarps
Definition AMReX_TagParallelFor.H:163
char * d_buffer
Definition AMReX_TagParallelFor.H:159
int ntags
Definition AMReX_TagParallelFor.H:162
TagVector & operator=(const TagVector &other)=delete
int nblocks
Definition AMReX_TagParallelFor.H:164
static constexpr int nthreads
Definition AMReX_TagParallelFor.H:166
void undefine()
Definition AMReX_TagParallelFor.H:288
void define(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:227
Definition AMReX_TagParallelFor.H:97
T * p
Definition AMReX_TagParallelFor.H:98
Long m_size
Definition AMReX_TagParallelFor.H:99
__host__ __device__ Long size() const noexcept
Definition AMReX_TagParallelFor.H:102