1#ifndef AMREX_TAG_PARALLELFOR_H_
2#define AMREX_TAG_PARALLELFOR_H_
3#include <AMReX_Config.H>
25template <
class T0,
class T1=T0>
37template <
class T0,
class T1=T0>
128 template <
typename T>
129 std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>, Box>, Long>
130 get_tag_size (T
const& tag)
noexcept
132 AMREX_ASSERT(tag.box().numPts() < Long(std::numeric_limits<int>::max()));
133 return static_cast<int>(tag.box().numPts());
136 template <
typename T>
137 std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >, Long>
138 get_tag_size (T
const& tag)
noexcept
// is_box_tag, Box overload: takes part in overload resolution only when
// T::box() yields a Box (after std::decay_t), and always returns true.
// The parameter is unnamed and unused; only its static type matters.
// NOTE(review): this listing skips original source line 145, so a specifier
// line may be missing between the template header and the return type -- confirm
// against the real header.
144 template <
typename T>
146 std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>, Box>,
bool>
147 is_box_tag (T
const&) {
return true; }
// is_box_tag, size overload: takes part in overload resolution only when
// T::size() yields an integral type (after std::decay_t), and always returns
// false. The parameter is unnamed and unused; only its static type matters.
// NOTE(review): this listing skips original source line 150, so a specifier
// line may be missing between the template header and the return type -- confirm
// against the real header.
149 template <
typename T>
151 std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >,
bool>
152 is_box_tag (T
const&) {
return false; }
157template <
class TagType>
194 other.h_buffer =
nullptr;
195 other.d_buffer =
nullptr;
196 other.d_tags =
nullptr;
197 other.d_nwarps =
nullptr;
201 other.defined =
false;
204 if (
this == &other) {
209 other.h_buffer =
nullptr;
211 other.d_buffer =
nullptr;
213 other.d_tags =
nullptr;
215 other.d_nwarps =
nullptr;
223 other.defined =
false;
241 Long l_ntotwarps = 0;
244 nwarps.reserve(
ntags+1);
245 for (
int i = 0; i <
ntags; ++i)
256 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
258 std::size_t sizeof_nwarps = (
ntags+1)*
sizeof(
int);
259 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
264 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
265 std::memcpy(
h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
279 std::size_t sizeof_tags =
ntags*
sizeof(TagType);
282 std::memcpy(
h_buffer, tags.data(), sizeof_tags);
312template <
typename T,
typename F>
314std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<T>().box())>, Box>,
void>
315tagparfor_call_f (
int icell, T
const& tag, F&& f)
noexcept
317 int ncells = tag.box().numPts();
320 int k = icell / (len.x*len.y);
321 int j = (icell - k*(len.x*len.y)) / len.x;
322 int i = (icell - k*(len.x*len.y)) - j*len.x;
326 f(icell, ncells, i, j, k, tag);
329template <
typename T,
typename F>
331std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<T>().size())> >,
void>
332tagparfor_call_f (
int i, T
const& tag, F&& f)
noexcept
338template <
class TagType,
class F>
340ParallelFor_doit (TagVector<TagType>
const& tv, F
const& f)
344 if (tv.ntags == 0) {
return; }
346 const auto d_tags = tv.d_tags;
347 const auto d_nwarps = tv.d_nwarps;
348 const auto ntags = tv.ntags;
349 const auto ntotwarps = tv.ntotwarps;
350 constexpr auto nthreads = TagVector<TagType>::nthreads;
352 amrex::launch<nthreads>(tv.nblocks, Gpu::gpuStream(),
355 [[sycl::reqd_work_group_size(nthreads)]]
356 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]]
362 std::size_t g_tid = item.get_global_id(0);
364 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
366 auto g_wid = int(g_tid / Gpu::Device::warp_size);
367 if (g_wid >= ntotwarps) {
return; }
371 int b_wid = g_wid - d_nwarps[tag_id];
373 int lane = item.get_local_id(0) % Gpu::Device::warp_size;
375 int lane = threadIdx.x % Gpu::Device::warp_size;
377 int icell = b_wid*Gpu::Device::warp_size + lane;
379 tagparfor_call_f(icell, d_tags[tag_id], f);
385template <
class TagType,
class F>
387ParallelFor_doit (TagVector<TagType>
const& tv, F
const& f)
394 constexpr bool tag_type = is_box_tag(TagType{});
396 if (tv.ntags == 0) {
return; }
398 const auto d_tags = tv.d_tags;
399 const auto ntags = tv.ntags;
402#pragma omp parallel for
404 for (
int itag = 0; itag < ntags; ++itag) {
406 const auto& t = d_tags[itag];
408 if constexpr (tag_type) {
412 for (
int k = lo.z; k <= hi.z; ++k) {
413 for (
int j = lo.y; j <= hi.y; ++j) {
415 for (
int i = lo.x; i <= hi.x; ++i) {
421 const auto size = t.size();
424 for (
int i = 0; i < size; ++i) {
436template <
class TagType,
class F>
437std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>>
440 detail::ParallelFor_doit(tv,
442 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
444 if (icell < ncells) {
445 for (
int n = 0; n < ncomp; ++n) {
452template <
class TagType,
class F>
453std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>,
void>
456 detail::ParallelFor_doit(tv,
458 int icell,
int ncells,
int i,
int j,
int k, TagType
const& tag)
noexcept
460 if (icell < ncells) {
466template <
class TagType,
class F>
467std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().
size())> >,
void>
470 detail::ParallelFor_doit(tv,
472 int icell,
int ncells, TagType
const& tag)
noexcept
474 if (icell < ncells) {
480template <
class TagType,
class F>
481std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>>
488template <
class TagType,
class F>
489std::enable_if_t<std::is_same_v<std::decay_t<decltype(std::declval<TagType>().box())>,
Box>,
void>
496template <
class TagType,
class F>
497std::enable_if_t<std::is_integral_v<std::decay_t<decltype(std::declval<TagType>().size())> >,
void>
500 TagVector<TagType> tv{tags};
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_PRAGMA_SIMD
Definition AMReX_Extension.H:80
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will a...
Definition AMReX_Arena.cpp:152
__host__ __device__ IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:147
static constexpr int warp_size
Definition AMReX_GpuDevice.H:197
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_long Long
Definition AMReX_INT.H:30
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:301
Definition AMReX_Amr.cpp:49
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:319
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:138
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
__host__ __device__ Dim3 length(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:326
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:27
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:823
Arena * The_Arena()
Definition AMReX_Arena.cpp:783
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:312
Definition AMReX_TagParallelFor.H:87
Box bx
Definition AMReX_TagParallelFor.H:89
Array4< T > fab
Definition AMReX_TagParallelFor.H:88
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:93
Dim3 offset
Definition AMReX_TagParallelFor.H:90
Definition AMReX_TagParallelFor.H:77
Array4< T > fab
Definition AMReX_TagParallelFor.H:78
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:83
Box bx
Definition AMReX_TagParallelFor.H:79
Orientation face
Definition AMReX_TagParallelFor.H:80
Definition AMReX_TagParallelFor.H:58
Array4< T > dfab
Definition AMReX_TagParallelFor.H:59
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:63
Box dbox
Definition AMReX_TagParallelFor.H:60
Definition AMReX_TagParallelFor.H:67
T val
Definition AMReX_TagParallelFor.H:70
Array4< T > dfab
Definition AMReX_TagParallelFor.H:68
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:73
Box dbox
Definition AMReX_TagParallelFor.H:69
Definition AMReX_TagParallelFor.H:26
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:29
Dim3 offset
Definition AMReX_TagParallelFor.H:31
int dindex
Definition AMReX_TagParallelFor.H:28
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:34
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:27
Box dbox
Definition AMReX_TagParallelFor.H:30
Definition AMReX_TagParallelFor.H:38
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:46
Dim3 offset
Definition AMReX_TagParallelFor.H:43
Box dbox
Definition AMReX_TagParallelFor.H:42
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:39
Array4< int > mask
Definition AMReX_TagParallelFor.H:41
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:40
Definition AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition AMReX_TagParallelFor.H:17
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:22
Box dbox
Definition AMReX_TagParallelFor.H:19
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:18
Definition AMReX_TagParallelFor.H:50
Array4< T > dfab
Definition AMReX_TagParallelFor.H:51
__host__ __device__ Box box() const noexcept
Definition AMReX_TagParallelFor.H:54
Definition AMReX_Array4.H:61
Definition AMReX_TagParallelFor.H:106
Box bx
Definition AMReX_TagParallelFor.H:109
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:112
std::ptrdiff_t poff
Definition AMReX_TagParallelFor.H:108
Array4< T > dfab
Definition AMReX_TagParallelFor.H:107
Definition AMReX_TagParallelFor.H:116
std::ptrdiff_t poff
Definition AMReX_TagParallelFor.H:118
__host__ __device__ Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:122
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:117
Box bx
Definition AMReX_TagParallelFor.H:119
Definition AMReX_Dim3.H:12
Definition AMReX_TagParallelFor.H:158
TagVector(TagVector &&other) noexcept
Definition AMReX_TagParallelFor.H:184
~TagVector()
Definition AMReX_TagParallelFor.H:176
char * h_buffer
Definition AMReX_TagParallelFor.H:160
TagVector(const TagVector &other)=delete
TagType * d_tags
Definition AMReX_TagParallelFor.H:162
bool defined
Definition AMReX_TagParallelFor.H:167
int * d_nwarps
Definition AMReX_TagParallelFor.H:163
TagVector(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:172
bool is_defined() const
Definition AMReX_TagParallelFor.H:227
int ntotwarps
Definition AMReX_TagParallelFor.H:165
char * d_buffer
Definition AMReX_TagParallelFor.H:161
int ntags
Definition AMReX_TagParallelFor.H:164
TagVector & operator=(const TagVector &other)=delete
int nblocks
Definition AMReX_TagParallelFor.H:166
static constexpr int nthreads
Definition AMReX_TagParallelFor.H:168
void undefine()
Definition AMReX_TagParallelFor.H:290
void define(Vector< TagType > const &tags)
Definition AMReX_TagParallelFor.H:229
Definition AMReX_TagParallelFor.H:97
T * p
Definition AMReX_TagParallelFor.H:98
Long m_size
Definition AMReX_TagParallelFor.H:99
__host__ __device__ Long size() const noexcept
Definition AMReX_TagParallelFor.H:102