Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_TagParallelFor.H
Go to the documentation of this file.
1#ifndef AMREX_TAG_PARALLELFOR_H_
2#define AMREX_TAG_PARALLELFOR_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Arena.H>
6#include <AMReX_Array4.H>
7#include <AMReX_Box.H>
8#include <AMReX_GpuLaunch.H>
9#include <AMReX_Vector.H>
10#include <limits>
11#include <utility>
12
13namespace amrex {
14
15template <class T>
20
22 Box const& box () const noexcept { return dbox; }
23};
24
25template <class T0, class T1=T0>
30 Dim3 offset; // sbox.smallEnd() - dbox.smallEnd()
31
33 Box const& box () const noexcept { return dbox; }
34};
35
36template <class T0, class T1=T0>
42 Dim3 offset; // sbox.smallEnd() - dbox.smallEnd()
43
45 Box const& box () const noexcept { return dbox; }
46};
47
48template <class T>
49struct Array4Tag {
51
53 Box box () const noexcept { return Box(dfab); }
54};
55
56template <class T>
60
62 Box const& box () const noexcept { return dbox; }
63};
64
65template <class T>
69 T val;
70
72 Box const& box () const noexcept { return dbox; }
73};
74
75template <class T>
80
82 Box const& box() const noexcept { return bx; }
83};
84
85template <class T>
90
92 Box const& box() const noexcept { return bx; }
93};
94
95template <class T>
96struct VectorTag {
97 T* p;
98 Long m_size;
99
101 Long size () const noexcept { return m_size; }
102};
103
104#ifdef AMREX_USE_GPU
105
106namespace detail {
107
108template <typename T>
109std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>, Box>::value,
110 Long>
111get_tag_size (T const& tag) noexcept
112{
113 AMREX_ASSERT(tag.box().numPts() < Long(std::numeric_limits<int>::max()));
114 return static_cast<int>(tag.box().numPts());
115}
116
117template <typename T>
118std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value,
119 Long>
120get_tag_size (T const& tag) noexcept
121{
122 AMREX_ASSERT(tag.size() < Long(std::numeric_limits<int>::max()));
123 return tag.size();
124}
125
126template <typename T, typename F>
128std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<T>().box())>, Box>::value, void>
130#ifdef AMREX_USE_SYCL
131 sycl::nd_item<1> const& item,
132#endif
133 int icell, T const& tag, F&& f) noexcept
134{
135 int ncells = tag.box().numPts();
136 const auto len = amrex::length(tag.box());
137 const auto lo = amrex::lbound(tag.box());
138 int k = icell / (len.x*len.y);
139 int j = (icell - k*(len.x*len.y)) / len.x;
140 int i = (icell - k*(len.x*len.y)) - j*len.x;
141 i += lo.x;
142 j += lo.y;
143 k += lo.z;
144#ifdef AMREX_USE_SYCL
145 f(item, icell, ncells, i, j, k, tag);
146#else
147 f( icell, ncells, i, j, k, tag);
148#endif
149}
150
151template <typename T, typename F>
153std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<T>().size())> >::value, void>
155#ifdef AMREX_USE_SYCL
156 sycl::nd_item<1> const& item,
157#endif
158 int i, T const& tag, F&& f) noexcept
159{
160 int N = tag.size();
161#ifdef AMREX_USE_SYCL
162 f(item, i, N, tag);
163#else
164 f( i, N, tag);
165#endif
166}
167
168template <class TagType, class F>
169void
171{
172 const int ntags = tags.size();
173 if (ntags == 0) { return; }
174
175 Long l_ntotwarps = 0;
176 int ntotwarps = 0;
177 Vector<int> nwarps;
178 nwarps.reserve(ntags+1);
179 for (int i = 0; i < ntags; ++i)
180 {
181 auto& tag = tags[i];
182 nwarps.push_back(ntotwarps);
184 l_ntotwarps += nw;
185 ntotwarps += static_cast<int>(nw);
186 }
187 nwarps.push_back(ntotwarps);
188
189 std::size_t sizeof_tags = ntags*sizeof(TagType);
190 std::size_t offset_nwarps = Arena::align(sizeof_tags);
191 std::size_t sizeof_nwarps = (ntags+1)*sizeof(int);
192 std::size_t total_buf_size = offset_nwarps + sizeof_nwarps;
193
194 char* h_buffer = (char*)The_Pinned_Arena()->alloc(total_buf_size);
195 char* d_buffer = (char*)The_Arena()->alloc(total_buf_size);
196
197 std::memcpy(h_buffer, tags.data(), sizeof_tags);
198 std::memcpy(h_buffer+offset_nwarps, nwarps.data(), sizeof_nwarps);
199 Gpu::htod_memcpy_async(d_buffer, h_buffer, total_buf_size);
200
201 auto d_tags = reinterpret_cast<TagType*>(d_buffer);
202 auto d_nwarps = reinterpret_cast<int*>(d_buffer+offset_nwarps);
203
204 constexpr int nthreads = 256;
205 constexpr int nwarps_per_block = nthreads/Gpu::Device::warp_size;
206 int nblocks = (ntotwarps + nwarps_per_block-1) / nwarps_per_block;
207
208 amrex::ignore_unused(l_ntotwarps);
209 AMREX_ASSERT(l_ntotwarps+nwarps_per_block-1 < Long(std::numeric_limits<int>::max()));
210
211 amrex::launch(nblocks, nthreads, Gpu::gpuStream(),
212#ifdef AMREX_USE_SYCL
213 [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) noexcept
214 [[sycl::reqd_work_group_size(nthreads)]]
215 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]]
216#else
217 [=] AMREX_GPU_DEVICE () noexcept
218#endif
219 {
220#ifdef AMREX_USE_SYCL
221 std::size_t g_tid = item.get_global_id(0);
222#else
223 auto g_tid = std::size_t(blockDim.x)*blockIdx.x + threadIdx.x;
224#endif
225 auto g_wid = int(g_tid / Gpu::Device::warp_size);
226 if (g_wid >= ntotwarps) { return; }
227
228 int tag_id = amrex::bisect(d_nwarps, 0, ntags, g_wid);
229
230 int b_wid = g_wid - d_nwarps[tag_id]; // b_wid'th warp on this box
231#ifdef AMREX_USE_SYCL
232 int lane = item.get_local_id(0) % Gpu::Device::warp_size;
233#else
234 int lane = threadIdx.x % Gpu::Device::warp_size;
235#endif
236 int icell = b_wid*Gpu::Device::warp_size + lane;
237
238#ifdef AMREX_USE_SYCL
239 tagparfor_call_f(item, icell, d_tags[tag_id], f);
240#else
241 tagparfor_call_f( icell, d_tags[tag_id], f);
242#endif
243 });
244
246 The_Pinned_Arena()->free(h_buffer);
247 The_Arena()->free(d_buffer);
248}
249
250}
251
252template <class TagType, class F>
253std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>,
254 Box>::value>
255ParallelFor (Vector<TagType> const& tags, int ncomp, F && f)
256{
258 [=] AMREX_GPU_DEVICE (
259#ifdef AMREX_USE_SYCL
260 sycl::nd_item<1> const& /*item*/,
261#endif
262 int icell, int ncells, int i, int j, int k, TagType const& tag) noexcept
263 {
264 if (icell < ncells) {
265 for (int n = 0; n < ncomp; ++n) {
266 f(i,j,k,n,tag);
267 }
268 }
269 });
270}
271
272template <class TagType, class F>
273std::enable_if_t<std::is_same<std::decay_t<decltype(std::declval<TagType>().box())>, Box>::value, void>
274ParallelFor (Vector<TagType> const& tags, F && f)
275{
277 [=] AMREX_GPU_DEVICE (
278#ifdef AMREX_USE_SYCL
279 sycl::nd_item<1> const& /*item*/,
280#endif
281 int icell, int ncells, int i, int j, int k, TagType const& tag) noexcept
282 {
283 if (icell < ncells) {
284 f(i,j,k,tag);
285 }
286 });
287}
288
289template <class TagType, class F>
290std::enable_if_t<std::is_integral<std::decay_t<decltype(std::declval<TagType>().size())> >::value, void>
291ParallelFor (Vector<TagType> const& tags, F && f)
292{
294 [=] AMREX_GPU_DEVICE (
295#ifdef AMREX_USE_SYCL
296 sycl::nd_item<1> const& /*item*/,
297#endif
298 int icell, int ncells, TagType const& tag) noexcept
299 {
300 if (icell < ncells) {
301 f(icell,tag);
302 }
303 });
304}
305
306#endif
307
308}
309
310#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
static std::size_t align(std::size_t sz)
Given a minimum required arena size of sz bytes, this returns the next largest arena size that will a...
Definition AMReX_Arena.cpp:143
AMREX_GPU_HOST_DEVICE IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:139
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition AMReX_Box.H:346
static AMREX_EXPORT constexpr int warp_size
Definition AMReX_GpuDevice.H:173
Encapsulation of the Orientation of the Faces of a Box.
Definition AMReX_Orientation.H:29
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:237
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:251
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:218
void ParallelFor_doit(Vector< TagType > const &tags, F &&f)
Definition AMReX_TagParallelFor.H:170
Long get_tag_size(T const &tag) noexcept
Definition AMReX_TagParallelFor.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void tagparfor_call_f(int icell, T const &tag, F &&f) noexcept
Definition AMReX_TagParallelFor.H:129
Definition AMReX_Amr.cpp:49
BoxND< AMREX_SPACEDIM > Box
Definition AMReX_BaseFwd.H:27
void launch(T const &n, L &&f) noexcept
Definition AMReX_GpuLaunchFunctsC.H:120
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 length(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:322
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:656
const int[]
Definition AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition AMReX_Arena.cpp:616
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_TagParallelFor.H:86
Box bx
Definition AMReX_TagParallelFor.H:88
Array4< T > fab
Definition AMReX_TagParallelFor.H:87
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:92
Dim3 offset
Definition AMReX_TagParallelFor.H:89
Definition AMReX_TagParallelFor.H:76
Array4< T > fab
Definition AMReX_TagParallelFor.H:77
Box bx
Definition AMReX_TagParallelFor.H:78
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:82
Orientation face
Definition AMReX_TagParallelFor.H:79
Definition AMReX_TagParallelFor.H:57
Array4< T > dfab
Definition AMReX_TagParallelFor.H:58
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:62
Box dbox
Definition AMReX_TagParallelFor.H:59
Definition AMReX_TagParallelFor.H:66
T val
Definition AMReX_TagParallelFor.H:69
Array4< T > dfab
Definition AMReX_TagParallelFor.H:67
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:72
Box dbox
Definition AMReX_TagParallelFor.H:68
Definition AMReX_TagParallelFor.H:26
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:33
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:28
Dim3 offset
Definition AMReX_TagParallelFor.H:30
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:27
Box dbox
Definition AMReX_TagParallelFor.H:29
Definition AMReX_TagParallelFor.H:37
Dim3 offset
Definition AMReX_TagParallelFor.H:42
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:45
Box dbox
Definition AMReX_TagParallelFor.H:41
Array4< T0 > dfab
Definition AMReX_TagParallelFor.H:38
Array4< int > mask
Definition AMReX_TagParallelFor.H:40
Array4< T1 const > sfab
Definition AMReX_TagParallelFor.H:39
Definition AMReX_TagParallelFor.H:16
Array4< T > dfab
Definition AMReX_TagParallelFor.H:17
Box dbox
Definition AMReX_TagParallelFor.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box const & box() const noexcept
Definition AMReX_TagParallelFor.H:22
Array4< T const > sfab
Definition AMReX_TagParallelFor.H:18
Definition AMReX_TagParallelFor.H:49
Array4< T > dfab
Definition AMReX_TagParallelFor.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box box() const noexcept
Definition AMReX_TagParallelFor.H:53
Definition AMReX_Array4.H:61
Definition AMReX_Dim3.H:12
Definition AMReX_TagParallelFor.H:96
T * p
Definition AMReX_TagParallelFor.H:97
Long m_size
Definition AMReX_TagParallelFor.H:98
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Long size() const noexcept
Definition AMReX_TagParallelFor.H:101