Block-Structured AMR Software Framework
 
AMReX_GpuLaunch.nolint.H
// Do not include this header anywhere other than AMReX_GpuLaunch.H.
// The purpose of this file is to keep these launch macros out of clang-tidy's checks.

#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)

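// Note: AMREX_GET_LAUNCH_MACRO selects a launch macro by argument count, so the
// two variadic macros above accept 3, 6, or 9 arguments (one, two, or three
// box/tilebox/body triples) and expand to the matching _RANGE, _RANGE_2, or
// _RANGE_3 form defined in AMReX_GpuLaunch.H; any other count picks
// AMREX_WRONG_NUM_ARGS and is expected to fail to compile. A hypothetical
// single-range call:
//
//     AMREX_LAUNCH_DEVICE_LAMBDA(bx, tbx,
//     {
//         // device code operating on the sub-box tbx
//     });
//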
#if (AMREX_SPACEDIM == 1)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(fl,a1,a2,a3)
#elif (AMREX_SPACEDIM == 2)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(fl,a1,a2,a3,b1,b2,b3)
#elif (AMREX_SPACEDIM == 3)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
#endif

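// Note: the _DIM variants always take three box/tilebox/body triples, but only
// the first AMREX_SPACEDIM of them are used; in 1D and 2D builds the trailing
// triples are dropped by the expansions above. A hypothetical call (fx, fy, fz
// are hypothetical functions):
//
//     AMREX_LAUNCH_DEVICE_LAMBDA_DIM(xbx, txbx, { fx(txbx); },
//                                    ybx, tybx, { fy(tybx); },
//                                    zbx, tzbx, { fz(tzbx); });
//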
#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_SYCL

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        AMREX_PRAGMA_SIMD \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}

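// Example (hypothetical): with a device-accessible pointer p and a run-time
// RunOn flag runon,
//
//     AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(runon, n, i,
//     {
//         p[i] *= 2.0;
//     });
//
// runs as an amrex::ParallelFor kernel on the device path and as a SIMD loop
// on the host path.
//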
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    }

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    }

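// Example (hypothetical): the 3D/4D forms loop over the cells of a Box (and, in
// the 4D case, over nc components). With an Array4 named a,
//
//     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, bx, ncomp, i, j, k, n,
//     {
//         a(i,j,k,n) = 0.0;
//     });
//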
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    }

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    }

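// Note: on the host path the PARALLEL_FOR_*_FLAG macros above use
// AMREX_PRAGMA_SIMD or amrex::LoopConcurrentOnCpu, whereas the FOR_*_FLAG
// variants use a plain loop or amrex::LoopOnCpu, so the PARALLEL_FOR forms are
// meant for bodies whose iterations can safely run concurrently.
//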
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::launch(box, [=] AMREX_GPU_DEVICE (std::decay_t<decltype(box)> const& tbox) { block }); \
    } else { \
        auto tbox = box; \
        block; \
    }
#else
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        auto tbox = box; \
        block; \
    }
#endif

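// Example (hypothetical): tbox names the sub-box the body receives on the
// device; on the host path it is simply a copy of box and the body runs once.
//
//     AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(runon, bx, tbx,
//     {
//         work_on(tbx);   // work_on is a hypothetical helper
//     });
//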
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        auto tbx1 = bx1; \
        block1; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }

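// Example (hypothetical): the _RANGE_2/_RANGE_3 variants bundle two or three
// box/body pairs under a single where_to_run check; on the host path each body
// runs in turn on a copy of its box (fx, fy are hypothetical functions).
//
//     AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(runon,
//         xbx, txbx, { fx(txbx); },
//         ybx, tybx, { fy(tybx); });
//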
#else
// xxxxx SYCL todo: host disabled in host device

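// Note: in the SYCL build the host branch of every *_FLAG macro below aborts at
// run time, so these macros must be invoked with where_to_run == RunOn::Device
// while Gpu::inLaunchRegion() is true.
//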
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#endif

#else

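// Note: without AMREX_USE_GPU the where_to_run flag has no effect; the macros
// below discard it via amrex::ignore_unused and always execute the host loop.
//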
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    AMREX_PRAGMA_SIMD \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    );

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbox = box; \
        block; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        block1; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }

#endif