// Block-Structured AMR Software Framework
// AMReX_GpuLaunch.nolint.H
// Do not include this header anywhere other than AMReX_GpuLaunch.H.
// The purpose of this file is to avoid clang-tidy.
#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
5
#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
6
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
7
AMREX_WRONG_NUM_ARGS, \
8
AMREX_WRONG_NUM_ARGS, \
9
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2, \
10
AMREX_WRONG_NUM_ARGS, \
11
AMREX_WRONG_NUM_ARGS, \
12
AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE, \
13
AMREX_WRONG_NUM_ARGS, \
14
AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
15
16
// Host+device counterpart of AMREX_LAUNCH_DEVICE_LAMBDA: dispatches on the
// number of (box, tbox, body) triples — 3, 6, or 9 arguments — and selects
// AMREX_WRONG_NUM_ARGS (a compile-time error) for any other count.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
                                                                    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
                                                                    AMREX_WRONG_NUM_ARGS, \
                                                                    AMREX_WRONG_NUM_ARGS, \
                                                                    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2, \
                                                                    AMREX_WRONG_NUM_ARGS, \
                                                                    AMREX_WRONG_NUM_ARGS, \
                                                                    AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE, \
                                                                    AMREX_WRONG_NUM_ARGS, \
                                                                    AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
// Dimension-aware launch macros.  Call sites always pass three (box, tbox,
// body) triples; in 1D and 2D builds the surplus triples are accepted but
// ignored, so the same call site compiles for every AMREX_SPACEDIM.
#if (AMREX_SPACEDIM == 1)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(fl,a1,a2,a3)
#elif (AMREX_SPACEDIM == 2)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(fl,a1,a2,a3,b1,b2,b3)
#elif (AMREX_SPACEDIM == 3)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
#endif
// Short aliases forwarding to the AMREX_GPU_* loop macros defined in
// AMReX_GpuLaunch.H.
#define AMREX_FOR_1D(...) AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
#define AMREX_FOR_3D(...) AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
#define AMREX_FOR_4D(...) AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)

#define AMREX_PARALLEL_FOR_1D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
#define AMREX_PARALLEL_FOR_3D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
#define AMREX_PARALLEL_FOR_4D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)

#define AMREX_HOST_DEVICE_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)
#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_SYCL

// GPU build (non-SYCL): each *_FLAG macro runs the body on the device via
// amrex::ParallelFor when the caller asks for RunOn::Device and GPU
// launching is enabled (Gpu::inLaunchRegion()); otherwise it falls back to
// the corresponding host loop.

// 1D host fallback is a SIMD loop over [0, n).
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        AMREX_PRAGMA_SIMD \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}

// 3D host fallback uses LoopConcurrentOnCpu (vectorizable loop over box).
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    }

// 4D adds a component index n in [0, nc).
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    }

// FOR variants: same dispatch, but the host loop is plain (no SIMD pragma,
// LoopOnCpu instead of LoopConcurrentOnCpu).
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        for (amrex_i_inttype i = 0; i < n; ++i) { \
            block \
        } \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
        block \
        ); \
    }

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
        block \
        ); \
    }

// LAMBDA variants: device path launches via AMREX_LAUNCH_DEVICE_LAMBDA;
// host path just binds tbox to box and executes the body inline.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        auto tbox = box; \
        block; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        auto tbx1 = bx1; \
        block1; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }
#else
// xxxxx SYCL todo: host disabled in host device
//
// SYCL build: the device paths are identical to the non-SYCL versions, but
// every host fallback calls amrex::Abort at run time instead of executing
// the body (compiling the host versions was deemed too slow for Intel).

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
        block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#endif
#else

// CPU-only build: where_to_run is irrelevant (silenced via ignore_unused)
// and every macro executes the host loop / body directly.

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    AMREX_PRAGMA_SIMD \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
    amrex::ignore_unused(where_to_run); \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    }}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    );

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    amrex::ignore_unused(where_to_run); \
    amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    );

// LAMBDA variants just alias tbox/tbx* to the given boxes and run the
// bodies inline.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbox = box; \
        block; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        block1; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        block1; \
        block2; \
    }

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    amrex::ignore_unused(where_to_run); \
    { \
        auto tbx1 = bx1; \
        auto tbx2 = bx2; \
        auto tbx3 = bx3; \
        block1; \
        block2; \
        block3; \
    }

#endif
// Src/Base/AMReX_GpuLaunch.nolint.H