Block-Structured AMR Software Framework
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Modules
Pages
Loading...
Searching...
No Matches
AMReX_GpuLaunch.nolint.H
Go to the documentation of this file.
// Do not include this header anywhere other than AMReX_GpuLaunch.H.
// The purpose of this file is to avoid clang-tidy.

// Argument-counting dispatcher: expands to its 10th argument. Callers pad
// the variadic argument list so that NAME resolves to the implementation
// macro matching the number of user-supplied arguments (3, 6, or 9).
#define AMREX_GET_LAUNCH_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME

// Launch a device-only lambda over 1, 2, or 3 (box, tbox, block) triples.
// Any other argument count selects AMREX_WRONG_NUM_ARGS, producing a
// deliberate compile-time error.
#define AMREX_LAUNCH_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
// Launch a host/device lambda over 1, 2, or 3 (box, tbox, block) triples.
// Mirrors AMREX_LAUNCH_DEVICE_LAMBDA: the padded argument list makes
// AMREX_GET_LAUNCH_MACRO pick the 3-, 6-, or 9-argument implementation;
// any other count selects AMREX_WRONG_NUM_ARGS (a compile-time error).
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA(...) AMREX_GET_LAUNCH_MACRO(__VA_ARGS__,\
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE, \
        AMREX_WRONG_NUM_ARGS, \
        AMREX_WRONG_NUM_ARGS)(__VA_ARGS__)
// Dimension-aware launch macros. Callers always pass all three
// (box, tbox, block) triples; the triples beyond AMREX_SPACEDIM are
// discarded at preprocessing time, so 1D/2D builds compile the same call
// sites as 3D builds.
#if (AMREX_SPACEDIM == 1)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(a1,a2,a3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(fl,a1,a2,a3)
#elif (AMREX_SPACEDIM == 2)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(a1,a2,a3,b1,b2,b3)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(fl,a1,a2,a3,b1,b2,b3,c1,c2,c3) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(fl,a1,a2,a3,b1,b2,b3)
#elif (AMREX_SPACEDIM == 3)
#define AMREX_LAUNCH_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
#endif
#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_SYCL

// Run `block` for i in [0, n) on the device (amrex::ParallelFor) when
// where_to_run is RunOn::Device and GPU launching is enabled; otherwise run
// it as a SIMD-annotated CPU loop. `i` is the loop variable visible inside
// `block`; the iteration type is deduced from `n`.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
    block \
    ); \
} \
else { \
    AMREX_PRAGMA_SIMD \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    } \
}}

// 3D version: iterate `block` over the cells (i,j,k) of `box`; device
// ParallelFor on GPU, amrex::LoopConcurrentOnCpu otherwise.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
    block \
    ); \
} \
else { \
    amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    ); \
}

// 4D version: as above, plus a component loop n in [0, nc).
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
    block \
    ); \
} \
else { \
    amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    ); \
}
// Same dispatch as the PARALLEL_FOR_*_FLAG macros, but the CPU fallback is a
// plain sequential loop (1D: no AMREX_PRAGMA_SIMD; 3D/4D: amrex::LoopOnCpu
// instead of LoopConcurrentOnCpu).
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
    block \
    ); \
} \
else { \
    for (amrex_i_inttype i = 0; i < n; ++i) { \
        block \
    } \
}}

// 3D version: device ParallelFor on GPU, sequential amrex::LoopOnCpu otherwise.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
    block \
    ); \
} \
else { \
    amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
    block \
    ); \
}

// 4D version: as above, plus a component loop n in [0, nc).
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
    block \
    ); \
} \
else { \
    amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
    block \
    ); \
}
// Launch `block` over `box` on the device when requested and enabled,
// otherwise execute it on the host with `tbox` aliased to `box`.
// On CUDA/Windows the device path calls amrex::launch directly instead of
// going through AMREX_LAUNCH_DEVICE_LAMBDA (MSVC preprocessor workaround —
// behavior is the same).
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::launch(box, [=] AMREX_GPU_DEVICE (std::decay_t<decltype(box)> const& tbox) { block }); \
} else { \
    auto tbox = box; \
    block; \
}
#else
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
} else { \
    auto tbox = box; \
    block; \
}
#endif
// One-range launch: device path defers to AMREX_LAUNCH_DEVICE_LAMBDA; host
// path binds tbx1 to bx1 and runs block1 inline.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
} else { \
    auto tbx1 = bx1; \
    block1; \
}

// Two-range launch: as above, for two (box, tbox, block) triples.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
} else { \
    auto tbx1 = bx1; \
    auto tbx2 = bx2; \
    block1; \
    block2; \
}

// Three-range launch: as above, for three (box, tbox, block) triples.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
} else { \
    auto tbx1 = bx1; \
    auto tbx2 = bx2; \
    auto tbx3 = bx3; \
    block1; \
    block2; \
    block3; \
}
#else
// xxxxx SYCL todo: host disabled in host device
// SYCL variants: the device path is identical to the non-SYCL macros, but
// the host fallback aborts at runtime instead of running a CPU loop.

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}
// SYCL FOR variants: device path matches the non-SYCL macros; the host
// fallback aborts at runtime (host execution disabled for this backend).
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
    block \
    ); \
} \
else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}
// SYCL launch-flag variants: device path defers to AMREX_LAUNCH_DEVICE_LAMBDA;
// the host fallback aborts at runtime (host execution disabled for this
// backend).
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
} else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
} else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
} else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
    AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
} else { \
    amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
}

#endif
#else

// CPU-only build (no AMREX_USE_GPU): where_to_run is irrelevant, so it is
// consumed via amrex::ignore_unused and the loop always runs on the host.
// PARALLEL variants use SIMD / LoopConcurrentOnCpu; plain FOR variants use
// sequential loops / LoopOnCpu.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
amrex::ignore_unused(where_to_run); \
AMREX_PRAGMA_SIMD \
for (amrex_i_inttype i = 0; i < n; ++i) { \
    block \
}}

#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
amrex::ignore_unused(where_to_run); \
amrex::LoopConcurrentOnCpu(box, [=] (int i, int j, int k) noexcept \
block \
);

#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
amrex::ignore_unused(where_to_run); \
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
);

#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
amrex::ignore_unused(where_to_run); \
for (amrex_i_inttype i = 0; i < n; ++i) { \
    block \
}}

#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
amrex::ignore_unused(where_to_run); \
amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept \
block \
);

#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
amrex::ignore_unused(where_to_run); \
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
);
// CPU-only launch-flag variants: always execute the block(s) inline on the
// host, binding each tbox alias to its box; where_to_run is consumed via
// amrex::ignore_unused.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
amrex::ignore_unused(where_to_run); \
{ \
    auto tbox = box; \
    block; \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
amrex::ignore_unused(where_to_run); \
{ \
    auto tbx1 = bx1; \
    block1; \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
amrex::ignore_unused(where_to_run); \
{ \
    auto tbx1 = bx1; \
    auto tbx2 = bx2; \
    block1; \
    block2; \
}

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
amrex::ignore_unused(where_to_run); \
{ \
    auto tbx1 = bx1; \
    auto tbx2 = bx2; \
    auto tbx3 = bx3; \
    block1; \
    block2; \
    block3; \
}

#endif
Src
Base
AMReX_GpuLaunch.nolint.H
Generated by
1.9.8