5#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \
6 { auto const& amrex_i_tn = TN; \
7 if (!amrex::isEmpty(amrex_i_tn)) { \
8 if (amrex::Gpu::inLaunchRegion()) \
10 const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
11 int amrex_i_nthreads_per_block = amrex_i_ec.numThreads.x; \
12 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_ec.numBlocks.x; \
13 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
15 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
16 amrex_i_h.parallel_for(sycl::nd_range<1>(sycl::range<1>(amrex_i_nthreads_total), \
17 sycl::range<1>(amrex_i_nthreads_per_block)), \
18 [=] (sycl::nd_item<1> amrex_i_item) \
19 [[sycl::reqd_work_group_size(AMREX_GPU_MAX_THREADS)]] \
20 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
22 for (auto const TI : amrex::Gpu::Range(amrex_i_tn,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
27 } catch (sycl::exception const& ex) { \
28 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
32 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
43#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \
44 { auto const& amrex_i_tn = TN; \
45 if (!amrex::isEmpty(amrex_i_tn)) { \
46 if (amrex::Gpu::inLaunchRegion()) \
48 const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
49 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
50 [=] AMREX_GPU_DEVICE () noexcept { \
51 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
55 AMREX_GPU_ERROR_CHECK(); \
58 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
66#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
67 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
68 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
69 if (amrex::Gpu::inLaunchRegion()) \
71 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
72 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
73 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
74 amrex_i_ec2.numBlocks.x); \
75 amrex_i_nblocks.y = 2; \
76 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
77 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
78 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
80 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
81 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,2), \
82 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
83 [=] (sycl::nd_item<2> amrex_i_item) \
84 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
86 switch (amrex_i_item.get_group(1)) { \
87 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
91 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
97 } catch (sycl::exception const& ex) { \
98 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
102 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
106 for (
auto const TI1 :
amrex::Gpu::Range(amrex_i_tn1)) { \
116#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
117 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
118 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
119 if (amrex::Gpu::inLaunchRegion()) \
121 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
122 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
123 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
124 amrex_i_ec2.numBlocks.x); \
125 amrex_i_nblocks.y = 2; \
126 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
127 [=] AMREX_GPU_DEVICE () noexcept { \
128 switch (blockIdx.y) { \
129 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
133 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
138 AMREX_GPU_ERROR_CHECK(); \
141 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
144 for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
152#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
153 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
154 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
155 if (amrex::Gpu::inLaunchRegion()) \
157 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
158 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
159 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
160 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
161 amrex_i_ec2.numBlocks.x), \
162 amrex_i_ec3.numBlocks.x); \
163 amrex_i_nblocks.y = 3; \
164 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
165 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
166 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
168 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
169 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,3), \
170 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
171 [=] (sycl::nd_item<2> amrex_i_item) \
172 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
174 switch (amrex_i_item.get_group(1)) { \
175 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
179 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
183 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
189 } catch (sycl::exception const& ex) { \
190 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
194 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
198 for (
auto const TI1 :
amrex::Gpu::Range(amrex_i_tn1)) { \
211#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
212 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
213 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
214 if (amrex::Gpu::inLaunchRegion()) \
216 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
217 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
218 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
219 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
220 amrex_i_ec2.numBlocks.x), \
221 amrex_i_ec3.numBlocks.x); \
222 amrex_i_nblocks.y = 3; \
223 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
224 [=] AMREX_GPU_DEVICE () noexcept { \
225 switch (blockIdx.y) { \
226 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
230 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
234 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
239 AMREX_GPU_ERROR_CHECK(); \
242 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
245 for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
248 for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
255#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(TN,TI,block) \
256 { auto const& amrex_i_tn = TN; \
257 if (!amrex::isEmpty(amrex_i_tn)) { \
258 if (amrex::Gpu::inLaunchRegion()) \
260 auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
261 int amrex_i_nthreads_per_block = amrex_i_ec.numThreads.x; \
262 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_ec.numBlocks.x; \
263 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
265 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
266 amrex_i_h.parallel_for(sycl::nd_range<1>(sycl::range<1>(amrex_i_nthreads_total), \
267 sycl::range<1>(amrex_i_nthreads_per_block)), \
268 [=] (sycl::nd_item<1> amrex_i_item) \
269 [[sycl::reqd_work_group_size(AMREX_GPU_MAX_THREADS)]] \
270 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
272 for (auto const TI : amrex::Gpu::Range(amrex_i_tn,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
277 } catch (sycl::exception const& ex) { \
278 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
282 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE: cannot call device function from host"); \
285#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(TN,TI,block) \
286 { auto const& amrex_i_tn = TN; \
287 if (!amrex::isEmpty(amrex_i_tn)) { \
288 if (amrex::Gpu::inLaunchRegion()) \
290 auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
291 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
292 [=] AMREX_GPU_DEVICE () noexcept { \
293 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
297 AMREX_GPU_ERROR_CHECK(); \
300 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE: cannot call device function from host"); \
306#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
307 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
308 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
309 if (amrex::Gpu::inLaunchRegion()) \
311 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
312 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
313 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
314 amrex_i_ec2.numBlocks.x); \
315 amrex_i_nblocks.y = 2; \
316 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
317 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
318 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
320 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
321 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,2), \
322 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
323 [=] (sycl::nd_item<2> amrex_i_item) \
324 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
326 switch (amrex_i_item.get_group(1)) { \
327 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
331 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
337 } catch (sycl::exception const& ex) { \
338 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
342 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
345#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
346 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
347 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
348 if (amrex::Gpu::inLaunchRegion()) \
350 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
351 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
352 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
353 amrex_i_ec2.numBlocks.x); \
354 amrex_i_nblocks.y = 2; \
355 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
356 [=] AMREX_GPU_DEVICE () noexcept { \
357 switch (blockIdx.y) { \
358 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
362 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
367 AMREX_GPU_ERROR_CHECK(); \
370 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
376#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
377 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
378 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
379 if (amrex::Gpu::inLaunchRegion()) \
381 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
382 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
383 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
384 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
385 amrex_i_ec2.numBlocks.x), \
386 amrex_i_ec3.numBlocks.x); \
387 amrex_i_nblocks.y = 3; \
388 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
389 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
390 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
392 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
393 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,3), \
394 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
395 [=] (sycl::nd_item<2> amrex_i_item) \
396 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
398 switch (amrex_i_item.get_group(1)) { \
399 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
403 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
407 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
413 } catch (sycl::exception const& ex) { \
414 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
418 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
421#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
422 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
423 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
424 if (amrex::Gpu::inLaunchRegion()) \
426 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
427 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
428 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
429 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
430 amrex_i_ec2.numBlocks.x), \
431 amrex_i_ec3.numBlocks.x); \
432 amrex_i_nblocks.y = 3; \
433 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
434 [=] AMREX_GPU_DEVICE () noexcept { \
435 switch (blockIdx.y) { \
436 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
440 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
444 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
449 AMREX_GPU_ERROR_CHECK(); \
452 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
459#define AMREX_HOST_DEVICE_FOR_1D(n,i,block) \
461 auto const& amrex_i_n = n; \
462 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
463 if (amrex::Gpu::inLaunchRegion()) { \
464 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
466 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
470#define AMREX_HOST_DEVICE_FOR_1D(n,i,block) \
472 auto const& amrex_i_n = n; \
473 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
474 if (amrex::Gpu::inLaunchRegion()) { \
475 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
477 auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \
479 for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \
485#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \
487 auto const& amrex_i_n = n; \
488 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
489 if (amrex::Gpu::inLaunchRegion()) { \
490 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
492 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
496#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \
498 auto const& amrex_i_n = n; \
499 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
500 if (amrex::Gpu::inLaunchRegion()) { \
501 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
503 auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \
505 for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \
510#define AMREX_FOR_1D(n,i,block) \
512 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
513 amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
516#define AMREX_PARALLEL_FOR_1D(n,i,block) \
518 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
519 amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
525#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
527 auto const& amrex_i_box = box; \
528 if (amrex::Gpu::inLaunchRegion()) { \
529 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
531 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
535#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
537 auto const& amrex_i_box = box; \
538 if (amrex::Gpu::inLaunchRegion()) { \
539 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
541 amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \
547#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \
549 auto const& amrex_i_box = box; \
550 if (amrex::Gpu::inLaunchRegion()) { \
551 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
553 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
557#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \
559 auto const& amrex_i_box = box; \
560 if (amrex::Gpu::inLaunchRegion()) { \
561 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
563 amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \
568#define AMREX_FOR_3D(box,i,j,k,block) \
570 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
573#define AMREX_PARALLEL_FOR_3D(box,i,j,k,block) \
575 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
581#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
583 auto const& amrex_i_box = box; \
584 auto const& amrex_i_ncomp = ncomp; \
585 if (amrex::Gpu::inLaunchRegion()) { \
586 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
588 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
592#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
594 auto const& amrex_i_box = box; \
595 auto const& amrex_i_ncomp = ncomp; \
596 if (amrex::Gpu::inLaunchRegion()) { \
597 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
599 amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \
605#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
607 auto const& amrex_i_box = box; \
608 auto const& amrex_i_ncomp = ncomp; \
609 if (amrex::Gpu::inLaunchRegion()) { \
610 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
612 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
616#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
618 auto const& amrex_i_box = box; \
619 auto const& amrex_i_ncomp = ncomp; \
620 if (amrex::Gpu::inLaunchRegion()) { \
621 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
623 amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \
628#define AMREX_FOR_4D(box,ncomp,i,j,k,n,block) \
630 amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
633#define AMREX_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
635 amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
__host__ __device__ range_detail::range_impl< T > Range(T const &b) noexcept
Definition AMReX_GpuRange.H:127
Definition AMReX_Amr.cpp:49