5 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \
6 { auto const& amrex_i_tn = TN; \
7 if (!amrex::isEmpty(amrex_i_tn)) { \
8 if (amrex::Gpu::inLaunchRegion()) \
10 const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
11 int amrex_i_nthreads_per_block = amrex_i_ec.numThreads.x; \
12 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_ec.numBlocks.x; \
13 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
15 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
16 amrex_i_h.parallel_for(sycl::nd_range<1>(sycl::range<1>(amrex_i_nthreads_total), \
17 sycl::range<1>(amrex_i_nthreads_per_block)), \
18 [=] (sycl::nd_item<1> amrex_i_item) \
19 [[sycl::reqd_work_group_size(AMREX_GPU_MAX_THREADS)]] \
20 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
22 for (auto const TI : amrex::Gpu::Range(amrex_i_tn,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
27 } catch (sycl::exception const& ex) { \
28 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
32 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
43 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \
44 { auto const& amrex_i_tn = TN; \
45 if (!amrex::isEmpty(amrex_i_tn)) { \
46 if (amrex::Gpu::inLaunchRegion()) \
48 const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
49 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
50 [=] AMREX_GPU_DEVICE () noexcept { \
51 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
55 AMREX_GPU_ERROR_CHECK(); \
58 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
66 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
67 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
68 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
69 if (amrex::Gpu::inLaunchRegion()) \
71 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
72 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
73 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
74 amrex_i_ec2.numBlocks.x); \
75 amrex_i_nblocks.y = 2; \
76 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
77 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
78 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
80 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
81 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,2), \
82 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
83 [=] (sycl::nd_item<2> amrex_i_item) \
84 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
86 switch (amrex_i_item.get_group(1)) { \
87 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
91 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
97 } catch (sycl::exception const& ex) { \
98 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
102 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
116 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
117 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
118 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
119 if (amrex::Gpu::inLaunchRegion()) \
121 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
122 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
123 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
124 amrex_i_ec2.numBlocks.x); \
125 amrex_i_nblocks.y = 2; \
126 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
127 [=] AMREX_GPU_DEVICE () noexcept { \
128 switch (blockIdx.y) { \
129 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
133 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
138 AMREX_GPU_ERROR_CHECK(); \
141 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
144 for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
151 #ifdef AMREX_USE_SYCL
152 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
153 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
154 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
155 if (amrex::Gpu::inLaunchRegion()) \
157 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
158 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
159 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
160 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
161 amrex_i_ec2.numBlocks.x), \
162 amrex_i_ec3.numBlocks.x); \
163 amrex_i_nblocks.y = 3; \
164 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
165 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
166 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
168 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
169 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,3), \
170 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
171 [=] (sycl::nd_item<2> amrex_i_item) \
172 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
174 switch (amrex_i_item.get_group(1)) { \
175 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
179 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
183 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
189 } catch (sycl::exception const& ex) { \
190 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
194 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
211 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
212 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
213 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
214 if (amrex::Gpu::inLaunchRegion()) \
216 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
217 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
218 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
219 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
220 amrex_i_ec2.numBlocks.x), \
221 amrex_i_ec3.numBlocks.x); \
222 amrex_i_nblocks.y = 3; \
223 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
224 [=] AMREX_GPU_DEVICE () noexcept { \
225 switch (blockIdx.y) { \
226 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
230 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
234 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
239 AMREX_GPU_ERROR_CHECK(); \
242 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
245 for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
248 for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
254 #ifdef AMREX_USE_SYCL
255 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(TN,TI,block) \
256 { auto const& amrex_i_tn = TN; \
257 if (!amrex::isEmpty(amrex_i_tn)) { \
258 if (amrex::Gpu::inLaunchRegion()) \
260 auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
261 int amrex_i_nthreads_per_block = amrex_i_ec.numThreads.x; \
262 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_ec.numBlocks.x; \
263 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
265 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
266 amrex_i_h.parallel_for(sycl::nd_range<1>(sycl::range<1>(amrex_i_nthreads_total), \
267 sycl::range<1>(amrex_i_nthreads_per_block)), \
268 [=] (sycl::nd_item<1> amrex_i_item) \
269 [[sycl::reqd_work_group_size(AMREX_GPU_MAX_THREADS)]] \
270 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
272 for (auto const TI : amrex::Gpu::Range(amrex_i_tn,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
277 } catch (sycl::exception const& ex) { \
278 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
282 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE: cannot call device function from host"); \
285 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(TN,TI,block) \
286 { auto const& amrex_i_tn = TN; \
287 if (!amrex::isEmpty(amrex_i_tn)) { \
288 if (amrex::Gpu::inLaunchRegion()) \
290 auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
291 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
292 [=] AMREX_GPU_DEVICE () noexcept { \
293 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
297 AMREX_GPU_ERROR_CHECK(); \
300 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE: cannot call device function from host"); \
305 #ifdef AMREX_USE_SYCL
306 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
307 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
308 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
309 if (amrex::Gpu::inLaunchRegion()) \
311 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
312 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
313 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
314 amrex_i_ec2.numBlocks.x); \
315 amrex_i_nblocks.y = 2; \
316 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
317 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
318 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
320 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
321 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,2), \
322 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
323 [=] (sycl::nd_item<2> amrex_i_item) \
324 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
326 switch (amrex_i_item.get_group(1)) { \
327 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
331 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
337 } catch (sycl::exception const& ex) { \
338 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
342 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
345 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
346 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
347 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
348 if (amrex::Gpu::inLaunchRegion()) \
350 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
351 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
352 dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
353 amrex_i_ec2.numBlocks.x); \
354 amrex_i_nblocks.y = 2; \
355 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
356 [=] AMREX_GPU_DEVICE () noexcept { \
357 switch (blockIdx.y) { \
358 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
362 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
367 AMREX_GPU_ERROR_CHECK(); \
370 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
375 #ifdef AMREX_USE_SYCL
376 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
377 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
378 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
379 if (amrex::Gpu::inLaunchRegion()) \
381 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
382 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
383 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
384 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
385 amrex_i_ec2.numBlocks.x), \
386 amrex_i_ec3.numBlocks.x); \
387 amrex_i_nblocks.y = 3; \
388 int amrex_i_nthreads_per_block = amrex_i_ec1.numThreads.x; \
389 int amrex_i_nthreads_total = amrex_i_nthreads_per_block * amrex_i_nblocks.x; \
390 auto& amrex_i_q = amrex::Gpu::Device::streamQueue(); \
392 amrex_i_q.submit([&] (sycl::handler& amrex_i_h) { \
393 amrex_i_h.parallel_for(sycl::nd_range<2>(sycl::range<2>(amrex_i_nthreads_total,3), \
394 sycl::range<2>(amrex_i_nthreads_per_block,1)), \
395 [=] (sycl::nd_item<2> amrex_i_item) \
396 [[sycl::reqd_sub_group_size(Gpu::Device::warp_size)]] \
398 switch (amrex_i_item.get_group(1)) { \
399 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
403 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
406 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3,amrex_i_item.get_global_id(0),amrex_i_item.get_global_range(0))) { \
412 } catch (sycl::exception const& ex) { \
413 amrex::Abort(std::string("LAUNCH: ")+ex.what()+"!!!!!"); \
417 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
420 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
421 { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \
422 if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2) || !amrex::isEmpty(amrex_i_tn3)) { \
423 if (amrex::Gpu::inLaunchRegion()) \
425 const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
426 const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
427 const auto amrex_i_ec3 = amrex::Gpu::ExecutionConfig(amrex_i_tn3); \
428 dim3 amrex_i_nblocks = amrex::max(amrex::max(amrex_i_ec1.numBlocks.x, \
429 amrex_i_ec2.numBlocks.x), \
430 amrex_i_ec3.numBlocks.x); \
431 amrex_i_nblocks.y = 3; \
432 AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
433 [=] AMREX_GPU_DEVICE () noexcept { \
434 switch (blockIdx.y) { \
435 case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
439 case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
442 case 2: for (auto const TI3 : amrex::Gpu::Range(amrex_i_tn3)) { \
447 AMREX_GPU_ERROR_CHECK(); \
450 amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
456 #ifdef AMREX_USE_SYCL
457 #define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \
459 auto const& amrex_i_n = n; \
460 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
461 if (amrex::Gpu::inLaunchRegion()) { \
462 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
464 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
468 #define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \
470 auto const& amrex_i_n = n; \
471 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
472 if (amrex::Gpu::inLaunchRegion()) { \
473 amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
475 auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \
477 for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \
482 #define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \
484 using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
485 amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
490 #ifdef AMREX_USE_SYCL
491 #define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
493 auto const& amrex_i_box = box; \
494 if (amrex::Gpu::inLaunchRegion()) { \
495 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
497 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
501 #define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
503 auto const& amrex_i_box = box; \
504 if (amrex::Gpu::inLaunchRegion()) { \
505 amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
507 amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \
512 #define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \
514 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
519 #ifdef AMREX_USE_SYCL
520 #define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
522 auto const& amrex_i_box = box; \
523 auto const& amrex_i_ncomp = ncomp; \
524 if (amrex::Gpu::inLaunchRegion()) { \
525 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
527 amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
531 #define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
533 auto const& amrex_i_box = box; \
534 auto const& amrex_i_ncomp = ncomp; \
535 if (amrex::Gpu::inLaunchRegion()) { \
536 amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
538 amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \
543 #define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
545 amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
548 #define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
549 #define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
550 #define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)
552 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
553 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
554 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)
Cross-reference: AMREX_GPU_HOST_DEVICE range_detail::range_impl<T> Range(T const& b) noexcept — the device-range helper used by the launch macros above; defined in AMReX_GpuRange.H, line 125.