Block-Structured AMR Software Framework
AMReX_GpuLaunchMacrosC.nolint.H
Go to the documentation of this file.
1 // Do not include this header anywhere other than AMReX_GpuLaunchMacrosC.H.
2 // The purpose of this file is to avoid clang-tidy.
3 
// Expands to a braced scope holding one range-based for loop: `TI` is declared
// `auto const` and takes each element of `amrex::Gpu::Range(TN)` in turn, with
// `block` pasted in as the loop body.
//   TN    - argument forwarded to amrex::Gpu::Range.
//   TI    - name of the per-iteration variable visible inside `block`.
//   block - statement(s) forming the loop body.
// NOTE(review): presumably the host-only stand-in for a GPU kernel launch —
// confirm against AMReX_GpuLaunchMacrosC.H.
4 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \
5  { \
6  for (auto const TI : amrex::Gpu::Range(TN)) { \
7  block \
8  } \
9  }
10 
// Two-range variant: inside a single braced scope, runs a loop over
// `amrex::Gpu::Range(TN1)` with body `block1`, and after it has finished, a
// second loop over `amrex::Gpu::Range(TN2)` with body `block2`.
//   TN1, TN2       - arguments forwarded to amrex::Gpu::Range.
//   TI1, TI2       - names of the `auto const` per-iteration variables.
//   block1, block2 - statement(s) forming the respective loop bodies.
// The two loops are sequential, not interleaved or nested.
11 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \
12  { \
13  for (auto const TI1 : amrex::Gpu::Range(TN1)) { \
14  block1 \
15  } \
16  for (auto const TI2 : amrex::Gpu::Range(TN2)) { \
17  block2 \
18  } \
19  }
20 
// Three-range variant: inside a single braced scope, runs three loops one after
// another, over `amrex::Gpu::Range(TN1)`, `amrex::Gpu::Range(TN2)` and
// `amrex::Gpu::Range(TN3)`, with bodies `block1`, `block2` and `block3`.
//   TN1..TN3       - arguments forwarded to amrex::Gpu::Range.
//   TI1..TI3       - names of the `auto const` per-iteration variables.
//   block1..block3 - statement(s) forming the respective loop bodies.
// The loops are sequential, not interleaved or nested.
21 #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \
22  { \
23  for (auto const TI1 : amrex::Gpu::Range(TN1)) { \
24  block1 \
25  } \
26  for (auto const TI2 : amrex::Gpu::Range(TN2)) { \
27  block2 \
28  } \
29  for (auto const TI3 : amrex::Gpu::Range(TN3)) { \
30  block3 \
31  } \
32  }
33 
// DEVICE-named aliases: each forwards all of its arguments unchanged to the
// HOST_DEVICE macro with the same suffix, so in this header the DEVICE and
// HOST_DEVICE spellings expand to identical code.
34 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(__VA_ARGS__)
35 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(__VA_ARGS__)
36 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
37 
// Plain 1D loop: declares `i` (type deduced from `decltype(n){0}`) and expands
// `block` once for each index 0, 1, ..., n-1.
//   n     - iteration count; it appears in the loop condition, so it is
//           re-evaluated on every pass.
//   i     - name of the loop index visible inside `block`.
//   block - statement(s) forming the loop body.
// `n` is parenthesized in the comparison so that an argument containing a
// lower-precedence operator (e.g. `flag ? a : b`) is compared as a whole
// instead of being re-associated with `i <`.
// It is deliberately left unparenthesized inside decltype: decltype((x)) of a
// named variable is a reference type, which would change the index type.
#define AMREX_GPU_FOR_1D_IMPL(n,i,block) \
    for (auto i = decltype(n){0}; i < (n); ++i) { \
        block \
    }
42 
// 1D loop with AMREX_PRAGMA_SIMD expanded immediately before the `for`
// statement; otherwise identical to the plain 1D loop: `i` is declared with
// the type deduced from `decltype(n){0}` and `block` runs for 0, 1, ..., n-1.
//   n     - iteration count; it appears in the loop condition, so it is
//           re-evaluated on every pass.
//   i     - name of the loop index visible inside `block`.
//   block - statement(s) forming the loop body.
// NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file — presumably a
// vectorization pragma; confirm before relying on iteration independence.
// `n` is parenthesized in the comparison so that an argument containing a
// lower-precedence operator (e.g. `flag ? a : b`) is compared as a whole.
// It is deliberately left unparenthesized inside decltype: decltype((x)) of a
// named variable is a reference type, which would change the index type.
#define AMREX_GPU_PARALLEL_FOR_1D_IMPL(n,i,block) \
    AMREX_PRAGMA_SIMD \
    for (auto i = decltype(n){0}; i < (n); ++i) { \
        block \
    }
48 
// Triple loop nest over the index range of `box`: reads the lower and upper
// corners with amrex::lbound(box) / amrex::ubound(box) and expands `block` for
// every (i,j,k) with lo <= index <= hi (both ends inclusive).
//   box     - passed to amrex::lbound and amrex::ubound; the argument text is
//             expanded twice, once for each call.
//   i, j, k - names of the `int` loop indices for the .x, .y and .z components;
//             k is the outermost loop and i the innermost.
//   block   - statement(s) forming the innermost loop body.
// The helper locals amrex_i_lo / amrex_i_hi live in the enclosing braces and
// are therefore also visible to `block`.
49 #define AMREX_GPU_FOR_3D_IMPL(box,i,j,k,block) \
50  { \
51  const auto amrex_i_lo = amrex::lbound(box); \
52  const auto amrex_i_hi = amrex::ubound(box); \
53  for (int k = amrex_i_lo.z; k <= amrex_i_hi.z; ++k) { \
54  for (int j = amrex_i_lo.y; j <= amrex_i_hi.y; ++j) { \
55  for (int i = amrex_i_lo.x; i <= amrex_i_hi.x; ++i) { \
56  block \
57  }}} \
58  }
59 
// Triple loop nest over the index range of `box`, with AMREX_PRAGMA_SIMD
// expanded immediately before the innermost (i) loop. Bounds come from
// amrex::lbound(box) / amrex::ubound(box) and are inclusive at both ends.
//   box     - passed to amrex::lbound and amrex::ubound; the argument text is
//             expanded twice, once for each call.
//   i, j, k - names of the `int` loop indices for the .x, .y and .z components;
//             k is the outermost loop and i the innermost.
//   block   - statement(s) forming the innermost loop body.
// NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file — presumably a
// vectorization pragma; confirm before relying on iteration independence.
// The helper locals amrex_i_lo / amrex_i_hi live in the enclosing braces and
// are therefore also visible to `block`.
60 #define AMREX_GPU_PARALLEL_FOR_3D_IMPL(box,i,j,k,block) \
61  { \
62  const auto amrex_i_lo = amrex::lbound(box); \
63  const auto amrex_i_hi = amrex::ubound(box); \
64  for (int k = amrex_i_lo.z; k <= amrex_i_hi.z; ++k) { \
65  for (int j = amrex_i_lo.y; j <= amrex_i_hi.y; ++j) { \
66  AMREX_PRAGMA_SIMD \
67  for (int i = amrex_i_lo.x; i <= amrex_i_hi.x; ++i) { \
68  block \
69  }}} \
70  }
71 
// Four-deep loop nest: for each component n in [0, ncomp) visits every (i,j,k)
// in the inclusive index range given by amrex::lbound(box) / amrex::ubound(box)
// and expands `block` there.
//   box     - passed to amrex::lbound and amrex::ubound; the argument text is
//             expanded twice, once for each call.
//   ncomp   - component count; it appears in the loop condition, so it is
//             re-evaluated on every pass of the n loop.
//   i, j, k - names of the `int` loop indices for the .x, .y and .z components.
//   n       - name of the `int` component index (outermost loop).
//   block   - statement(s) forming the innermost loop body.
// `ncomp` is parenthesized in the comparison so that an argument containing a
// lower-precedence operator (e.g. `flag ? 3 : 1`) is compared as a whole
// instead of being re-associated with `n <`.
// The helper locals amrex_i_lo / amrex_i_hi live in the enclosing braces and
// are therefore also visible to `block`.
#define AMREX_GPU_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
    { \
        const auto amrex_i_lo = amrex::lbound(box); \
        const auto amrex_i_hi = amrex::ubound(box); \
        for (int n = 0; n < (ncomp); ++n) { \
        for (int k = amrex_i_lo.z; k <= amrex_i_hi.z; ++k) { \
        for (int j = amrex_i_lo.y; j <= amrex_i_hi.y; ++j) { \
        for (int i = amrex_i_lo.x; i <= amrex_i_hi.x; ++i) { \
            block \
        }}}} \
    }
83 
// Four-deep loop nest with AMREX_PRAGMA_SIMD expanded immediately before the
// innermost (i) loop: for each component n in [0, ncomp) visits every (i,j,k)
// in the inclusive index range given by amrex::lbound(box) / amrex::ubound(box)
// and expands `block` there.
//   box     - passed to amrex::lbound and amrex::ubound; the argument text is
//             expanded twice, once for each call.
//   ncomp   - component count; it appears in the loop condition, so it is
//             re-evaluated on every pass of the n loop.
//   i, j, k - names of the `int` loop indices for the .x, .y and .z components.
//   n       - name of the `int` component index (outermost loop).
//   block   - statement(s) forming the innermost loop body.
// NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file — presumably a
// vectorization pragma; confirm before relying on iteration independence.
// `ncomp` is parenthesized in the comparison so that an argument containing a
// lower-precedence operator (e.g. `flag ? 3 : 1`) is compared as a whole
// instead of being re-associated with `n <`.
// The helper locals amrex_i_lo / amrex_i_hi live in the enclosing braces and
// are therefore also visible to `block`.
#define AMREX_GPU_PARALLEL_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
    { \
        const auto amrex_i_lo = amrex::lbound(box); \
        const auto amrex_i_hi = amrex::ubound(box); \
        for (int n = 0; n < (ncomp); ++n) { \
        for (int k = amrex_i_lo.z; k <= amrex_i_hi.z; ++k) { \
        for (int j = amrex_i_lo.y; j <= amrex_i_hi.y; ++j) { \
        AMREX_PRAGMA_SIMD \
        for (int i = amrex_i_lo.x; i <= amrex_i_hi.x; ++i) { \
            block \
        }}}} \
    }
96 
// Public 1D loop macros: both spellings forward their arguments unchanged to
// AMREX_GPU_FOR_1D_IMPL, so in this header they expand to the same code.
// The expansion already ends with `;`, so a call site that adds its own `;`
// produces an extra empty statement.
97 #define AMREX_GPU_HOST_DEVICE_FOR_1D(...) AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
98 #define AMREX_GPU_DEVICE_FOR_1D(...) AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
99 
// Public 3D loop macros: both spellings forward their arguments unchanged to
// AMREX_GPU_FOR_3D_IMPL, so in this header they expand to the same code.
// The expansion already ends with `;`, so a call site that adds its own `;`
// produces an extra empty statement.
100 #define AMREX_GPU_HOST_DEVICE_FOR_3D(...) AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
101 #define AMREX_GPU_DEVICE_FOR_3D(...) AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
102 
// Public 4D loop macros: both spellings forward their arguments unchanged to
// AMREX_GPU_FOR_4D_IMPL, so in this header they expand to the same code.
// The expansion already ends with `;`, so a call site that adds its own `;`
// produces an extra empty statement.
103 #define AMREX_GPU_HOST_DEVICE_FOR_4D(...) AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
104 #define AMREX_GPU_DEVICE_FOR_4D(...) AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
105 
// DEVICE-named parallel loop macros: each forwards its arguments unchanged to
// the AMREX_GPU_PARALLEL_FOR_{1D,3D,4D}_IMPL macro of the same dimensionality.
// The expansion already ends with `;`, so a call site that adds its own `;`
// produces an extra empty statement.
106 #define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
107 #define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
108 #define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
109 
// HOST_DEVICE-named parallel loop macros: each forwards its arguments unchanged
// to the AMREX_GPU_PARALLEL_FOR_{1D,3D,4D}_IMPL macro of the same
// dimensionality, i.e. the same targets the DEVICE-named spellings use.
// The expansion already ends with `;`, so a call site that adds its own `;`
// produces an extra empty statement.
110 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
111 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
112 #define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);