1 #ifndef AMREX_GPU_LAUNCH_H_
2 #define AMREX_GPU_LAUNCH_H_
3 #include <AMReX_Config.H>
// Compile-time constants for GPU launches, defined as 3, 1 and 1.
// NOTE(review): the names suggest a cells-per-thread count and y/z stride
// factors, but no use of these macros is visible in this listing -- confirm
// against the launch-configuration code before relying on that reading.
29 #define AMREX_GPU_NCELLS_PER_THREAD 3
30 #define AMREX_GPU_Y_STRIDE 1
31 #define AMREX_GPU_Z_STRIDE 1
// Kernel-launch wrapper using the triple-chevron syntax: launches
// amrex::launch_global<MT> with the execution configuration
// <<<blocks, threads, sharedMem, stream>>> and passes the variadic macro
// arguments through as the kernel's arguments.
// NOTE(review): the preprocessor condition selecting this definition lies
// outside the visible lines; presumably it is the CUDA branch -- confirm.
34 # define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \
35 amrex::launch_global<MT><<<blocks, threads, sharedMem, stream>>>(__VA_ARGS__)
36 #elif defined(AMREX_USE_HIP)
// HIP kernel-launch wrapper: forwards the launch configuration (blocks,
// threads, sharedMem, stream) and the variadic kernel arguments to
// hipLaunchKernelGGL. The kernel is named as amrex::launch_global<MT>, the
// same qualified spelling the triple-chevron definition of this macro uses.
// The template-id is passed as an argument rather than called, so
// argument-dependent lookup cannot find it; without the qualification the
// macro only expands correctly where `launch_global` is reachable by
// unqualified lookup.
37 # define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \
38 hipLaunchKernelGGL(amrex::launch_global<MT>, blocks, threads, sharedMem, stream, __VA_ARGS__)
48 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
49 template<
class L,
class... Lambdas>
55 template<
class L,
class... Lambdas>
67 template<
class L,
class... Lambdas>
69 std::forward<L>(f0)();
// Forward declaration of the LayoutData class template (one type parameter).
// Only the name is introduced here; the definition is not part of this
// listing.
74 template <
class T>
class LayoutData;
81 return AMREX_GPU_MAX_THREADS;
108 const auto len = bx.length3d();
109 Long k =
offset / (len[0]*len[1]);
110 Long j = (
offset - k*(len[0]*len[1])) / len[0];
111 Long i = (
offset - k*(len[0]*len[1])) - j*len[0];
112 IntVect iv{AMREX_D_DECL(static_cast<int>(i),
114 static_cast<int>(k))};
116 return (bx &
Box(iv,iv,bx.type()));
162 Long numBlocks = (
std::max(N,Long(1)) + MT - 1) / MT;
167 ec.numBlocks.x = numBlocks;
168 ec.numThreads.x = MT;
177 return makeExecutionConfig<MT>(box.numPts());
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_GPU_Z_STRIDE
Definition: AMReX_GpuLaunch.H:31
#define AMREX_GPU_NCELLS_PER_THREAD
Definition: AMReX_GpuLaunch.H:29
#define AMREX_GPU_Y_STRIDE
Definition: AMReX_GpuLaunch.H:30
#define AMREX_IF_ON_DEVICE(CODE)
Definition: AMReX_GpuQualifiers.H:56
#define AMREX_GPU_GLOBAL
Definition: AMReX_GpuQualifiers.H:19
#define AMREX_IF_ON_HOST(CODE)
Definition: AMReX_GpuQualifiers.H:58
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_DECL(a, b, c)
Definition: AMReX_SPACE.H:104
static void c_threads_and_blocks(const int *lo, const int *hi, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition: AMReX_GpuDevice.cpp:859
static void n_threads_and_blocks(const Long N, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition: AMReX_GpuDevice.cpp:844
static constexpr AMREX_EXPORT int warp_size
Definition: AMReX_GpuDevice.H:173
static void c_comps_threads_and_blocks(const int *lo, const int *hi, const int comps, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition: AMReX_GpuDevice.cpp:851
static void grid_stride_threads_and_blocks(dim3 &numBlocks, dim3 &numThreads) noexcept
Definition: AMReX_GpuDevice.cpp:917
constexpr std::size_t numThreadsPerBlockParallelFor()
Definition: AMReX_GpuLaunch.H:80
ExecutionConfig makeExecutionConfig(Long N) noexcept
Definition: AMReX_GpuLaunch.H:159
AMREX_GPU_HOST_DEVICE Box getThreadBox(const Box &bx, Long offset) noexcept
Definition: AMReX_GpuLaunch.H:105
@ min
Definition: AMReX_ParallelReduce.H:18
@ max
Definition: AMReX_ParallelReduce.H:17
Definition: AMReX_Amr.cpp:49
void launch_host(L &&f0) noexcept
Definition: AMReX_GpuLaunch.H:65
BoxND< AMREX_SPACEDIM > Box
Definition: AMReX_BaseFwd.H:27
IntVectND< AMREX_SPACEDIM > IntVect
Definition: AMReX_BaseFwd.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > surroundingNodes(const BoxND< dim > &b, int dir) noexcept
Returns a BoxND with NODE based coordinates in direction dir that encloses BoxND b....
Definition: AMReX_Box.H:1399
AMREX_GPU_DEVICE void call_device(L &&f0) noexcept
Definition: AMReX_GpuLaunch.H:53
AMREX_GPU_GLOBAL void launch_global(L f0, Lambdas... fs)
Definition: AMReX_GpuLaunch.H:50
Definition: AMReX_GpuLaunch.H:89
Box box
Definition: AMReX_GpuLaunch.H:90
int ic
Definition: AMReX_GpuLaunch.H:91
int nc
Definition: AMReX_GpuLaunch.H:92
Definition: AMReX_GpuLaunch.H:127
ExecutionConfig(dim3 nb, dim3 nt, std::size_t sm=0) noexcept
Definition: AMReX_GpuLaunch.H:149
ExecutionConfig(Long N) noexcept
Definition: AMReX_GpuLaunch.H:146
dim3 numBlocks
Definition: AMReX_GpuLaunch.H:152
dim3 numThreads
Definition: AMReX_GpuLaunch.H:153
ExecutionConfig(const Box &box, int comps) noexcept
Definition: AMReX_GpuLaunch.H:142
ExecutionConfig(const Box &box) noexcept
Definition: AMReX_GpuLaunch.H:131
ExecutionConfig() noexcept
Definition: AMReX_GpuLaunch.H:128
std::size_t sharedMem
Definition: AMReX_GpuLaunch.H:154
Definition: AMReX_GpuLaunch.H:95
int globalBlockId
Definition: AMReX_GpuLaunch.H:98
int numBlocks
Definition: AMReX_GpuLaunch.H:96
int numThreads
Definition: AMReX_GpuLaunch.H:97