Block-Structured AMR Software Framework
AMReX_GpuLaunchMacrosG.nolint.H File Reference

Go to the source code of this file.

Macros

#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN, TI, block)
 
#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1, TI1, block1, TN2, TI2, block2)
 
#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1, TI1, block1, TN2, TI2, block2, TN3, TI3, block3)
 
#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE(TN, TI, block)
 
#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(TN1, TI1, block1, TN2, TI2, block2)
 
#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(TN1, TI1, block1, TN2, TI2, block2, TN3, TI3, block3)
 
#define AMREX_GPU_HOST_DEVICE_FOR_1D(n, i, block)
 
#define AMREX_GPU_DEVICE_FOR_1D(n, i, block)
 
#define AMREX_GPU_HOST_DEVICE_FOR_3D(box, i, j, k, block)
 
#define AMREX_GPU_DEVICE_FOR_3D(box, i, j, k, block)
 
#define AMREX_GPU_HOST_DEVICE_FOR_4D(box, ncomp, i, j, k, n, block)
 
#define AMREX_GPU_DEVICE_FOR_4D(box, ncomp, i, j, k, n, block)
 
#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...)   AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
 
#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...)   AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
 
#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...)   AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)
 
#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...)   AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
 
#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...)   AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
 
#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...)   AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)
 

Macro Definition Documentation

◆ AMREX_GPU_DEVICE_FOR_1D

#define AMREX_GPU_DEVICE_FOR_1D (   n,
  i,
  block 
)
Value:
{ \
using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
}
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200

◆ AMREX_GPU_DEVICE_FOR_3D

#define AMREX_GPU_DEVICE_FOR_3D (   box,
  i,
  j,
  k,
  block 
)
Value:
{ \
amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
}

◆ AMREX_GPU_DEVICE_FOR_4D

#define AMREX_GPU_DEVICE_FOR_4D (   box,
  ncomp,
  i,
  j,
  k,
  n,
  block 
)
Value:
{ \
amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
}

◆ AMREX_GPU_DEVICE_PARALLEL_FOR_1D

#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D (   ...)    AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)

◆ AMREX_GPU_DEVICE_PARALLEL_FOR_3D

#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D (   ...)    AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)

◆ AMREX_GPU_DEVICE_PARALLEL_FOR_4D

#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D (   ...)    AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)

◆ AMREX_GPU_HOST_DEVICE_FOR_1D

#define AMREX_GPU_HOST_DEVICE_FOR_1D (   n,
  i,
  block 
)
Value:
{ \
auto const& amrex_i_n = n; \
using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
if (amrex::Gpu::inLaunchRegion()) { \
amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
} else { \
auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \
AMREX_PRAGMA_SIMD \
for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \
} \
}
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86

◆ AMREX_GPU_HOST_DEVICE_FOR_3D

#define AMREX_GPU_HOST_DEVICE_FOR_3D (   box,
  i,
  j,
  k,
  block 
)
Value:
{ \
auto const& amrex_i_box = box; \
if (amrex::Gpu::inLaunchRegion()) { \
amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
} else { \
amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \
} \
}
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:378

◆ AMREX_GPU_HOST_DEVICE_FOR_4D

#define AMREX_GPU_HOST_DEVICE_FOR_4D (   box,
  ncomp,
  i,
  j,
  k,
  n,
  block 
)
Value:
{ \
auto const& amrex_i_box = box; \
auto const& amrex_i_ncomp = ncomp; \
if (amrex::Gpu::inLaunchRegion()) { \
amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
} else { \
amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \
} \
}

◆ AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D

#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D (   ...)    AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)

◆ AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D

#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D (   ...)    AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)

◆ AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D

#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D (   ...)    AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)

◆ AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE

#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE (   TN,
  TI,
  block 
)
Value:
{ auto const& amrex_i_tn = TN; \
if (!amrex::isEmpty(amrex_i_tn)) { \
if (amrex::Gpu::inLaunchRegion()) \
{ \
auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
[=] AMREX_GPU_DEVICE () noexcept { \
for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
block \
} \
}); \
AMREX_GPU_ERROR_CHECK(); \
} \
else { \
amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE: cannot call device function from host"); \
}}}
AMREX_GPU_HOST_DEVICE range_detail::range_impl< T > Range(T const &b) noexcept
Definition: AMReX_GpuRange.H:125
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
bool isEmpty(T n) noexcept
Definition: AMReX_GpuRange.H:14
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
amrex::Gpu::ExecutionConfig
Definition: AMReX_GpuLaunch.H:128

◆ AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2

#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2 (   TN1,
  TI1,
  block1,
  TN2,
  TI2,
  block2 
)
Value:
{ auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \
if (!amrex::isEmpty(amrex_i_tn1) || !amrex::isEmpty(amrex_i_tn2)) { \
if (amrex::Gpu::inLaunchRegion()) \
{ \
const auto amrex_i_ec1 = amrex::Gpu::ExecutionConfig(amrex_i_tn1); \
const auto amrex_i_ec2 = amrex::Gpu::ExecutionConfig(amrex_i_tn2); \
dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \
amrex_i_ec2.numBlocks.x); \
amrex_i_nblocks.y = 2; \
AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \
[=] AMREX_GPU_DEVICE () noexcept { \
switch (blockIdx.y) { \
case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \
block1 \
} \
break; \
case 1: for (auto const TI2 : amrex::Gpu::Range(amrex_i_tn2)) { \
block2 \
} \
} \
}); \
AMREX_GPU_ERROR_CHECK(); \
} \
else { \
amrex::Abort("AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2: cannot call device function from host"); \
}}}
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35

◆ AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3

#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3 (   TN1,
  TI1,
  block1,
  TN2,
  TI2,
  block2,
  TN3,
  TI3,
  block3 
)

◆ AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE

#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE (   TN,
  TI,
  block 
)
Value:
{ auto const& amrex_i_tn = TN; \
if (!amrex::isEmpty(amrex_i_tn)) { \
if (amrex::Gpu::inLaunchRegion()) \
{ \
const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \
AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \
[=] AMREX_GPU_DEVICE () noexcept { \
for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
block \
} \
}); \
AMREX_GPU_ERROR_CHECK(); \
} \
else { \
for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \
block \
} \
}}}

◆ AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2

#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2 (   TN1,
  TI1,
  block1,
  TN2,
  TI2,
  block2 
)

◆ AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3

#define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3 (   TN1,
  TI1,
  block1,
  TN2,
  TI2,
  block2,
  TN3,
  TI3,
  block3 
)