1 #ifndef AMREX_GPU_CONTAINERS_H_
2 #define AMREX_GPU_CONTAINERS_H_
3 #include <AMReX_Config.H>
120 template<
class InIter,
class OutIter>
123 using value_type =
typename std::iterator_traits<InIter>::value_type;
125 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
126 static_assert(std::is_same_v<value_type, out_value_type>);
127 static_assert(std::is_trivially_copyable<value_type>(),
128 "Can only copy trivially copyable types");
131 if (
size == 0) {
return; }
157 template<
class InIter,
class OutIter>
160 using value_type =
typename std::iterator_traits<InIter>::value_type;
162 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
163 static_assert(std::is_same_v<value_type, out_value_type>);
164 static_assert(std::is_trivially_copyable<value_type>(),
165 "Can only copy trivially copyable types");
168 if (
size == 0) {
return; }
194 template<
class InIter,
class OutIter>
197 using value_type =
typename std::iterator_traits<InIter>::value_type;
199 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
200 static_assert(std::is_same_v<value_type, out_value_type>);
201 static_assert(std::is_trivially_copyable<value_type>(),
202 "Can only copy trivially copyable types");
205 if (
size == 0) {
return; }
232 template<
class InIter,
class OutIter>
235 using value_type =
typename std::iterator_traits<InIter>::value_type;
237 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
238 static_assert(std::is_same_v<value_type, out_value_type>);
239 static_assert(std::is_trivially_copyable<value_type>(),
240 "Can only copy trivially copyable types");
243 if (
size == 0) {
return; }
270 template<
class InIter,
class OutIter>
273 using value_type =
typename std::iterator_traits<InIter>::value_type;
275 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
276 static_assert(std::is_same_v<value_type, out_value_type>);
277 static_assert(std::is_trivially_copyable<value_type>(),
278 "Can only copy trivially copyable types");
281 if (
size == 0) {
return; }
308 template<
class InIter,
class OutIter>
311 using value_type =
typename std::iterator_traits<InIter>::value_type;
313 using out_value_type =
typename std::iterator_traits<OutIter>::value_type;
314 static_assert(std::is_same_v<value_type, out_value_type>);
315 static_assert(std::is_trivially_copyable<value_type>(),
316 "Can only copy trivially copyable types");
319 if (
size == 0) {
return; }
336 using value_type =
typename std::iterator_traits<Iter>::value_type;
337 static_assert(std::is_trivially_copyable<value_type>(),
338 "Can only copy trivially copyable types");
341 if (
size == 0) {
return; }
345 #if defined(AMREX_USE_CUDA) && !defined(_WIN32)
348 size*
sizeof(value_type),
367 using value_type =
typename std::iterator_traits<Iter>::value_type;
368 static_assert(std::is_trivially_copyable<value_type>(),
369 "Can only copy trivially copyable types");
372 if (
size == 0) {
return; }
376 #if defined(AMREX_USE_CUDA) && !defined(_WIN32)
379 size*
sizeof(value_type),
404 template <
typename IT,
typename F,
405 typename T =
typename std::iterator_traits<IT>::value_type,
406 std::enable_if_t<(
sizeof(T) <= 36*8) &&
407 std::is_trivially_copyable_v<T> &&
412 auto N =
static_cast<Long
>(std::distance(first, last));
413 if (N <= 0) {
return; }
415 #ifndef AMREX_USE_GPU
416 for (Long i = 0; i < N; ++i) {
423 if constexpr ((
sizeof(T) <= 8)
424 || (
sizeof(T) > 36*8)
425 || ! std::is_trivially_copyable<T>()) {
431 static_assert(
sizeof(T) %
sizeof(
unsigned int) == 0);
432 using U = std::conditional_t<
sizeof(T) %
sizeof(
unsigned long long) == 0,
433 unsigned long long,
unsigned int>;
434 constexpr Long nU =
sizeof(T) /
sizeof(U);
435 auto pu =
reinterpret_cast<U*
>(p);
436 int nthreads_per_block = (
sizeof(T) <= 64) ? 256 : 128;
437 int nblocks =
static_cast<int>((N+nthreads_per_block-1)/nthreads_per_block);
438 std::size_t shared_mem_bytes = nthreads_per_block *
sizeof(T);
439 #ifdef AMREX_USE_SYCL
443 Long i = handler.globalIdx();
444 Long blockDimx = handler.blockDim();
445 Long threadIdxx = handler.threadIdx();
446 Long blockIdxx = handler.blockIdx();
447 auto const shared_U = (U*)handler.sharedMemory();
448 auto const shared_T = (T*)shared_U;
450 auto ga =
new(shared_T+threadIdxx) T;
453 handler.sharedBarrier();
454 for (Long m = threadIdxx,
455 mend = nU *
amrex::min(blockDimx, N-blockDimx*blockIdxx);
456 m < mend; m += blockDimx) {
457 pu[blockDimx*blockIdxx*nU+m] = shared_U[m];
464 Long blockDimx = blockDim.x;
465 Long threadIdxx = threadIdx.x;
466 Long blockIdxx = blockIdx.x;
467 Long i = blockDimx*blockIdxx + threadIdxx;
469 auto const shared_U = gsm.
dataPtr();
470 auto const shared_T = (T*)shared_U;
472 auto ga =
new(shared_T+threadIdxx) T;
476 for (Long m = threadIdxx,
477 mend = nU *
amrex::min(blockDimx, N-blockDimx*blockIdxx);
478 m < mend; m += blockDimx) {
479 pu[blockDimx*blockIdxx*nU+m] = shared_U[m];
#define AMREX_CUDA_SAFE_CALL(call)
Definition: AMReX_GpuError.H:73
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
static int deviceId() noexcept
Definition: AMReX_GpuDevice.cpp:568
static int devicePropMajor() noexcept
Definition: AMReX_GpuDevice.H:142
Definition: AMReX_PODVector.H:246
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition: AMReX_GpuRange.H:26
Definition: AMReX_BaseFwd.H:52
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:279
void fillAsync(IT first, IT last, F const &f) noexcept
Fill the elements in the given range using the given calllable.
Definition: AMReX_GpuContainers.H:410
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:121
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:233
void prefetchToHost(Iter begin, Iter end) noexcept
Migrate elements of a container from device to host. This is a no-op for host-only code.
Definition: AMReX_GpuContainers.H:334
static constexpr DeviceToDevice deviceToDevice
Definition: AMReX_GpuContainers.H:100
static constexpr DeviceToHost deviceToHost
Definition: AMReX_GpuContainers.H:99
static constexpr HostToDevice hostToDevice
Definition: AMReX_GpuContainers.H:98
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:265
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:301
void htod_memcpy(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:293
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
void dtod_memcpy(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:309
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
void prefetchToDevice(Iter begin, Iter end) noexcept
Migrate elements of a container from host to device. This is a no-op for host-only code.
Definition: AMReX_GpuContainers.H:365
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
void launch(T const &n, L &&f) noexcept
Definition: AMReX_GpuLaunchFunctsC.H:120
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1890
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1881
Definition: AMReX_GpuContainers.H:97
Definition: AMReX_GpuContainers.H:96
Definition: AMReX_GpuTypes.H:86
Definition: AMReX_GpuContainers.H:95
Definition: AMReX_GpuMemory.H:125
AMREX_GPU_DEVICE T * dataPtr() noexcept
Definition: AMReX_GpuMemory.H:126
Test if a given type T is callable with arguments of type Args...
Definition: AMReX_TypeTraits.H:201