Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuAsyncArray.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_ASYNC_ARRAY_H_
2#define AMREX_GPU_ASYNC_ARRAY_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Arena.H>
6#include <AMReX_TypeTraits.H>
7#include <AMReX_GpuDevice.H>
8
9#include <cstddef>
10#include <cstring>
11#include <cstdlib>
12#include <memory>
13
14#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
15extern "C" {
16#if defined(AMREX_USE_HIP)
17 void amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
18#elif defined(AMREX_USE_CUDA)
19 void CUDART_CB amrex_asyncarray_delete (void* p);
20#endif
21}
22#endif
23
24namespace amrex {
25namespace Gpu {
26
32template <typename T, std::enable_if_t<std::is_trivially_copyable_v<T>,int> = 0>
34{
35public:
36
37 AsyncArray (T const* h_p, const std::size_t n)
38 {
39 if (n == 0) { return; }
40 h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
41 std::memcpy(h_data, h_p, n*sizeof(T));
42#ifdef AMREX_USE_GPU
44 {
45 d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
46 Gpu::htod_memcpy_async(d_data, h_data, n*sizeof(T));
47 }
48#endif
49 }
50
51 template <typename U = T, std::enable_if_t<std::is_standard_layout_v<U> && std::is_trivial_v<U>,int> = 0>
52 explicit AsyncArray (const std::size_t n)
53 {
54 if (n == 0) { return; }
55#ifdef AMREX_USE_GPU
57 {
58 d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
59 }
60 else
61#endif
62 {
63 h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
64 }
65 }
66
68
69 AsyncArray (AsyncArray const&) = delete;
70 AsyncArray (AsyncArray &&) = delete;
71 void operator= (AsyncArray const&) = delete;
72 void operator= (AsyncArray &&) = delete;
73
74 [[nodiscard]] T const* data () const noexcept { return (d_data != nullptr) ? d_data : h_data; }
75 [[nodiscard]] T* data () noexcept { return (d_data != nullptr) ? d_data : h_data; }
76 void clear ()
77 {
78#ifdef AMREX_USE_GPU
80 {
81 if (d_data != nullptr) {
82#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
83 T** p = static_cast<T**>(std::malloc(2*sizeof(T*)));
84 p[0] = d_data;
85 p[1] = h_data;
86#if defined(AMREX_USE_HIP)
87 AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
89#elif defined(AMREX_USE_CUDA)
90 AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
92#endif
93#elif defined(AMREX_USE_SYCL)
94 auto* pd = d_data;
95 auto* ph = h_data;
96 auto& q = *(Gpu::gpuStream().queue);
97 try {
98 q.submit([&] (sycl::handler& h) {
99 h.host_task([=] () {
100 The_Arena()->free(pd);
101 The_Pinned_Arena()->free(ph);
102 });
103 });
104 } catch (sycl::exception const& ex) {
105 amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
106 }
107#endif
108 }
109 }
110 else
111#endif
112 {
113 The_Pinned_Arena()->free(h_data);
114 }
115 d_data = nullptr;
116 h_data = nullptr;
117 }
118
119 void copyToHost (T* h_p, std::size_t n) const
120 {
121 if (n == 0) { return; }
122#ifdef AMREX_USE_GPU
123 if (d_data)
124 {
125 Gpu::dtoh_memcpy(h_p, d_data, n*sizeof(T));
126 }
127 else
128#endif
129 if (h_data)
130 {
131 std::memcpy(h_p, h_data, n*sizeof(T));
132 }
133 }
134
135private:
136 T* d_data = nullptr;
137 T* h_data = nullptr;
138};
139
140}
141
142using Gpu::AsyncArray;
143}
144
145#endif
void CUDART_CB amrex_asyncarray_delete(void *p)
Definition AMReX_GpuAsyncArray.cpp:10
void CUDART_CB amrex_asyncarray_delete(void *p)
Definition AMReX_GpuAsyncArray.cpp:10
#define AMREX_CUDA_SAFE_CALL(call)
Definition AMReX_GpuError.H:73
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
Definition AMReX_GpuAsyncArray.H:34
AsyncArray(T const *h_p, const std::size_t n)
Definition AMReX_GpuAsyncArray.H:37
T const * data() const noexcept
Definition AMReX_GpuAsyncArray.H:74
void clear()
Definition AMReX_GpuAsyncArray.H:76
AsyncArray(AsyncArray &&)=delete
void copyToHost(T *h_p, std::size_t n) const
Definition AMReX_GpuAsyncArray.H:119
void operator=(AsyncArray const &)=delete
~AsyncArray()
Definition AMReX_GpuAsyncArray.H:67
AsyncArray(AsyncArray const &)=delete
T * data() noexcept
Definition AMReX_GpuAsyncArray.H:75
AsyncArray(const std::size_t n)
Definition AMReX_GpuAsyncArray.H:52
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:92
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:496
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:291
Definition AMReX_Amr.cpp:49
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:240