Block-Structured AMR Software Framework
AMReX_GpuAsyncArray.H
Go to the documentation of this file.
1 #ifndef AMREX_GPU_ASYNC_ARRAY_H_
2 #define AMREX_GPU_ASYNC_ARRAY_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_Arena.H>
6 #include <AMReX_TypeTraits.H>
7 #include <AMReX_GpuDevice.H>
8 
9 #include <cstddef>
10 #include <cstring>
11 #include <cstdlib>
12 #include <memory>
13 
14 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
15 extern "C" {
16 #if defined(AMREX_USE_HIP)
17  void amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
18 #elif defined(AMREX_USE_CUDA)
19  void CUDART_CB amrex_asyncarray_delete (void* p);
20 #endif
21 }
22 #endif
23 
24 namespace amrex {
25 namespace Gpu {
26 
27 template <typename T, std::enable_if_t<std::is_trivially_copyable_v<T>,int> = 0>
29 {
30 public:
31 
32  AsyncArray (T const* h_p, const std::size_t n)
33  {
34  if (n == 0) { return; }
35  h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
36  std::memcpy(h_data, h_p, n*sizeof(T));
37 #ifdef AMREX_USE_GPU
38  if (Gpu::inLaunchRegion())
39  {
40  d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
41  Gpu::htod_memcpy_async(d_data, h_data, n*sizeof(T));
42  }
43 #endif
44  }
45 
46  template <typename U = T, std::enable_if_t<std::is_standard_layout_v<U> && std::is_trivial_v<U>,int> = 0>
47  explicit AsyncArray (const std::size_t n)
48  {
49  if (n == 0) { return; }
50 #ifdef AMREX_USE_GPU
51  if (Gpu::inLaunchRegion())
52  {
53  d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
54  }
55  else
56 #endif
57  {
58  h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
59  }
60  }
61 
62  ~AsyncArray () { clear(); }
63 
64  AsyncArray (AsyncArray const&) = delete;
65  AsyncArray (AsyncArray &&) = delete;
66  void operator= (AsyncArray const&) = delete;
67  void operator= (AsyncArray &&) = delete;
68 
69  [[nodiscard]] T const* data () const noexcept { return (d_data != nullptr) ? d_data : h_data; }
70  [[nodiscard]] T* data () noexcept { return (d_data != nullptr) ? d_data : h_data; }
71  void clear ()
72  {
73 #ifdef AMREX_USE_GPU
74  if (Gpu::inLaunchRegion())
75  {
76  if (d_data != nullptr) {
77 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
78  T** p = static_cast<T**>(std::malloc(2*sizeof(T*)));
79  p[0] = d_data;
80  p[1] = h_data;
81 #if defined(AMREX_USE_HIP)
82  AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
84 #elif defined(AMREX_USE_CUDA)
85  AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
87 #endif
88 #elif defined(AMREX_USE_SYCL)
89  auto* pd = d_data;
90  auto* ph = h_data;
91  auto& q = *(Gpu::gpuStream().queue);
92  try {
93  q.submit([&] (sycl::handler& h) {
94  h.host_task([=] () {
95  The_Arena()->free(pd);
96  The_Pinned_Arena()->free(ph);
97  });
98  });
99  } catch (sycl::exception const& ex) {
100  amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
101  }
102 #endif
103  }
104  }
105  else
106 #endif
107  {
109  }
110  d_data = nullptr;
111  h_data = nullptr;
112  }
113 
114  void copyToHost (T* h_p, std::size_t n) const
115  {
116  if (n == 0) { return; }
117 #ifdef AMREX_USE_GPU
118  if (d_data)
119  {
120  Gpu::dtoh_memcpy(h_p, d_data, n*sizeof(T));
121  }
122  else
123 #endif
124  if (h_data)
125  {
126  std::memcpy(h_p, h_data, n*sizeof(T));
127  }
128  }
129 
130 private:
131  T* d_data = nullptr;
132  T* h_data = nullptr;
133 };
134 
135 }
136 
137 using Gpu::AsyncArray;
138 }
139 
140 #endif
void CUDART_CB amrex_asyncarray_delete(void *p)
Definition: AMReX_GpuAsyncArray.cpp:10
#define AMREX_CUDA_SAFE_CALL(call)
Definition: AMReX_GpuError.H:73
void * malloc(YYSIZE_T)
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
Definition: AMReX_GpuAsyncArray.H:29
AsyncArray(T const *h_p, const std::size_t n)
Definition: AMReX_GpuAsyncArray.H:32
T * h_data
Definition: AMReX_GpuAsyncArray.H:132
void clear()
Definition: AMReX_GpuAsyncArray.H:71
AsyncArray(AsyncArray &&)=delete
T const * data() const noexcept
Definition: AMReX_GpuAsyncArray.H:69
void copyToHost(T *h_p, std::size_t n) const
Definition: AMReX_GpuAsyncArray.H:114
T * d_data
Definition: AMReX_GpuAsyncArray.H:131
void operator=(AsyncArray const &)=delete
~AsyncArray()
Definition: AMReX_GpuAsyncArray.H:62
AsyncArray(AsyncArray const &)=delete
T * data() noexcept
Definition: AMReX_GpuAsyncArray.H:70
AsyncArray(const std::size_t n)
Definition: AMReX_GpuAsyncArray.H:47
bool inLaunchRegion() noexcept
Definition: AMReX_GpuControl.H:86
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:301
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition: AMReX_GpuDevice.H:251
gpuStream_t gpuStream() noexcept
Definition: AMReX_GpuDevice.H:218
Definition: AMReX_Amr.cpp:49
Arena * The_Pinned_Arena()
Definition: AMReX_Arena.cpp:634
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:221
Arena * The_Arena()
Definition: AMReX_Arena.cpp:594