AMReX_GpuMemory.H
#ifndef AMREX_GPU_MEMORY_H_
#define AMREX_GPU_MEMORY_H_
#include <AMReX_Config.H>

#include <AMReX_GpuQualifiers.H>
#include <AMReX_GpuControl.H>
#include <AMReX_GpuDevice.H>
#include <AMReX_TypeTraits.H>
#include <AMReX_Arena.H>
#include <cstdlib>

namespace amrex::Gpu {
struct Managed {

#ifdef AMREX_USE_GPU

    [[nodiscard]] void *operator new (std::size_t len)
    {
        return The_Managed_Arena()->alloc(len);
    }

    void operator delete (void *ptr)
    {
        if (ptr) { The_Managed_Arena()->free(ptr); }
    }

#endif
};

struct Pinned {

#ifdef AMREX_USE_GPU

    [[nodiscard]] void *operator new (std::size_t len)
    {
        return The_Pinned_Arena()->alloc(len);
    }

    void operator delete (void *ptr)
    {
        if (ptr) { The_Pinned_Arena()->free(ptr); }
    }

#endif
};
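
// Usage sketch (illustrative, not part of the original header). In GPU
// builds, a class deriving from Gpu::Managed has its operator new/delete
// routed through The_Managed_Arena(), so objects created with `new` live in
// managed (unified) memory and can be dereferenced on both host and device.
// Gpu::Pinned works the same way with page-locked host memory from
// The_Pinned_Arena(). Without AMREX_USE_GPU both structs are empty and the
// global operators apply. The type `MyParams` below is hypothetical.
//
//     struct MyParams : public amrex::Gpu::Managed {
//         double dt = 0.0;
//         int ncomp = 1;
//     };
//
//     auto* params = new MyParams;   // allocated by The_Managed_Arena()
//     params->dt = 1.e-3;            // set on the host ...
//     // ... dereference `params` inside GPU kernels as well ...
//     delete params;                 // returned to the managed arena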

struct Deleter {
    Arena* m_arena = nullptr;
    Deleter (Arena* ar) noexcept : m_arena(ar) {}
    void operator() (void* pt) const noexcept { m_arena->free(pt); }
};
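
// Usage sketch (illustrative): Deleter satisfies the deleter requirements of
// std::unique_ptr, so a raw arena allocation can be owned RAII-style. This
// assumes the user includes <memory>; the variable names are made up.
//
//     auto* raw = (double*) The_Arena()->alloc(100*sizeof(double));
//     std::unique_ptr<double, amrex::Gpu::Deleter>
//         p(raw, amrex::Gpu::Deleter(The_Arena()));
//     // ... use p.get() ...
//     // Arena::free(p.get()) runs automatically when p goes out of scope.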

template <class T, std::enable_if_t<std::is_trivially_copyable_v<T>,int> = 0>
struct DeviceScalar
{
    DeviceScalar (DeviceScalar const&) = delete;
    DeviceScalar (DeviceScalar &&) = delete;
    void operator= (DeviceScalar const&) = delete;
    void operator= (DeviceScalar &&) = delete;

#ifdef AMREX_USE_GPU

    DeviceScalar () {
        if (Gpu::inLaunchRegion()) {
            dp = (T*)(The_Arena()->alloc(sizeof(T)));
        } else {
            dp = (T*)(std::malloc(sizeof(T)));
        }
    }

    explicit DeviceScalar (T init_val) {
        if (Gpu::inLaunchRegion()) {
            dp = (T*)(The_Arena()->alloc(sizeof(T)));
            Gpu::htod_memcpy(dp, &init_val, sizeof(T));
        } else {
            dp = (T*)(std::malloc(sizeof(T)));
            *dp = init_val;
        }
    }

    ~DeviceScalar () {
        if (Gpu::inLaunchRegion()) {
            The_Arena()->free(dp);
        } else {
            std::free(dp);
        }
    }

    [[nodiscard]] T* dataPtr () { return dp; }
    [[nodiscard]] T const* dataPtr () const { return dp; }
    [[nodiscard]] T dataValue () const {
        if (Gpu::inLaunchRegion()) {
            T r;
            Gpu::dtoh_memcpy(&r, dp, sizeof(T));
            return r;
        } else {
            return *dp;
        }
    }

private:
    T* dp;

#else

    DeviceScalar (T const& init_val) : d(init_val) {}
    DeviceScalar () = default;
    ~DeviceScalar () = default;

    [[nodiscard]] T* dataPtr () { return &d; }
    [[nodiscard]] T const* dataPtr () const { return &d; }
    [[nodiscard]] T dataValue () const { return d; }

private:
    T d;

#endif
};
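
// Usage sketch (illustrative): DeviceScalar<T> owns a single trivially
// copyable value allocated from The_Arena() while GPU launches are active
// (std::malloc otherwise). A kernel writes through dataPtr(); the host reads
// the result back with dataValue(), which performs a device-to-host copy.
// `n` and `data` below are hypothetical captured variables.
//
//     amrex::Gpu::DeviceScalar<int> nneg(0);
//     int* pneg = nneg.dataPtr();
//     amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) noexcept {
//         if (data[i] < 0) { amrex::Gpu::Atomic::Add(pneg, 1); }
//     });
//     int result = nneg.dataValue();  // copies the count back to the host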

#ifdef AMREX_USE_GPU

template <class T>
struct SharedMemory
{
    [[nodiscard]] AMREX_GPU_DEVICE T* dataPtr () noexcept {
        static_assert(sizeof(T) < 0, "We must specialize struct SharedMemory");
        return nullptr;
    }
};

#ifndef AMREX_USE_SYCL
// xxxxx SYCL todo: extern __shared__

template <>
struct SharedMemory<double>
{
    [[nodiscard]] AMREX_GPU_DEVICE double* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(double,amrex_sm_double);,
                          extern __shared__ double amrex_sm_double[];)
        return amrex_sm_double;
    }
};

template <>
struct SharedMemory<float>
{
    [[nodiscard]] AMREX_GPU_DEVICE float* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(float,amrex_sm_float);,
                          extern __shared__ float amrex_sm_float[];)
        return amrex_sm_float;
    }
};

template <>
struct SharedMemory<long>
{
    [[nodiscard]] AMREX_GPU_DEVICE long* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long,amrex_sm_long);,
                          extern __shared__ long amrex_sm_long[];)
        return amrex_sm_long;
    }
};

template <>
struct SharedMemory<long long>
{
    [[nodiscard]] AMREX_GPU_DEVICE long long* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long long,amrex_sm_long_long);,
                          extern __shared__ long long amrex_sm_long_long[];)
        return amrex_sm_long_long;
    }
};

template <>
struct SharedMemory<int>
{
    [[nodiscard]] AMREX_GPU_DEVICE int* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(int,amrex_sm_int);,
                          extern __shared__ int amrex_sm_int[];)
        return amrex_sm_int;
    }
};

template <>
struct SharedMemory<short>
{
    [[nodiscard]] AMREX_GPU_DEVICE short* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(short,amrex_sm_short);,
                          extern __shared__ short amrex_sm_short[];)
        return amrex_sm_short;
    }
};

template <>
struct SharedMemory<char>
{
    [[nodiscard]] AMREX_GPU_DEVICE char* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(char,amrex_sm_char);,
                          extern __shared__ char amrex_sm_char[];)
        return amrex_sm_char;
    }
};

template <>
struct SharedMemory<unsigned long>
{
    [[nodiscard]] AMREX_GPU_DEVICE unsigned long* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long,amrex_sm_ulong);,
                          extern __shared__ unsigned long amrex_sm_ulong[];)
        return amrex_sm_ulong;
    }
};

template <>
struct SharedMemory<unsigned long long>
{
    [[nodiscard]] AMREX_GPU_DEVICE unsigned long long* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long long,amrex_sm_ulonglong);,
                          extern __shared__ unsigned long long amrex_sm_ulonglong[];)
        return amrex_sm_ulonglong;
    }
};

template <>
struct SharedMemory<unsigned int>
{
    [[nodiscard]] AMREX_GPU_DEVICE unsigned int* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned int,amrex_sm_uint);,
                          extern __shared__ unsigned int amrex_sm_uint[];)
        return amrex_sm_uint;
    }
};

template <>
struct SharedMemory<unsigned short>
{
    [[nodiscard]] AMREX_GPU_DEVICE unsigned short* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned short,amrex_sm_ushort);,
                          extern __shared__ unsigned short amrex_sm_ushort[];)
        return amrex_sm_ushort;
    }
};

template <>
struct SharedMemory<unsigned char>
{
    [[nodiscard]] AMREX_GPU_DEVICE unsigned char* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned char,amrex_sm_uchar);,
                          extern __shared__ unsigned char amrex_sm_uchar[];)
        return amrex_sm_uchar;
    }
};

template <>
struct SharedMemory<bool>
{
    [[nodiscard]] AMREX_GPU_DEVICE bool* dataPtr () noexcept {
        AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(bool,amrex_sm_bool);,
                          extern __shared__ bool amrex_sm_bool[];)
        return amrex_sm_bool;
    }
};
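
// Usage sketch (illustrative): SharedMemory<T>::dataPtr() returns the base of
// the dynamic shared memory the current kernel was launched with, so the
// launch must request the byte count up front. This assumes one of AMReX's
// launch overloads taking a shared-memory size and a stream; the sizes below
// are made up.
//
//     std::size_t shared_bytes = 256*sizeof(double);
//     amrex::launch(nblocks, 256, shared_bytes, amrex::Gpu::gpuStream(),
//         [=] AMREX_GPU_DEVICE () noexcept {
//             amrex::Gpu::SharedMemory<double> gsm;
//             double* sdata = gsm.dataPtr();  // dynamic shared memory
//             // ... block-cooperative work using sdata ...
//         });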

#endif

#endif

} // namespace amrex::Gpu

#endif