Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuMemory.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_MEMORY_H_
2#define AMREX_GPU_MEMORY_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_GpuControl.H>
7#include <AMReX_GpuDevice.H>
8#include <AMReX_TypeTraits.H>
9#include <AMReX_Arena.H>
10#include <cstdlib>
11
12namespace amrex::Gpu {
13
14struct Managed {
15
16#ifdef AMREX_USE_GPU
17
18 [[nodiscard]] void *operator new (std::size_t len)
19 {
20 return The_Managed_Arena()->alloc(len);
21 }
22
23 void operator delete (void *ptr)
24 {
25 if (ptr) { The_Managed_Arena()->free(ptr); }
26 }
27
28#endif
29};
30
31struct Pinned {
32
33#ifdef AMREX_USE_GPU
34
35 [[nodiscard]] void *operator new (std::size_t len)
36 {
37 return The_Pinned_Arena()->alloc(len);
38 }
39
40 void operator delete (void *ptr)
41 {
42 if (ptr) { The_Pinned_Arena()->free(ptr); }
43 }
44
45#endif
46};
47
48struct Deleter {
49 Arena* m_arena = nullptr;
50 Deleter (Arena* ar) noexcept : m_arena(ar) {}
51 void operator() (void* pt) const noexcept { m_arena->free(pt); }
52};
53
54template <class T>
55requires (std::is_trivially_copyable_v<T>)
57{
58 DeviceScalar (DeviceScalar const&) = delete;
60 void operator= (DeviceScalar const&) = delete;
61 void operator= (DeviceScalar &&) = delete;
62
63#ifdef AMREX_USE_GPU
64
66 if (Gpu::inLaunchRegion()) {
67 dp = (T*)(The_Arena()->alloc(sizeof(T)));
68 } else {
69 dp = (T*)(std::malloc(sizeof(T)));
70 }
71 }
72
73 explicit DeviceScalar (T init_val) {
74 if (Gpu::inLaunchRegion()) {
75 dp = (T*)(The_Arena()->alloc(sizeof(T)));
76 Gpu::htod_memcpy(dp, &init_val, sizeof(T));
77 } else {
78 dp = (T*)(std::malloc(sizeof(T)));
79 *dp = init_val;
80 }
81 }
82
84 if (Gpu::inLaunchRegion()) {
85 The_Arena()->free(dp);
86 } else {
87 std::free(dp);
88 }
89 }
90
91 [[nodiscard]] T* dataPtr () { return dp; }
92 [[nodiscard]] T const* dataPtr () const { return dp; }
93 [[nodiscard]] T dataValue () const {
94 if (Gpu::inLaunchRegion()) {
95 T r;
96 Gpu::dtoh_memcpy(&r, dp, sizeof(T));
97 return r;
98 } else {
99 return *dp;
100 }
101 }
102
103private:
104 T* dp;
105
106#else
107
108 DeviceScalar (T const& init_val) : d(init_val) {}
109 DeviceScalar () = default;
110 ~DeviceScalar () = default;
111
112 [[nodiscard]] T* dataPtr () { return &d; }
113 [[nodiscard]] T const* dataPtr () const { return &d; }
114 [[nodiscard]] T dataValue () const { return d; }
115
116private:
117 T d;
118
119#endif
120};
121
122#ifdef AMREX_USE_GPU
123
124template <class T>
126{
127 [[nodiscard]] AMREX_GPU_DEVICE T* dataPtr () noexcept {
128 static_assert(sizeof(T) < 0, "We must specialize struct SharedMemory");
129 return nullptr;
130 }
131};
132
133#ifndef AMREX_USE_SYCL
134// xxxxx SYCL todo: extern __shared__
135
136template <>
137struct SharedMemory<double>
138{
139 [[nodiscard]] AMREX_GPU_DEVICE double* dataPtr () noexcept {
140 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(double,amrex_sm_double);,
141 extern __shared__ double amrex_sm_double[];)
142 return amrex_sm_double;
143 }
144};
145
146template <>
147struct SharedMemory<float>
148{
149 [[nodiscard]] AMREX_GPU_DEVICE float* dataPtr () noexcept {
150 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(float,amrex_sm_float);,
151 extern __shared__ float amrex_sm_float[];)
152 return amrex_sm_float;
153 }
154};
155
156template <>
157struct SharedMemory<long>
158{
159 [[nodiscard]] AMREX_GPU_DEVICE long* dataPtr () noexcept {
160 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long,amrex_sm_long);,
161 extern __shared__ long amrex_sm_long[];)
162 return amrex_sm_long;
163 }
164};
165
166template <>
167struct SharedMemory<long long>
168{
169 [[nodiscard]] AMREX_GPU_DEVICE long long* dataPtr () noexcept {
170 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long long,amrex_sm_long_long);,
171 extern __shared__ long long amrex_sm_long_long[];)
172 return amrex_sm_long_long;
173 }
174};
175
176template <>
178{
179 [[nodiscard]] AMREX_GPU_DEVICE int* dataPtr () noexcept {
180 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(int,amrex_sm_int);,
181 extern __shared__ int amrex_sm_int[];)
182 return amrex_sm_int;
183 }
184};
185
186template <>
187struct SharedMemory<short>
188{
189 [[nodiscard]] AMREX_GPU_DEVICE short* dataPtr () noexcept {
190 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(short,amrex_sm_short);,
191 extern __shared__ short amrex_sm_short[];)
192 return amrex_sm_short;
193 }
194};
195
196template <>
197struct SharedMemory<char>
198{
199 [[nodiscard]] AMREX_GPU_DEVICE char* dataPtr () noexcept {
200 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(char,amrex_sm_char);,
201 extern __shared__ char amrex_sm_char[];)
202 return amrex_sm_char;
203 }
204};
205
206template <>
207struct SharedMemory<unsigned long>
208{
209 [[nodiscard]] AMREX_GPU_DEVICE unsigned long* dataPtr () noexcept {
210 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long,amrex_sm_ulong);,
211 extern __shared__ unsigned long amrex_sm_ulong[];)
212 return amrex_sm_ulong;
213 }
214};
215
216template <>
217struct SharedMemory<unsigned long long>
218{
219 [[nodiscard]] AMREX_GPU_DEVICE unsigned long long* dataPtr () noexcept {
220 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long long,amrex_sm_ulonglong);,
221 extern __shared__ unsigned long long amrex_sm_ulonglong[];)
222 return amrex_sm_ulonglong;
223 }
224};
225
226template <>
227struct SharedMemory<unsigned int>
228{
229 [[nodiscard]] AMREX_GPU_DEVICE unsigned int* dataPtr () noexcept {
230 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned int,amrex_sm_uint);,
231 extern __shared__ unsigned int amrex_sm_uint[];)
232 return amrex_sm_uint;
233 }
234};
235
236template <>
237struct SharedMemory<unsigned short>
238{
239 [[nodiscard]] AMREX_GPU_DEVICE unsigned short* dataPtr () noexcept {
240 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned short,amrex_sm_ushort);,
241 extern __shared__ unsigned short amrex_sm_ushort[];)
242 return amrex_sm_ushort;
243 }
244};
245
246template <>
247struct SharedMemory<unsigned char>
248{
249 [[nodiscard]] AMREX_GPU_DEVICE unsigned char* dataPtr () noexcept {
250 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned char,amrex_sm_uchar);,
251 extern __shared__ unsigned char amrex_sm_uchar[];)
252 return amrex_sm_uchar;
253 }
254};
255
256template <>
257struct SharedMemory<bool>
258{
259 [[nodiscard]] AMREX_GPU_DEVICE bool* dataPtr () noexcept {
260 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(bool,amrex_sm_bool);,
261 extern __shared__ bool amrex_sm_bool[];)
262 return amrex_sm_bool;
263 }
264};
265
266#endif
267
268#endif
269
270} //namespace
271
272
273#endif
#define AMREX_HIP_OR_CUDA(a, b)
Definition AMReX_GpuControl.H:17
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
A virtual base class for objects that manage their own dynamic memory allocation.
Definition AMReX_Arena.H:132
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
Arena * The_Managed_Arena()
Definition AMReX_Arena.cpp:850
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
Definition AMReX_BaseFwd.H:55
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:88
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:496
void htod_memcpy(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:488
const int[]
Definition AMReX_BLProfiler.cpp:1664
Definition AMReX_GpuMemory.H:48
Deleter(Arena *ar) noexcept
Definition AMReX_GpuMemory.H:50
void operator()(void *pt) const noexcept
Definition AMReX_GpuMemory.H:51
Arena * m_arena
Definition AMReX_GpuMemory.H:49
Definition AMReX_GpuMemory.H:57
~DeviceScalar()
Definition AMReX_GpuMemory.H:83
T * dataPtr()
Definition AMReX_GpuMemory.H:91
DeviceScalar(DeviceScalar &&)=delete
DeviceScalar()
Definition AMReX_GpuMemory.H:65
DeviceScalar(T init_val)
Definition AMReX_GpuMemory.H:73
T dataValue() const
Definition AMReX_GpuMemory.H:93
DeviceScalar(DeviceScalar const &)=delete
T const * dataPtr() const
Definition AMReX_GpuMemory.H:92
Definition AMReX_GpuMemory.H:14
Definition AMReX_GpuMemory.H:31
__device__ bool * dataPtr() noexcept
Definition AMReX_GpuMemory.H:259
__device__ char * dataPtr() noexcept
Definition AMReX_GpuMemory.H:199
__device__ double * dataPtr() noexcept
Definition AMReX_GpuMemory.H:139
__device__ float * dataPtr() noexcept
Definition AMReX_GpuMemory.H:149
__device__ int * dataPtr() noexcept
Definition AMReX_GpuMemory.H:179
__device__ long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:159
__device__ long long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:169
__device__ short * dataPtr() noexcept
Definition AMReX_GpuMemory.H:189
__device__ unsigned char * dataPtr() noexcept
Definition AMReX_GpuMemory.H:249
__device__ unsigned int * dataPtr() noexcept
Definition AMReX_GpuMemory.H:229
__device__ unsigned long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:209
__device__ unsigned long long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:219
__device__ unsigned short * dataPtr() noexcept
Definition AMReX_GpuMemory.H:239
Definition AMReX_GpuMemory.H:126
__device__ T * dataPtr() noexcept
Definition AMReX_GpuMemory.H:127