Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_GpuMemory.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_MEMORY_H_
2#define AMREX_GPU_MEMORY_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_GpuControl.H>
7#include <AMReX_GpuDevice.H>
8#include <AMReX_TypeTraits.H>
9#include <AMReX_Arena.H>
10#include <cstdlib>
11
12namespace amrex::Gpu {
13
14struct Managed {
15
16#ifdef AMREX_USE_GPU
17
18 [[nodiscard]] void *operator new (std::size_t len)
19 {
20 return The_Managed_Arena()->alloc(len);
21 }
22
23 void operator delete (void *ptr)
24 {
25 if (ptr) { The_Managed_Arena()->free(ptr); }
26 }
27
28#endif
29};
30
31struct Pinned {
32
33#ifdef AMREX_USE_GPU
34
35 [[nodiscard]] void *operator new (std::size_t len)
36 {
37 return The_Pinned_Arena()->alloc(len);
38 }
39
40 void operator delete (void *ptr)
41 {
42 if (ptr) { The_Pinned_Arena()->free(ptr); }
43 }
44
45#endif
46};
47
48struct Deleter {
49 Arena* m_arena = nullptr;
50 Deleter (Arena* ar) noexcept : m_arena(ar) {}
51 void operator() (void* pt) const noexcept { m_arena->free(pt); }
52};
53
54template <class T, std::enable_if_t<std::is_trivially_copyable_v<T>,int> = 0>
56{
57 DeviceScalar (DeviceScalar const&) = delete;
59 void operator= (DeviceScalar const&) = delete;
60 void operator= (DeviceScalar &&) = delete;
61
62#ifdef AMREX_USE_GPU
63
65 if (Gpu::inLaunchRegion()) {
66 dp = (T*)(The_Arena()->alloc(sizeof(T)));
67 } else {
68 dp = (T*)(std::malloc(sizeof(T)));
69 }
70 }
71
72 explicit DeviceScalar (T init_val) {
73 if (Gpu::inLaunchRegion()) {
74 dp = (T*)(The_Arena()->alloc(sizeof(T)));
75 Gpu::htod_memcpy(dp, &init_val, sizeof(T));
76 } else {
77 dp = (T*)(std::malloc(sizeof(T)));
78 *dp = init_val;
79 }
80 }
81
83 if (Gpu::inLaunchRegion()) {
84 The_Arena()->free(dp);
85 } else {
86 std::free(dp);
87 }
88 }
89
90 [[nodiscard]] T* dataPtr () { return dp; }
91 [[nodiscard]] T const* dataPtr () const { return dp; }
92 [[nodiscard]] T dataValue () const {
93 if (Gpu::inLaunchRegion()) {
94 T r;
95 Gpu::dtoh_memcpy(&r, dp, sizeof(T));
96 return r;
97 } else {
98 return *dp;
99 }
100 }
101
102private:
103 T* dp;
104
105#else
106
107 DeviceScalar (T const& init_val) : d(init_val) {}
108 DeviceScalar () = default;
109 ~DeviceScalar () = default;
110
111 [[nodiscard]] T* dataPtr () { return &d; }
112 [[nodiscard]] T const* dataPtr () const { return &d; }
113 [[nodiscard]] T dataValue () const { return d; }
114
115private:
116 T d;
117
118#endif
119};
120
121#ifdef AMREX_USE_GPU
122
123template <class T>
125{
126 [[nodiscard]] AMREX_GPU_DEVICE T* dataPtr () noexcept {
127 static_assert(sizeof(T) < 0, "We must specialize struct SharedMemory");
128 return nullptr;
129 }
130};
131
132#ifndef AMREX_USE_SYCL
133// xxxxx SYCL todo: extern __shared__
134
135template <>
136struct SharedMemory<double>
137{
138 [[nodiscard]] AMREX_GPU_DEVICE double* dataPtr () noexcept {
139 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(double,amrex_sm_double);,
140 extern __shared__ double amrex_sm_double[];)
141 return amrex_sm_double;
142 }
143};
144
145template <>
146struct SharedMemory<float>
147{
148 [[nodiscard]] AMREX_GPU_DEVICE float* dataPtr () noexcept {
149 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(float,amrex_sm_float);,
150 extern __shared__ float amrex_sm_float[];)
151 return amrex_sm_float;
152 }
153};
154
155template <>
156struct SharedMemory<long>
157{
158 [[nodiscard]] AMREX_GPU_DEVICE long* dataPtr () noexcept {
159 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long,amrex_sm_long);,
160 extern __shared__ long amrex_sm_long[];)
161 return amrex_sm_long;
162 }
163};
164
165template <>
166struct SharedMemory<long long>
167{
168 [[nodiscard]] AMREX_GPU_DEVICE long long* dataPtr () noexcept {
169 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(long long,amrex_sm_long_long);,
170 extern __shared__ long long amrex_sm_long_long[];)
171 return amrex_sm_long_long;
172 }
173};
174
175template <>
177{
178 [[nodiscard]] AMREX_GPU_DEVICE int* dataPtr () noexcept {
179 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(int,amrex_sm_int);,
180 extern __shared__ int amrex_sm_int[];)
181 return amrex_sm_int;
182 }
183};
184
185template <>
186struct SharedMemory<short>
187{
188 [[nodiscard]] AMREX_GPU_DEVICE short* dataPtr () noexcept {
189 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(short,amrex_sm_short);,
190 extern __shared__ short amrex_sm_short[];)
191 return amrex_sm_short;
192 }
193};
194
195template <>
196struct SharedMemory<char>
197{
198 [[nodiscard]] AMREX_GPU_DEVICE char* dataPtr () noexcept {
199 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(char,amrex_sm_char);,
200 extern __shared__ char amrex_sm_char[];)
201 return amrex_sm_char;
202 }
203};
204
205template <>
206struct SharedMemory<unsigned long>
207{
208 [[nodiscard]] AMREX_GPU_DEVICE unsigned long* dataPtr () noexcept {
209 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long,amrex_sm_ulong);,
210 extern __shared__ unsigned long amrex_sm_ulong[];)
211 return amrex_sm_ulong;
212 }
213};
214
215template <>
216struct SharedMemory<unsigned long long>
217{
218 [[nodiscard]] AMREX_GPU_DEVICE unsigned long long* dataPtr () noexcept {
219 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned long long,amrex_sm_ulonglong);,
220 extern __shared__ unsigned long long amrex_sm_ulonglong[];)
221 return amrex_sm_ulonglong;
222 }
223};
224
225template <>
226struct SharedMemory<unsigned int>
227{
228 [[nodiscard]] AMREX_GPU_DEVICE unsigned int* dataPtr () noexcept {
229 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned int,amrex_sm_uint);,
230 extern __shared__ unsigned int amrex_sm_uint[];)
231 return amrex_sm_uint;
232 }
233};
234
235template <>
236struct SharedMemory<unsigned short>
237{
238 [[nodiscard]] AMREX_GPU_DEVICE unsigned short* dataPtr () noexcept {
239 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned short,amrex_sm_ushort);,
240 extern __shared__ unsigned short amrex_sm_ushort[];)
241 return amrex_sm_ushort;
242 }
243};
244
245template <>
246struct SharedMemory<unsigned char>
247{
248 [[nodiscard]] AMREX_GPU_DEVICE unsigned char* dataPtr () noexcept {
249 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(unsigned char,amrex_sm_uchar);,
250 extern __shared__ unsigned char amrex_sm_uchar[];)
251 return amrex_sm_uchar;
252 }
253};
254
255template <>
256struct SharedMemory<bool>
257{
258 [[nodiscard]] AMREX_GPU_DEVICE bool* dataPtr () noexcept {
259 AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(bool,amrex_sm_bool);,
260 extern __shared__ bool amrex_sm_bool[];)
261 return amrex_sm_bool;
262 }
263};
264
265#endif
266
267#endif
268
269} //namespace
270
271
272#endif
#define AMREX_HIP_OR_CUDA(a, b)
Definition AMReX_GpuControl.H:21
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
A virtual base class for objects that manage their own dynamic memory allocation.
Definition AMReX_Arena.H:100
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
Definition AMReX_BaseFwd.H:52
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:86
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:301
void htod_memcpy(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:293
Arena * The_Managed_Arena()
Definition AMReX_Arena.cpp:646
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:656
const int[]
Definition AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition AMReX_Arena.cpp:616
Definition AMReX_GpuMemory.H:48
Deleter(Arena *ar) noexcept
Definition AMReX_GpuMemory.H:50
void operator()(void *pt) const noexcept
Definition AMReX_GpuMemory.H:51
Arena * m_arena
Definition AMReX_GpuMemory.H:49
Definition AMReX_GpuMemory.H:56
DeviceScalar()
Definition AMReX_GpuMemory.H:64
T dataValue() const
Definition AMReX_GpuMemory.H:92
void operator=(DeviceScalar const &)=delete
DeviceScalar(DeviceScalar const &)=delete
DeviceScalar(DeviceScalar &&)=delete
T const * dataPtr() const
Definition AMReX_GpuMemory.H:91
DeviceScalar(T init_val)
Definition AMReX_GpuMemory.H:72
T * dataPtr()
Definition AMReX_GpuMemory.H:90
~DeviceScalar()
Definition AMReX_GpuMemory.H:82
T * dp
Definition AMReX_GpuMemory.H:103
Definition AMReX_GpuMemory.H:14
Definition AMReX_GpuMemory.H:31
AMREX_GPU_DEVICE bool * dataPtr() noexcept
Definition AMReX_GpuMemory.H:258
AMREX_GPU_DEVICE char * dataPtr() noexcept
Definition AMReX_GpuMemory.H:198
AMREX_GPU_DEVICE double * dataPtr() noexcept
Definition AMReX_GpuMemory.H:138
AMREX_GPU_DEVICE float * dataPtr() noexcept
Definition AMReX_GpuMemory.H:148
AMREX_GPU_DEVICE int * dataPtr() noexcept
Definition AMReX_GpuMemory.H:178
AMREX_GPU_DEVICE long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:158
AMREX_GPU_DEVICE long long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:168
AMREX_GPU_DEVICE short * dataPtr() noexcept
Definition AMReX_GpuMemory.H:188
AMREX_GPU_DEVICE unsigned char * dataPtr() noexcept
Definition AMReX_GpuMemory.H:248
AMREX_GPU_DEVICE unsigned int * dataPtr() noexcept
Definition AMReX_GpuMemory.H:228
AMREX_GPU_DEVICE unsigned long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:208
AMREX_GPU_DEVICE unsigned long long * dataPtr() noexcept
Definition AMReX_GpuMemory.H:218
AMREX_GPU_DEVICE unsigned short * dataPtr() noexcept
Definition AMReX_GpuMemory.H:238
Definition AMReX_GpuMemory.H:125
AMREX_GPU_DEVICE T * dataPtr() noexcept
Definition AMReX_GpuMemory.H:126