Block-Structured AMR Software Framework
AMReX_GpuUtility.H
Go to the documentation of this file.
1 #ifndef AMREX_GPU_UTILITY_H_
2 #define AMREX_GPU_UTILITY_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_GpuQualifiers.H>
6 #include <AMReX_GpuTypes.H>
7 #include <AMReX_GpuControl.H>
8 #include <AMReX_GpuDevice.H>
9 #include <AMReX_Extension.H>
10 #include <AMReX_REAL.H>
11 #include <AMReX_INT.H>
12 #include <AMReX_Array.H>
13 #include <AMReX_Array4.H>
14 #include <iosfwd>
15 #include <cmath>
16 #include <cstring>
17 
18 #ifdef AMREX_USE_CUDA
19 #include <cuda.h>
20 #include <curand_kernel.h> // Is this needed here?
21 #endif
22 
23 namespace amrex {
24 namespace Gpu {
25 
26  template <typename T>
28  T LDG (Array4<T> const& a, int i, int j, int k) noexcept {
29 #if defined(AMREX_USE_CUDA)
30  AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));))
31  AMREX_IF_ON_HOST((return a(i,j,k);))
32 #else
33  return a(i,j,k);
34 #endif
35  }
36 
37  template <typename T>
39  T LDG (Array4<T> const& a, int i, int j, int k, int n) noexcept {
40 #if defined(AMREX_USE_CUDA)
41  AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));))
42  AMREX_IF_ON_HOST((return a(i,j,k,n);))
43 #else
44  return a(i,j,k,n);
45 #endif
46  }
47 
48  inline bool isManaged (void const* p) noexcept {
49 #ifdef AMREX_USE_CUDA
50  CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_IS_MANAGED;
51  unsigned int is_managed = 0;
52  void* data[] = { (void*)(&is_managed) };
53  CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
54  return r == CUDA_SUCCESS && is_managed;
55 #elif defined(AMREX_USE_SYCL)
56  auto type = sycl::get_pointer_type(p,Device::syclContext());
57  return type == sycl::usm::alloc::shared;
58 #else
60  return false;
61 #endif
62  }
63 
64  inline bool isDevicePtr (void const* p) noexcept {
65 #if defined(AMREX_USE_HIP)
66  hipPointerAttribute_t attrib;
67  hipError_t r = hipPointerGetAttributes(&attrib, p);
68 #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
69  return r == hipSuccess && attrib.memoryType == hipMemoryTypeDevice;
70 #else
71  return r == hipSuccess && attrib.type == hipMemoryTypeDevice;
72 #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
73 #elif defined(AMREX_USE_CUDA)
74  CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
75  CUmemorytype mem_type = static_cast<CUmemorytype>(0);
76  void* data[] = { (void*)(&mem_type) };
77  CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
78  return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_DEVICE;
79 #elif defined(AMREX_USE_SYCL)
80  auto type = sycl::get_pointer_type(p,Device::syclContext());
81  return type == sycl::usm::alloc::device;
82 #else
84  return false;
85 #endif
86  }
87 
88  inline bool isPinnedPtr (void const* p) noexcept {
89 #if defined(AMREX_USE_HIP)
90  hipPointerAttribute_t attrib;
91  hipError_t r = hipPointerGetAttributes(&attrib, p);
92 #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
93  return r == hipSuccess && attrib.memoryType == hipMemoryTypeHost;
94 #else
95  return r == hipSuccess && attrib.type == hipMemoryTypeHost;
96 #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
97 #elif defined(AMREX_USE_CUDA)
98  CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
99  CUmemorytype mem_type = static_cast<CUmemorytype>(0);
100  void* data[] = { (void*)(&mem_type) };
101  CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
102  return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_HOST;
103 #elif defined(AMREX_USE_SYCL)
104  auto type = sycl::get_pointer_type(p,Device::syclContext());
105  return type == sycl::usm::alloc::host;
106 #else
108  return false;
109 #endif
110  }
111 
112  inline bool isGpuPtr (void const* p) noexcept {
113 #if defined(AMREX_USE_HIP)
114  if (isManaged(p)) { // We might be using CUDA/NVCC
115  return true;
116  } else {
117  hipPointerAttribute_t attrib;
118  hipError_t r = hipPointerGetAttributes(&attrib, p);
119 #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
120  return r == hipSuccess &&
121  (attrib.memoryType == hipMemoryTypeHost ||
122  attrib.memoryType == hipMemoryTypeDevice);
123 #else
124  return r == hipSuccess &&
125  (attrib.type == hipMemoryTypeHost ||
126  attrib.type == hipMemoryTypeDevice);
127 #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
128  }
129 #elif defined(AMREX_USE_CUDA)
130  CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
131  CUmemorytype mem_type = static_cast<CUmemorytype>(0);
132  void* data[] = { (void*)(&mem_type) };
133  CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
134  return r == CUDA_SUCCESS &&
135  (mem_type == CU_MEMORYTYPE_HOST ||
136  mem_type == CU_MEMORYTYPE_DEVICE ||
137  mem_type == CU_MEMORYTYPE_ARRAY ||
138  mem_type == CU_MEMORYTYPE_UNIFIED);
139 #elif defined(AMREX_USE_SYCL)
140  auto type = sycl::get_pointer_type(p,Device::syclContext());
141  return type != sycl::usm::alloc::unknown;
142 #else
144  return false;
145 #endif
146  }
147 
148  template <class T>
150  bool isnan (T m) noexcept
151  {
152 #if defined(AMREX_USE_SYCL)
153  return sycl::isnan(m);
154 #else
155  return std::isnan(m);
156 #endif
157  }
158 
159  template <class T>
161  bool isinf (T m) noexcept
162  {
163 #if defined(AMREX_USE_SYCL)
164  return sycl::isinf(m);
165 #else
166  return std::isinf(m);
167 #endif
168  }
169 
171  {
173  StreamItInfo () noexcept
174  : device_sync(!Gpu::inNoSyncRegion()) {}
176  device_sync = false;
177  return *this;
178  }
179  };
180 
182  {
183  public:
184  StreamIter (int n, bool is_thread_safe=true) noexcept;
185  StreamIter (int n, const StreamItInfo& info, bool is_thread_safe=true) noexcept;
186 
187  ~StreamIter ();
188 
189  StreamIter (StreamIter const&) = delete;
190  StreamIter (StreamIter &&) = delete;
191  void operator= (StreamIter const&) = delete;
192  void operator= (StreamIter &&) = delete;
193 
194  int operator() () const noexcept { return m_i; }
195 
196  [[nodiscard]] bool isValid () const noexcept { return m_i < m_n; }
197 
198 #if !defined(AMREX_USE_GPU)
199  void operator++ () noexcept { ++m_i; }
200 #else
201  void operator++ () noexcept;
202 #endif
203 
204  private:
205  void init () noexcept; // NOLINT
206 
207  int m_n;
208  int m_i;
210  bool m_sync;
211  };
212 
214 void* memcpy (void* dest, const void* src, std::size_t count)
215 {
216 #if defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP)
217  return ::memcpy(dest, src, count);
218 #else
219  return std::memcpy(dest, src, count);
220 #endif
221 }
222 
223 } // namespace Gpu
224 
225 #ifdef AMREX_USE_GPU
226 std::ostream& operator<< (std::ostream& os, const dim3& d);
227 #endif
228 
229 using Gpu::isnan;
230 using Gpu::isinf;
231 
232 } // namespace amrex
233 
234 #endif
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition: AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition: AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
Definition: AMReX_GpuUtility.H:182
int operator()() const noexcept
Definition: AMReX_GpuUtility.H:194
void operator++() noexcept
Definition: AMReX_GpuUtility.cpp:81
void init() noexcept
Definition: AMReX_GpuUtility.cpp:36
~StreamIter()
Definition: AMReX_GpuUtility.cpp:65
bool isValid() const noexcept
Definition: AMReX_GpuUtility.H:196
void operator=(StreamIter const &)=delete
int m_n
Definition: AMReX_GpuUtility.H:207
bool m_threadsafe
Definition: AMReX_GpuUtility.H:209
StreamIter(StreamIter &&)=delete
int m_i
Definition: AMReX_GpuUtility.H:208
StreamIter(StreamIter const &)=delete
bool m_sync
Definition: AMReX_GpuUtility.H:210
StreamIter(int n, bool is_thread_safe=true) noexcept
Definition: AMReX_GpuUtility.cpp:23
static constexpr Type_t unknown
Definition: AMReX_EB2_Graph.H:41
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isinf(T m) noexcept
Definition: AMReX_GpuUtility.H:161
bool isManaged(void const *p) noexcept
Definition: AMReX_GpuUtility.H:48
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T LDG(Array4< T > const &a, int i, int j, int k) noexcept
Definition: AMReX_GpuUtility.H:28
bool isGpuPtr(void const *p) noexcept
Definition: AMReX_GpuUtility.H:112
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
bool isPinnedPtr(void const *p) noexcept
Definition: AMReX_GpuUtility.H:88
bool inNoSyncRegion() noexcept
Definition: AMReX_GpuControl.H:146
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isnan(T m) noexcept
Definition: AMReX_GpuUtility.H:150
bool isDevicePtr(void const *p) noexcept
Definition: AMReX_GpuUtility.H:64
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
std::ostream & operator<<(std::ostream &os, AmrMesh const &amr_mesh)
Definition: AMReX_AmrMesh.cpp:1236
Definition: AMReX_Array4.H:61
Definition: AMReX_GpuUtility.H:171
StreamItInfo() noexcept
Definition: AMReX_GpuUtility.H:173
StreamItInfo & DisableDeviceSync() noexcept
Definition: AMReX_GpuUtility.H:175
bool device_sync
Definition: AMReX_GpuUtility.H:172