Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuAsyncArray.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_ASYNC_ARRAY_H_
2#define AMREX_GPU_ASYNC_ARRAY_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Arena.H>
6#include <AMReX_TypeTraits.H>
7#include <AMReX_GpuDevice.H>
8
9#include <cstddef>
10#include <cstring>
11#include <cstdlib>
12#include <memory>
13
14#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
15extern "C" {
16#if defined(AMREX_USE_HIP)
17 void amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
18#elif defined(AMREX_USE_CUDA)
19 void CUDART_CB amrex_asyncarray_delete (void* p);
20#endif
21}
22#endif
23
24namespace amrex {
25namespace Gpu {
26
32template <typename T>
33requires (std::is_trivially_copyable_v<T>)
35{
36public:
37
38 AsyncArray (T const* h_p, const std::size_t n)
39 {
40 if (n == 0) { return; }
41 h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
42 std::memcpy(h_data, h_p, n*sizeof(T));
43#ifdef AMREX_USE_GPU
45 {
46 d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
47 Gpu::htod_memcpy_async(d_data, h_data, n*sizeof(T));
48 }
49#endif
50 }
51
52 explicit AsyncArray (const std::size_t n)
53 requires (std::is_standard_layout_v<T> && std::is_trivial_v<T>)
54
55 {
56 if (n == 0) { return; }
57#ifdef AMREX_USE_GPU
59 {
60 d_data = static_cast<T*>(The_Arena()->alloc(n*sizeof(T)));
61 }
62 else
63#endif
64 {
65 h_data = static_cast<T*>(The_Pinned_Arena()->alloc(n*sizeof(T)));
66 }
67 }
68
69 ~AsyncArray () { clear(); }
70
71 AsyncArray (AsyncArray const&) = delete;
72 AsyncArray (AsyncArray &&) = delete;
73 void operator= (AsyncArray const&) = delete;
74 void operator= (AsyncArray &&) = delete;
75
76 [[nodiscard]] T const* data () const noexcept { return (d_data != nullptr) ? d_data : h_data; }
77 [[nodiscard]] T* data () noexcept { return (d_data != nullptr) ? d_data : h_data; }
78 void clear ()
79 {
80#ifdef AMREX_USE_GPU
82 {
83 if (d_data != nullptr) {
84#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
85 T** p = static_cast<T**>(std::malloc(2*sizeof(T*)));
86 p[0] = d_data;
87 p[1] = h_data;
88#if defined(AMREX_USE_HIP)
89 AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
91#elif defined(AMREX_USE_CUDA)
92 AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
94#endif
95#elif defined(AMREX_USE_SYCL)
96 auto* pd = d_data;
97 auto* ph = h_data;
98 auto& q = *(Gpu::gpuStream().queue);
99 try {
100 q.submit([&] (sycl::handler& h) {
101 h.host_task([=] () {
102 The_Arena()->free(pd);
103 The_Pinned_Arena()->free(ph);
104 });
105 });
106 } catch (sycl::exception const& ex) {
107 amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
108 }
109#endif
110 }
111 }
112 else
113#endif
114 {
115 The_Pinned_Arena()->free(h_data);
116 }
117 d_data = nullptr;
118 h_data = nullptr;
119 }
120
121 void copyToHost (T* h_p, std::size_t n) const
122 {
123 if (n == 0) { return; }
124#ifdef AMREX_USE_GPU
125 if (d_data)
126 {
127 Gpu::dtoh_memcpy(h_p, d_data, n*sizeof(T));
128 }
129 else
130#endif
131 if (h_data)
132 {
133 std::memcpy(h_p, h_data, n*sizeof(T));
134 }
135 }
136
137private:
138 T* d_data = nullptr;
139 T* h_data = nullptr;
140};
141
142}
143
144using Gpu::AsyncArray;
145}
146
147#endif
void CUDART_CB amrex_asyncarray_delete(void *p)
Definition AMReX_GpuAsyncArray.cpp:10
void CUDART_CB amrex_asyncarray_delete(void *p)
Definition AMReX_GpuAsyncArray.cpp:10
#define AMREX_CUDA_SAFE_CALL(call)
Definition AMReX_GpuError.H:73
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
Definition AMReX_GpuAsyncArray.H:35
void copyToHost(T *h_p, std::size_t n) const
Definition AMReX_GpuAsyncArray.H:121
AsyncArray(AsyncArray const &)=delete
AsyncArray(AsyncArray &&)=delete
void clear()
Definition AMReX_GpuAsyncArray.H:78
T * data() noexcept
Definition AMReX_GpuAsyncArray.H:77
~AsyncArray()
Definition AMReX_GpuAsyncArray.H:69
T const * data() const noexcept
Definition AMReX_GpuAsyncArray.H:76
AsyncArray(T const *h_p, const std::size_t n)
Definition AMReX_GpuAsyncArray.H:38
AsyncArray(const std::size_t n)
Definition AMReX_GpuAsyncArray.H:52
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
bool inLaunchRegion() noexcept
Definition AMReX_GpuControl.H:88
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:496
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:291
Definition AMReX_Amr.cpp:50
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241