Block-Structured AMR Software Framework
 
Loading...
Searching...
No Matches
AMReX_GpuDevice.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_DEVICE_H_
2#define AMREX_GPU_DEVICE_H_
3#include <AMReX_Config.H>
4
5#include <AMReX.H>
6#include <AMReX_Extension.H>
7#include <AMReX_Utility.H>
8#include <AMReX_GpuTypes.H>
9#include <AMReX_GpuError.H>
10#include <AMReX_GpuControl.H>
11#include <AMReX_OpenMP.H>
12#include <AMReX_Vector.H>
13
14#include <algorithm>
15#include <array>
16#include <cstdlib>
17#include <cstring>
18#include <memory>
19#include <mutex>
20
21#define AMREX_GPU_MAX_STREAMS 8
22
23#ifdef AMREX_USE_GPU
24namespace amrex {
25#ifdef AMREX_USE_HIP
26using gpuDeviceProp_t = hipDeviceProp_t;
27#elif defined(AMREX_USE_CUDA)
28using gpuDeviceProp_t = cudaDeviceProp;
29#elif defined(AMREX_USE_SYCL)
30 struct gpuDeviceProp_t {
31 std::string name;
32 std::string vendor; // SYCL only (inferred for CUDA and HIP)
33 std::size_t totalGlobalMem;
34 std::size_t sharedMemPerBlock;
35 int multiProcessorCount;
36 int maxThreadsPerMultiProcessor;
37 int maxThreadsPerBlock;
38 int maxThreadsDim[3];
39 int maxGridSize[3];
40 int warpSize;
41 Long maxMemAllocSize; // SYCL only
42 int managedMemory;
43 int concurrentManagedAccess;
44 int maxParameterSize;
45 };
46#endif
47}
48#endif
49
50namespace amrex {
51 class Arena;
52}
53
54namespace amrex::Gpu {
55
#ifdef AMREX_USE_GPU
//! Owns one GPU stream plus a list of deferred deallocations to be released
//! once the stream has been synchronized.
//! NOTE(review): the `class StreamManager {` opener was lost in extraction;
//! reconstructed from the member index (definitions at AMReX_GpuDevice.cpp:146-204).
class StreamManager {
    gpuStream_t m_stream;                              // the managed stream
    std::mutex m_mutex;                                // guards m_free_wait_list
    Vector<std::pair<Arena*, void*>> m_free_wait_list; // (arena, ptr) pairs queued by free_async
public:
    [[nodiscard]] gpuStream_t& get ();
    void sync ();
    void free_async (Arena* arena, void* mem);
    std::size_t wait_list_size ();
};
#endif
68
69class Device
70{
71
72public:
73
74 static void Initialize (bool minimal, int a_device_id);
75 static void Finalize ();
76
77#if defined(AMREX_USE_GPU)
78 static gpuStream_t gpuStream () noexcept {
79 return gpu_stream_pool[gpu_stream_index[OpenMP::get_thread_num()]].get();
80 }
81#ifdef AMREX_USE_CUDA
83 static cudaStream_t cudaStream () noexcept { return gpuStream(); }
84#endif
85#ifdef AMREX_USE_SYCL
86 static sycl::queue& streamQueue () noexcept { return *(gpuStream().queue); }
87 static sycl::queue& streamQueue (int i) noexcept { return *(gpu_stream_pool[i].get().queue); }
88#endif
89#endif
90
91 static int numGpuStreams () noexcept {
92 return inSingleStreamRegion() ? 1 : max_gpu_streams;
93 }
94
95 static void setStreamIndex (int idx) noexcept;
96 static void resetStreamIndex () noexcept { setStreamIndex(0); }
97
98#ifdef AMREX_USE_GPU
99 static int streamIndex (gpuStream_t s = gpuStream()) noexcept;
100
101 static gpuStream_t setStream (gpuStream_t s) noexcept;
102 static gpuStream_t resetStream () noexcept;
103#endif
104
105 static int deviceId () noexcept;
106 static int numDevicesUsed () noexcept; // Total number of device used
107 static int numDevicePartners () noexcept; // Number of partners sharing my device
108
113 static void synchronize () noexcept;
114
119 static void streamSynchronize () noexcept;
120
125 static void streamSynchronizeAll () noexcept;
126
127 static void freeAsync (Arena* arena, void* mem) noexcept;
128
129 static bool clearFreeAsyncBuffer () noexcept;
130
131#if defined(__CUDACC__)
133 static void startGraphRecording(bool first_iter, void* h_ptr, void* d_ptr, size_t sz);
134 static cudaGraphExec_t stopGraphRecording(bool last_iter);
135
137 static cudaGraphExec_t instantiateGraph(cudaGraph_t graph);
138
140 static void executeGraph(const cudaGraphExec_t &graphExec, bool synch = true);
141
142#endif
143
144 static void mem_advise_set_preferred (void* p, std::size_t sz, int device);
145 static void mem_advise_set_readonly (void* p, std::size_t sz);
146
147#ifdef AMREX_USE_GPU
148 static void setNumThreadsMin (int nx, int ny, int nz) noexcept;
149 static void n_threads_and_blocks (const Long N, dim3& numBlocks, dim3& numThreads) noexcept;
150 static void c_comps_threads_and_blocks (const int* lo, const int* hi, const int comps,
151 dim3& numBlocks, dim3& numThreads) noexcept;
152 static void c_threads_and_blocks (const int* lo, const int* hi, dim3& numBlocks, dim3& numThreads) noexcept;
153 static void grid_stride_threads_and_blocks (dim3& numBlocks, dim3& numThreads) noexcept;
154
155 static std::size_t totalGlobalMem () noexcept { return device_prop.totalGlobalMem; }
156 static std::size_t sharedMemPerBlock () noexcept { return device_prop.sharedMemPerBlock; }
157 static int numMultiProcessors () noexcept { return device_prop.multiProcessorCount; }
158 static int maxThreadsPerMultiProcessor () noexcept { return device_prop.maxThreadsPerMultiProcessor; }
159 static int maxThreadsPerBlock () noexcept { return device_prop.maxThreadsPerBlock; }
160 static int maxThreadsPerBlock (int dir) noexcept { return device_prop.maxThreadsDim[dir]; }
161 static int maxBlocksPerGrid (int dir) noexcept { return device_prop.maxGridSize[dir]; }
162 static std::string deviceName () noexcept { return std::string(device_prop.name); }
163#endif
164
165#ifdef AMREX_USE_CUDA
166 static int devicePropMajor () noexcept { return device_prop.major; }
167 static int devicePropMinor () noexcept { return device_prop.minor; }
168#endif
169
170 static std::string deviceVendor() noexcept
171 {
172#if defined(AMREX_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
173 return std::string("AMD");
174#elif defined(AMREX_USE_CUDA) || (defined(AMREX_USE_HIP) && defined(__HIP_PLATFORM_NVIDIA__))
175 // Using HIP on NVIDIA GPUs isn't currently supported by AMReX
176 return std::string("NVIDIA");
177#elif defined(AMREX_USE_SYCL)
178 return device_prop.vendor;
179#else
180 return std::string("Unknown");
181#endif
182 }
183
184 static std::size_t freeMemAvailable ();
185 static void profilerStart ();
186 static void profilerStop ();
187
188#ifdef AMREX_USE_GPU
189
190 static int memoryPoolsSupported () noexcept { return memory_pools_supported; }
191
192#if defined(AMREX_USE_HIP)
193 static AMREX_EXPORT constexpr int warp_size = AMREX_AMDGCN_WAVEFRONT_SIZE;
194#elif defined(AMREX_USE_SYCL)
195 static AMREX_EXPORT constexpr int warp_size = AMREX_SYCL_SUB_GROUP_SIZE;
196#else
197 static AMREX_EXPORT constexpr int warp_size = AMREX_HIP_OR_CUDA(64,32);
198#endif
199
200 static unsigned int maxBlocksPerLaunch () noexcept { return max_blocks_per_launch; }
201
202#ifdef AMREX_USE_SYCL
203 static Long maxMemAllocSize () noexcept { return device_prop.maxMemAllocSize; }
204 static sycl::context& syclContext () { return *sycl_context; }
205 static sycl::device& syclDevice () { return *sycl_device; }
206#endif
207#endif
208
209private:
210
211 static void initialize_gpu (bool minimal);
212
213 static AMREX_EXPORT int device_id;
214 static AMREX_EXPORT int num_devices_used;
215 static AMREX_EXPORT int num_device_partners;
216 static AMREX_EXPORT int verbose;
217 static AMREX_EXPORT int max_gpu_streams;
218
219#ifdef AMREX_USE_GPU
220 static AMREX_EXPORT dim3 numThreadsMin;
221 static AMREX_EXPORT dim3 numBlocksOverride, numThreadsOverride;
222
223 static AMREX_EXPORT Vector<StreamManager> gpu_stream_pool; // The size of this is max_gpu_stream
224 // The non-owning gpu_stream_index is used to store the current stream index that will be used.
225 // gpu_stream_index is a vector so that it's thread safe to write to it.
226 static AMREX_EXPORT Vector<int> gpu_stream_index; // The size of this is omp_max_threads
227 static AMREX_EXPORT gpuDeviceProp_t device_prop;
228 static AMREX_EXPORT int memory_pools_supported;
229 static AMREX_EXPORT unsigned int max_blocks_per_launch;
230
231#ifdef AMREX_USE_SYCL
232 static AMREX_EXPORT std::unique_ptr<sycl::context> sycl_context;
233 static AMREX_EXPORT std::unique_ptr<sycl::device> sycl_device;
234#endif
235
236 friend StreamManager;
237#endif
238};
239
240// Put these in amrex::Gpu
241
#if defined(AMREX_USE_GPU)
//! Free-function convenience wrapper for Device::gpuStream().
inline gpuStream_t gpuStream () noexcept { return Device::gpuStream(); }
#endif
249
250inline int
251numGpuStreams () noexcept
252{
253 return Device::numGpuStreams();
254}
255
256inline void
257synchronize () noexcept
258{
260}
261
262inline void
264{
266}
267
268inline void
273
283inline void
284freeAsync (Arena* arena, void* mem) noexcept
285{
286 Device::freeAsync(arena, mem);
287}
288
292inline bool
294{
296}
297
298#ifdef AMREX_USE_GPU
299
300inline void
301htod_memcpy_async (void* p_d, const void* p_h, const std::size_t sz) noexcept
302{
303 if (sz == 0) { return; }
304#ifdef AMREX_USE_SYCL
305 auto& q = Device::streamQueue();
306 q.submit([&] (sycl::handler& h) { h.memcpy(p_d, p_h, sz); });
307#else
309 AMREX_HIP_SAFE_CALL(hipMemcpyAsync(p_d, p_h, sz, hipMemcpyHostToDevice, gpuStream()));,
310 AMREX_CUDA_SAFE_CALL(cudaMemcpyAsync(p_d, p_h, sz, cudaMemcpyHostToDevice, gpuStream())); )
311#endif
312}
313
314inline void
315dtoh_memcpy_async (void* p_h, const void* p_d, const std::size_t sz) noexcept
316{
317 if (sz == 0) { return; }
318#ifdef AMREX_USE_SYCL
319 auto& q = Device::streamQueue();
320 q.submit([&] (sycl::handler& h) { h.memcpy(p_h, p_d, sz); });
321#else
323 AMREX_HIP_SAFE_CALL(hipMemcpyAsync(p_h, p_d, sz, hipMemcpyDeviceToHost, gpuStream()));,
324 AMREX_CUDA_SAFE_CALL(cudaMemcpyAsync(p_h, p_d, sz, cudaMemcpyDeviceToHost, gpuStream())); )
325#endif
326}
327
328inline void
329dtod_memcpy_async (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept
330{
331 if (sz == 0) { return; }
332#ifdef AMREX_USE_SYCL
333 auto& q = Device::streamQueue();
334 q.submit([&] (sycl::handler& h) { h.memcpy(p_d_dst, p_d_src, sz); });
335#else
337 AMREX_HIP_SAFE_CALL(hipMemcpyAsync(p_d_dst, p_d_src, sz, hipMemcpyDeviceToDevice, gpuStream()));,
338 AMREX_CUDA_SAFE_CALL(cudaMemcpyAsync(p_d_dst, p_d_src, sz, cudaMemcpyDeviceToDevice, gpuStream())); )
339#endif
340}
341
342#else // AMREX_USE_GPU
343
//! Host-only build: "device" memory is ordinary host memory, so the
//! asynchronous host-to-device copy degenerates to a plain memcpy.
inline void
htod_memcpy_async (void* p_d, const void* p_h, const std::size_t sz) noexcept
{
    if (sz != 0) {
        std::memcpy(p_d, p_h, sz);
    }
}
350
//! Host-only build: the asynchronous device-to-host copy degenerates to a
//! plain memcpy.
inline void
dtoh_memcpy_async (void* p_h, const void* p_d, const std::size_t sz) noexcept
{
    if (sz != 0) {
        std::memcpy(p_h, p_d, sz);
    }
}
357
//! Host-only build: the asynchronous device-to-device copy degenerates to a
//! plain memcpy.
inline void
dtod_memcpy_async (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept
{
    if (sz != 0) {
        std::memcpy(p_d_dst, p_d_src, sz);
    }
}
364
365#endif // AMREX_USE_GPU
366
367inline void
368htod_memcpy (void* p_d, const void* p_h, const std::size_t sz) noexcept
369{
370 if (sz == 0) { return; }
371 htod_memcpy_async(p_d, p_h, sz);
373}
374
375inline void
376dtoh_memcpy (void* p_h, const void* p_d, const std::size_t sz) noexcept
377{
378 if (sz == 0) { return; }
379 dtoh_memcpy_async(p_h, p_d, sz);
381}
382
383inline void
384dtod_memcpy (void* p_d_dst, const void* p_d_src, const std::size_t sz) noexcept
385{
386 if (sz == 0) { return; }
387 dtod_memcpy_async(p_d_dst, p_d_src, sz);
389}
390
391#ifdef AMREX_USE_HYPRE
392void hypreSynchronize ();
393#endif
394
//! Copy nbytes from host memory src into the device-global variable dg,
//! starting at byte offset, asynchronously on the current stream.
//! Host-only builds fall back to a synchronous std::memcpy.
//! (The closing `Device::gpuStream()));` argument lines 405/409 were lost in
//! extraction and have been restored.)
template <typename T>
void memcpy_from_host_to_device_global_async (T& dg, const void* src,
                                              std::size_t nbytes,
                                              std::size_t offset = 0)
{
#if defined(AMREX_USE_CUDA)
    AMREX_CUDA_SAFE_CALL(cudaMemcpyToSymbolAsync(dg, src, nbytes, offset,
                                                 cudaMemcpyHostToDevice,
                                                 Device::gpuStream()));
#elif defined(AMREX_USE_HIP)
    AMREX_HIP_SAFE_CALL(hipMemcpyToSymbolAsync(dg, src, nbytes, offset,
                                               hipMemcpyHostToDevice,
                                               Device::gpuStream()));
#elif defined(AMREX_USE_SYCL)
    Device::streamQueue().memcpy(dg, src, nbytes, offset);
#else
    auto* p = (char*)(&dg);
    std::memcpy(p+offset, src, nbytes);
#endif
}
417
//! Copy nbytes from the device-global variable dg, starting at byte offset,
//! into host memory dst, asynchronously on the current stream.
//! Host-only builds fall back to a synchronous std::memcpy.
//! (The closing `Device::gpuStream()));` argument lines 428/432 were lost in
//! extraction and have been restored.)
template <typename T>
void memcpy_from_device_global_to_host_async (void* dst, T const& dg,
                                              std::size_t nbytes,
                                              std::size_t offset = 0)
{
#if defined(AMREX_USE_CUDA)
    AMREX_CUDA_SAFE_CALL(cudaMemcpyFromSymbolAsync(dst, dg, nbytes, offset,
                                                   cudaMemcpyDeviceToHost,
                                                   Device::gpuStream()));
#elif defined(AMREX_USE_HIP)
    AMREX_HIP_SAFE_CALL(hipMemcpyFromSymbolAsync(dst, dg, nbytes, offset,
                                                 hipMemcpyDeviceToHost,
                                                 Device::gpuStream()));
#elif defined(AMREX_USE_SYCL)
    Device::streamQueue().memcpy(dst, dg, nbytes, offset);
#else
    auto const* p = (char const*)(&dg);
    std::memcpy(dst, p+offset, nbytes);
#endif
}
440
441}
442
443#endif
#define AMREX_EXPORT
Definition AMReX_Extension.H:191
#define AMREX_HIP_OR_CUDA(a, b)
Definition AMReX_GpuControl.H:21
#define AMREX_CUDA_SAFE_CALL(call)
Definition AMReX_GpuError.H:73
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
A virtual base class for objects that manage their own dynamic memory allocation.
Definition AMReX_Arena.H:105
Definition AMReX_GpuDevice.H:70
static gpuStream_t setStream(gpuStream_t s) noexcept
Definition AMReX_GpuDevice.cpp:731
static int streamIndex(gpuStream_t s=gpuStream()) noexcept
Definition AMReX_GpuDevice.cpp:697
static int numDevicePartners() noexcept
Definition AMReX_GpuDevice.cpp:690
static int numGpuStreams() noexcept
Definition AMReX_GpuDevice.H:91
static void freeAsync(Arena *arena, void *mem) noexcept
Definition AMReX_GpuDevice.cpp:775
static void mem_advise_set_readonly(void *p, std::size_t sz)
Definition AMReX_GpuDevice.cpp:950
static void c_threads_and_blocks(const int *lo, const int *hi, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition AMReX_GpuDevice.cpp:1007
static int memoryPoolsSupported() noexcept
Definition AMReX_GpuDevice.H:190
static void setStreamIndex(int idx) noexcept
Definition AMReX_GpuDevice.cpp:710
static gpuStream_t resetStream() noexcept
Definition AMReX_GpuDevice.cpp:723
static unsigned int maxBlocksPerLaunch() noexcept
Definition AMReX_GpuDevice.H:200
static int maxThreadsPerBlock() noexcept
Definition AMReX_GpuDevice.H:159
static int maxBlocksPerGrid(int dir) noexcept
Definition AMReX_GpuDevice.H:161
static int devicePropMinor() noexcept
Definition AMReX_GpuDevice.H:167
static int numMultiProcessors() noexcept
Definition AMReX_GpuDevice.H:157
static void Finalize()
Definition AMReX_GpuDevice.cpp:450
static void synchronize() noexcept
Definition AMReX_GpuDevice.cpp:740
static void mem_advise_set_preferred(void *p, std::size_t sz, int device)
Definition AMReX_GpuDevice.cpp:918
static std::string deviceName() noexcept
Definition AMReX_GpuDevice.H:162
static cudaStream_t cudaStream() noexcept
Definition AMReX_GpuDevice.H:83
static void setNumThreadsMin(int nx, int ny, int nz) noexcept
Definition AMReX_GpuDevice.cpp:984
static void streamSynchronize() noexcept
Definition AMReX_GpuDevice.cpp:757
static gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:78
static std::string deviceVendor() noexcept
Definition AMReX_GpuDevice.H:170
static void profilerStop()
Definition AMReX_GpuDevice.cpp:1179
static void n_threads_and_blocks(const Long N, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition AMReX_GpuDevice.cpp:992
static constexpr int warp_size
Definition AMReX_GpuDevice.H:197
static std::size_t sharedMemPerBlock() noexcept
Definition AMReX_GpuDevice.H:156
static std::size_t freeMemAvailable()
Definition AMReX_GpuDevice.cpp:1146
static void streamSynchronizeAll() noexcept
Definition AMReX_GpuDevice.cpp:765
static void profilerStart()
Definition AMReX_GpuDevice.cpp:1168
static int maxThreadsPerBlock(int dir) noexcept
Definition AMReX_GpuDevice.H:160
static int deviceId() noexcept
Definition AMReX_GpuDevice.cpp:679
static int maxThreadsPerMultiProcessor() noexcept
Definition AMReX_GpuDevice.H:158
static int numDevicesUsed() noexcept
Definition AMReX_GpuDevice.cpp:685
static void resetStreamIndex() noexcept
Definition AMReX_GpuDevice.H:96
static void Initialize(bool minimal, int a_device_id)
Definition AMReX_GpuDevice.cpp:213
static bool clearFreeAsyncBuffer() noexcept
Definition AMReX_GpuDevice.cpp:785
static void c_comps_threads_and_blocks(const int *lo, const int *hi, const int comps, dim3 &numBlocks, dim3 &numThreads) noexcept
Definition AMReX_GpuDevice.cpp:999
static std::size_t totalGlobalMem() noexcept
Definition AMReX_GpuDevice.H:155
static void grid_stride_threads_and_blocks(dim3 &numBlocks, dim3 &numThreads) noexcept
Definition AMReX_GpuDevice.cpp:1065
static int devicePropMajor() noexcept
Definition AMReX_GpuDevice.H:166
Definition AMReX_GpuDevice.H:57
gpuStream_t & get()
Definition AMReX_GpuDevice.cpp:146
std::size_t wait_list_size()
Definition AMReX_GpuDevice.cpp:204
void sync()
Definition AMReX_GpuDevice.cpp:151
void free_async(Arena *arena, void *mem)
Definition AMReX_GpuDevice.cpp:184
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
amrex_long Long
Definition AMReX_INT.H:30
Definition AMReX_BaseFwd.H:52
void dtod_memcpy_async(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:329
void synchronize() noexcept
Definition AMReX_GpuDevice.H:257
bool clearFreeAsyncBuffer() noexcept
Definition AMReX_GpuDevice.H:293
void freeAsync(Arena *arena, void *mem) noexcept
Definition AMReX_GpuDevice.H:284
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:263
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:315
void streamSynchronizeAll() noexcept
Definition AMReX_GpuDevice.H:269
bool inSingleStreamRegion() noexcept
Definition AMReX_GpuControl.H:151
void memcpy_from_device_global_to_host_async(void *dst, T const &dg, std::size_t nbytes, std::size_t offset=0)
Definition AMReX_GpuDevice.H:421
int numGpuStreams() noexcept
Definition AMReX_GpuDevice.H:251
void dtoh_memcpy(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:376
void htod_memcpy(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:368
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:301
void dtod_memcpy(void *p_d_dst, const void *p_d_src, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:384
void memcpy_from_host_to_device_global_async(T &dg, const void *src, std::size_t nbytes, std::size_t offset=0)
Definition AMReX_GpuDevice.H:398
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:244
constexpr int get_thread_num()
Definition AMReX_OpenMP.H:37
Definition AMReX_Amr.cpp:49
cudaDeviceProp gpuDeviceProp_t
Definition AMReX_GpuDevice.H:28
cudaStream_t gpuStream_t
Definition AMReX_GpuControl.H:83