Block-Structured AMR Software Framework
amrex::Gpu::Device Class Reference

#include <AMReX_GpuDevice.H>

Static Public Member Functions

static void Initialize ()
 
static void Finalize ()
 
static gpuStream_t gpuStream () noexcept
 
static cudaStream_t cudaStream () noexcept
 
static int numGpuStreams () noexcept
 
static void setStreamIndex (int idx) noexcept
 
static void resetStreamIndex () noexcept
 
static int streamIndex (gpuStream_t s=gpuStream()) noexcept
 
static gpuStream_t setStream (gpuStream_t s) noexcept
 
static gpuStream_t resetStream () noexcept
 
static int deviceId () noexcept
 
static int numDevicesUsed () noexcept
 
static int numDevicePartners () noexcept
 
static void synchronize () noexcept
 
static void streamSynchronize () noexcept
 
static void streamSynchronizeAll () noexcept
 
static void mem_advise_set_preferred (void *p, std::size_t sz, int device)
 
static void mem_advise_set_readonly (void *p, std::size_t sz)
 
static void setNumThreadsMin (int nx, int ny, int nz) noexcept
 
static void n_threads_and_blocks (const Long N, dim3 &numBlocks, dim3 &numThreads) noexcept
 
static void c_comps_threads_and_blocks (const int *lo, const int *hi, const int comps, dim3 &numBlocks, dim3 &numThreads) noexcept
 
static void c_threads_and_blocks (const int *lo, const int *hi, dim3 &numBlocks, dim3 &numThreads) noexcept
 
static void grid_stride_threads_and_blocks (dim3 &numBlocks, dim3 &numThreads) noexcept
 
static std::size_t totalGlobalMem () noexcept
 
static std::size_t sharedMemPerBlock () noexcept
 
static int numMultiProcessors () noexcept
 
static int maxThreadsPerMultiProcessor () noexcept
 
static int maxThreadsPerBlock () noexcept
 
static int maxThreadsPerBlock (int dir) noexcept
 
static int maxBlocksPerGrid (int dir) noexcept
 
static std::string deviceName () noexcept
 
static int devicePropMajor () noexcept
 
static int devicePropMinor () noexcept
 
static std::string deviceVendor () noexcept
 
static std::size_t freeMemAvailable ()
 
static void profilerStart ()
 
static void profilerStop ()
 
static int memoryPoolsSupported () noexcept
 
static unsigned int maxBlocksPerLaunch () noexcept
 

Static Public Attributes

static constexpr AMREX_EXPORT int warp_size = AMREX_HIP_OR_CUDA(64,32)
 

Static Private Member Functions

static void initialize_gpu ()
 

Static Private Attributes

static AMREX_EXPORT int device_id = 0
 
static AMREX_EXPORT int num_devices_used = 0
 
static AMREX_EXPORT int num_device_partners = 1
 
static AMREX_EXPORT int verbose = 0
 
static AMREX_EXPORT int max_gpu_streams = 4
 
static AMREX_EXPORT dim3 numThreadsMin = dim3(1, 1, 1)
 
static AMREX_EXPORT dim3 numBlocksOverride = dim3(0, 0, 0)
 
static AMREX_EXPORT dim3 numThreadsOverride = dim3(0, 0, 0)
 
static AMREX_EXPORT Vector< gpuStream_t > gpu_stream_pool
 
static AMREX_EXPORT Vector< gpuStream_t > gpu_stream
 
static AMREX_EXPORT gpuDeviceProp_t device_prop
 
static AMREX_EXPORT int memory_pools_supported = 0
 
static AMREX_EXPORT unsigned int max_blocks_per_launch = 2560
 

Member Function Documentation

◆ c_comps_threads_and_blocks()

void amrex::Gpu::Device::c_comps_threads_and_blocks ( const int *  lo,
const int *  hi,
const int  comps,
dim3 &  numBlocks,
dim3 &  numThreads 
)
staticnoexcept

◆ c_threads_and_blocks()

void amrex::Gpu::Device::c_threads_and_blocks ( const int *  lo,
const int *  hi,
dim3 &  numBlocks,
dim3 &  numThreads 
)
staticnoexcept

◆ cudaStream()

static cudaStream_t amrex::Gpu::Device::cudaStream ( )
inlinestaticnoexcept

Provided for backward compatibility.

◆ deviceId()

int amrex::Gpu::Device::deviceId ( )
staticnoexcept

◆ deviceName()

static std::string amrex::Gpu::Device::deviceName ( )
inlinestaticnoexcept

◆ devicePropMajor()

static int amrex::Gpu::Device::devicePropMajor ( )
inlinestaticnoexcept

◆ devicePropMinor()

static int amrex::Gpu::Device::devicePropMinor ( )
inlinestaticnoexcept

◆ deviceVendor()

static std::string amrex::Gpu::Device::deviceVendor ( )
inlinestaticnoexcept

◆ Finalize()

void amrex::Gpu::Device::Finalize ( )
static

◆ freeMemAvailable()

std::size_t amrex::Gpu::Device::freeMemAvailable ( )
static

◆ gpuStream()

static gpuStream_t amrex::Gpu::Device::gpuStream ( )
inlinestaticnoexcept

◆ grid_stride_threads_and_blocks()

void amrex::Gpu::Device::grid_stride_threads_and_blocks ( dim3 &  numBlocks,
dim3 &  numThreads 
)
staticnoexcept

◆ Initialize()

void amrex::Gpu::Device::Initialize ( )
static

◆ initialize_gpu()

void amrex::Gpu::Device::initialize_gpu ( )
staticprivate

◆ maxBlocksPerGrid()

static int amrex::Gpu::Device::maxBlocksPerGrid ( int  dir)
inlinestaticnoexcept

◆ maxBlocksPerLaunch()

static unsigned int amrex::Gpu::Device::maxBlocksPerLaunch ( )
inlinestaticnoexcept

◆ maxThreadsPerBlock() [1/2]

static int amrex::Gpu::Device::maxThreadsPerBlock ( )
inlinestaticnoexcept

◆ maxThreadsPerBlock() [2/2]

static int amrex::Gpu::Device::maxThreadsPerBlock ( int  dir)
inlinestaticnoexcept

◆ maxThreadsPerMultiProcessor()

static int amrex::Gpu::Device::maxThreadsPerMultiProcessor ( )
inlinestaticnoexcept

◆ mem_advise_set_preferred()

void amrex::Gpu::Device::mem_advise_set_preferred ( void *  p,
std::size_t  sz,
int  device 
)
static

◆ mem_advise_set_readonly()

void amrex::Gpu::Device::mem_advise_set_readonly ( void *  p,
std::size_t  sz 
)
static

◆ memoryPoolsSupported()

static int amrex::Gpu::Device::memoryPoolsSupported ( )
inlinestaticnoexcept

◆ n_threads_and_blocks()

void amrex::Gpu::Device::n_threads_and_blocks ( const Long  N,
dim3 &  numBlocks,
dim3 &  numThreads 
)
staticnoexcept

◆ numDevicePartners()

int amrex::Gpu::Device::numDevicePartners ( )
staticnoexcept

◆ numDevicesUsed()

int amrex::Gpu::Device::numDevicesUsed ( )
staticnoexcept

◆ numGpuStreams()

static int amrex::Gpu::Device::numGpuStreams ( )
inlinestaticnoexcept

◆ numMultiProcessors()

static int amrex::Gpu::Device::numMultiProcessors ( )
inlinestaticnoexcept

◆ profilerStart()

void amrex::Gpu::Device::profilerStart ( )
static

◆ profilerStop()

void amrex::Gpu::Device::profilerStop ( )
static

◆ resetStream()

gpuStream_t amrex::Gpu::Device::resetStream ( )
staticnoexcept

◆ resetStreamIndex()

static void amrex::Gpu::Device::resetStreamIndex ( )
inlinestaticnoexcept

◆ setNumThreadsMin()

void amrex::Gpu::Device::setNumThreadsMin ( int  nx,
int  ny,
int  nz 
)
staticnoexcept

◆ setStream()

gpuStream_t amrex::Gpu::Device::setStream ( gpuStream_t  s)
staticnoexcept

◆ setStreamIndex()

void amrex::Gpu::Device::setStreamIndex ( int  idx)
staticnoexcept

◆ sharedMemPerBlock()

static std::size_t amrex::Gpu::Device::sharedMemPerBlock ( )
inlinestaticnoexcept

◆ streamIndex()

int amrex::Gpu::Device::streamIndex ( gpuStream_t  s = gpuStream())
staticnoexcept

◆ streamSynchronize()

void amrex::Gpu::Device::streamSynchronize ( )
staticnoexcept

Halt execution of code until the current AMReX GPU stream has finished processing all previously requested tasks.

◆ streamSynchronizeAll()

void amrex::Gpu::Device::streamSynchronizeAll ( )
staticnoexcept

Halt execution of code until all AMReX GPU streams have finished processing all previously requested tasks.

◆ synchronize()

void amrex::Gpu::Device::synchronize ( )
staticnoexcept

Halt execution of code until the GPU has finished processing all previously requested tasks.

◆ totalGlobalMem()

static std::size_t amrex::Gpu::Device::totalGlobalMem ( )
inlinestaticnoexcept

Member Data Documentation

◆ device_id

int amrex::Gpu::Device::device_id = 0
staticprivate

◆ device_prop

gpuDeviceProp_t amrex::Gpu::Device::device_prop
staticprivate

◆ gpu_stream

Vector< gpuStream_t > amrex::Gpu::Device::gpu_stream
staticprivate

◆ gpu_stream_pool

Vector< gpuStream_t > amrex::Gpu::Device::gpu_stream_pool
staticprivate

◆ max_blocks_per_launch

unsigned int amrex::Gpu::Device::max_blocks_per_launch = 2560
staticprivate

◆ max_gpu_streams

int amrex::Gpu::Device::max_gpu_streams = 4
staticprivate

◆ memory_pools_supported

int amrex::Gpu::Device::memory_pools_supported = 0
staticprivate

◆ num_device_partners

int amrex::Gpu::Device::num_device_partners = 1
staticprivate

◆ num_devices_used

int amrex::Gpu::Device::num_devices_used = 0
staticprivate

◆ numBlocksOverride

dim3 amrex::Gpu::Device::numBlocksOverride = dim3(0, 0, 0)
staticprivate

◆ numThreadsMin

dim3 amrex::Gpu::Device::numThreadsMin = dim3(1, 1, 1)
staticprivate

◆ numThreadsOverride

dim3 amrex::Gpu::Device::numThreadsOverride = dim3(0, 0, 0)
staticprivate

◆ verbose

int amrex::Gpu::Device::verbose = 0
staticprivate

◆ warp_size

constexpr int amrex::Gpu::Device::warp_size = AMREX_HIP_OR_CUDA(64,32)
staticconstexpr

The documentation for this class was generated from the following files: