#if defined(AMREX_USE_GPU)
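    // GPU build: hand the local CSR data (row offsets, column indices, values) to the
    // vendor sparse library (cuSPARSE, rocSPARSE, or oneMKL) to compute the sparse
    // matrix-vector product y = A*x.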
    Long const ncols = x.numLocalRows();
#if defined(AMREX_USE_CUDA)
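    // cuSPARSE path: create a library handle for this call.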
    cusparseHandle_t handle;
    cusparseCreate(&handle);
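    // Map the scalar type T to the corresponding CUDA data type.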
    cudaDataType data_type;
    if constexpr (std::is_same_v<T,float>) {
        data_type = CUDA_R_32F;
    } else if constexpr (std::is_same_v<T,double>) {
        data_type = CUDA_R_64F;
    } else if constexpr (std::is_same_v<T,GpuComplex<float>>) {
        data_type = CUDA_C_32F;
    } else if constexpr (std::is_same_v<T,GpuComplex<double>>) {
        data_type = CUDA_C_64F;
    }
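    // Describe the CSR matrix and the dense x/y vectors to cuSPARSE. The descriptors
    // reference the existing device arrays with 64-bit indices; no data is copied.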
    cusparseIndexType_t index_type = CUSPARSE_INDEX_64I;
    cusparseSpMatDescr_t mat_descr;
    cusparseCreateCsr(&mat_descr, nrows, ncols, nnz, (void*)row, (void*)col, (void*)mat,
                      index_type, index_type, CUSPARSE_INDEX_BASE_ZERO, data_type);
    cusparseDnVecDescr_t x_descr;
    cusparseCreateDnVec(&x_descr, ncols, (void*)px, data_type);

    cusparseDnVecDescr_t y_descr;
    cusparseCreateDnVec(&y_descr, nrows, (void*)py, data_type);
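    // Query the workspace size cuSPARSE needs, then run the SpMV. pbuffer is device
    // scratch memory of at least buffer_size bytes.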
    std::size_t buffer_size;
    cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, mat_descr, x_descr,
                            &beta, y_descr, data_type, CUSPARSE_SPMV_ALG_DEFAULT, &buffer_size);

    cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, mat_descr, x_descr,
                 &beta, y_descr, data_type, CUSPARSE_SPMV_ALG_DEFAULT, pbuffer);
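    // Release the cuSPARSE descriptors and handle.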
    cusparseDestroySpMat(mat_descr);
    cusparseDestroyDnVec(x_descr);
    cusparseDestroyDnVec(y_descr);
    cusparseDestroy(handle);
#elif defined(AMREX_USE_HIP)
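    // rocSPARSE path: same structure as the cuSPARSE path above.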
    rocsparse_handle handle;
    rocsparse_create_handle(&handle);
    rocsparse_datatype data_type;
    if constexpr (std::is_same_v<T,float>) {
        data_type = rocsparse_datatype_f32_r;
    } else if constexpr (std::is_same_v<T,double>) {
        data_type = rocsparse_datatype_f64_r;
    } else if constexpr (std::is_same_v<T,GpuComplex<float>>) {
        data_type = rocsparse_datatype_f32_c;
    } else if constexpr (std::is_same_v<T,GpuComplex<double>>) {
        data_type = rocsparse_datatype_f64_c;
    }
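    // Describe the CSR matrix and the dense x/y vectors to rocSPARSE with 64-bit indices.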
    rocsparse_indextype index_type = rocsparse_indextype_i64;
    rocsparse_spmat_descr mat_descr;
    rocsparse_create_csr_descr(&mat_descr, nrows, ncols, nnz, (void*)row, (void*)col,
                               (void*)mat, index_type, index_type,
                               rocsparse_index_base_zero, data_type);
    rocsparse_dnvec_descr x_descr;
    rocsparse_create_dnvec_descr(&x_descr, ncols, (void*)px, data_type);

    rocsparse_dnvec_descr y_descr;
    rocsparse_create_dnvec_descr(&y_descr, nrows, (void*)py, data_type);
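    // ROCm 7 marks this rocsparse_spmv usage as deprecated, so the warning is
    // suppressed around the calls below.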
#if (HIP_VERSION_MAJOR >= 7)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
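    // ROCm 6+ splits rocsparse_spmv into explicit stages (buffer size, preprocess,
    // compute); older versions take no stage argument and return the required buffer
    // size when the temporary buffer pointer is null.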
    std::size_t buffer_size;
    rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
                   &beta, y_descr, data_type, rocsparse_spmv_alg_default,
#if (HIP_VERSION_MAJOR >= 6)
                   rocsparse_spmv_stage_buffer_size,
#endif
                   &buffer_size, nullptr);
#if (HIP_VERSION_MAJOR >= 6)
    rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
                   &beta, y_descr, data_type, rocsparse_spmv_alg_default,
                   rocsparse_spmv_stage_preprocess, &buffer_size, pbuffer);
#endif
    rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
                   &beta, y_descr, data_type, rocsparse_spmv_alg_default,
#if (HIP_VERSION_MAJOR >= 6)
                   rocsparse_spmv_stage_compute,
#endif
                   &buffer_size, pbuffer);
#if (HIP_VERSION_MAJOR >= 7)
#pragma clang diagnostic pop
#endif
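    // Release the rocSPARSE descriptors and handle.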
    rocsparse_destroy_spmat_descr(mat_descr);
    rocsparse_destroy_dnvec_descr(x_descr);
    rocsparse_destroy_dnvec_descr(y_descr);
    rocsparse_destroy_handle(handle);
#elif defined(AMREX_USE_SYCL)
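    // oneMKL path: wrap the CSR data in a sparse matrix handle and call sparse::gemv
    // on the device queue.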
    mkl::sparse::matrix_handle_t handle{};
    mkl::sparse::init_matrix_handle(&handle);
#if defined(INTEL_MKL_VERSION) && (INTEL_MKL_VERSION < 20250300)
    mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols,
                              mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);
#else
    mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols, nnz,
                              mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);
#endif
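    // y = 1*A*x + 0*y, i.e. y = A*x, computed on the device stream queue.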
    mkl::sparse::gemv(Gpu::Device::streamQueue(), mkl::transpose::nontrans,
                      T(1), handle, px, T(0), py);

    auto ev = mkl::sparse::release_matrix_handle(Gpu::Device::streamQueue(), &handle);
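    // Non-GPU build: straightforward CSR SpMV, accumulating one output row at a time.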
    Long const ny = y.numLocalRows();
    for (Long i = 0; i < ny; ++i) {
        T r = 0;
#ifdef AMREX_USE_OMP
#pragma omp parallel for reduction(+:r)
#endif
        for (Long j = row[i]; j < row[i+1]; ++j) {
            r += mat[j] * px[col[j]];