20#if defined(AMREX_USE_GPU)
24#if defined(AMREX_USE_CUDA)
26 cusparseHandle_t handle;
30 cudaDataType data_type;
31 if constexpr (std::is_same_v<T,float>) {
32 data_type = CUDA_R_32F;
33 }
else if constexpr (std::is_same_v<T,double>) {
34 data_type = CUDA_R_64F;
35 }
else if constexpr (std::is_same_v<T,GpuComplex<float>>) {
36 data_type = CUDA_C_32F;
37 }
else if constexpr (std::is_same_v<T,GpuComplex<double>>) {
38 data_type = CUDA_C_64F;
43 cusparseIndexType_t index_type = CUSPARSE_INDEX_64I;
45 cusparseSpMatDescr_t mat_descr;
47 (cusparseCreateCsr(&mat_descr, nrows, ncols, nnz,
48 (
void*)row, (
void*)col, (
void*)mat,
49 index_type, index_type, CUSPARSE_INDEX_BASE_ZERO,
52 cusparseDnVecDescr_t x_descr;
55 cusparseDnVecDescr_t y_descr;
61 std::size_t buffer_size;
63 (cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
64 &alpha, mat_descr, x_descr, &beta, y_descr,
65 data_type, CUSPARSE_SPMV_ALG_DEFAULT,
71 (cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
72 &alpha, mat_descr, x_descr, &beta, y_descr,
73 data_type, CUSPARSE_SPMV_ALG_DEFAULT, pbuffer));
83#elif defined(AMREX_USE_HIP)
85 rocsparse_handle handle;
86 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_create_handle(&handle));
87 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_set_stream(handle,
Gpu::gpuStream()));
89 rocsparse_datatype data_type;
90 if constexpr (std::is_same_v<T,float>) {
91 data_type = rocsparse_datatype_f32_r;
92 }
else if constexpr (std::is_same_v<T,double>) {
93 data_type = rocsparse_datatype_f64_r;
94 }
else if constexpr (std::is_same_v<T,GpuComplex<float>>) {
95 data_type = rocsparse_datatype_f32_c;
96 }
else if constexpr (std::is_same_v<T,GpuComplex<double>>) {
97 data_type = rocsparse_datatype_f64_c;
102 rocsparse_indextype index_type = rocsparse_indextype_i64;
104 rocsparse_spmat_descr mat_descr;
105 AMREX_ROCSPARSE_SAFE_CALL(
106 rocsparse_create_csr_descr(&mat_descr, nrows, ncols, nnz,
107 (
void*)row, (
void*)col, (
void*)mat,
108 index_type, index_type,
109 rocsparse_index_base_zero, data_type));
111 rocsparse_dnvec_descr x_descr;
112 AMREX_ROCSPARSE_SAFE_CALL(
113 rocsparse_create_dnvec_descr(&x_descr, ncols, (
void*)px, data_type));
115 rocsparse_dnvec_descr y_descr;
116 AMREX_ROCSPARSE_SAFE_CALL(
117 rocsparse_create_dnvec_descr(&y_descr, nrows, (
void*)py, data_type));
122#if (HIP_VERSION_MAJOR >= 7)
123#pragma clang diagnostic push
124#pragma clang diagnostic ignored "-Wdeprecated-declarations"
127 std::size_t buffer_size;
129 rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
130 &beta, y_descr, data_type, rocsparse_spmv_alg_default,
131#
if (HIP_VERSION_MAJOR >= 6)
132 rocsparse_spmv_stage_buffer_size,
134 &buffer_size,
nullptr);
135 AMREX_ROCSPARSE_SAFE_CALL(err0);
139#if (HIP_VERSION_MAJOR >= 6)
140 AMREX_ROCSPARSE_SAFE_CALL(
141 rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
142 &beta, y_descr, data_type, rocsparse_spmv_alg_default,
143 rocsparse_spmv_stage_preprocess, &buffer_size, pbuffer));
147 rocsparse_spmv(handle, rocsparse_operation_none, &alpha, mat_descr, x_descr,
148 &beta, y_descr, data_type, rocsparse_spmv_alg_default,
149#
if (HIP_VERSION_MAJOR >= 6)
150 rocsparse_spmv_stage_compute,
152 &buffer_size, pbuffer);
153 AMREX_ROCSPARSE_SAFE_CALL(err1);
155#if (HIP_VERSION_MAJOR >= 7)
157#pragma clang diagnostic pop
162 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_destroy_spmat_descr(mat_descr));
163 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_destroy_dnvec_descr(x_descr));
164 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_destroy_dnvec_descr(y_descr));
165 AMREX_ROCSPARSE_SAFE_CALL(rocsparse_destroy_handle(handle));
168#elif defined(AMREX_USE_SYCL)
170 mkl::sparse::matrix_handle_t handle{};
171 mkl::sparse::init_matrix_handle(&handle);
173#if defined(INTEL_MKL_VERSION) && (INTEL_MKL_VERSION < 20250300)
175 mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols,
176 mkl::index_base::zero, (
Long*)row, (
Long*)col, (T*)mat);
178 mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols, nnz,
179 mkl::index_base::zero, (
Long*)row, (
Long*)col, (T*)mat);
181 mkl::sparse::gemv(Gpu::Device::streamQueue(), mkl::transpose::nontrans,
182 T(1), handle, px, T(0), py);
184 auto ev = mkl::sparse::release_matrix_handle(Gpu::Device::streamQueue(), &handle);
196#pragma omp parallel for
198 for (
Long i = 0; i < nrows; ++i) {
200 for (
Long j = row[i]; j < row[i+1]; ++j) {
201 r += mat[j] * px[col[j]];