5#ifndef AMREX_HYPRE_SOLVER_H_
6#define AMREX_HYPRE_SOLVER_H_
14#include "_hypre_utilities.h"
60 template <
class Marker,
class Filler>
69 std::string a_options_namespace =
"hypre");
80 template <FabArrayType MF>
81 requires (std::same_as<typename MF::value_type, HYPRE_Real>)
84 HYPRE_Real rel_tol, HYPRE_Real abs_tol,
int max_iter);
87 int getNumIters ()
const {
return m_hypre_ij->getNumIters(); }
91 return m_hypre_ij->getFinalResidualNorm();
95 HYPRE_IJMatrix
getA ()
const {
return m_hypre_ij->A(); }
97 HYPRE_IJVector
getb ()
const {
return m_hypre_ij->b(); }
99 HYPRE_IJVector
getx ()
const {
return m_hypre_ij->x(); }
106 template <
class Marker>
115 template <
typename AI>
126 template <
class Filler>
129 HYPRE_Int&, HYPRE_Int*,
139 template <FabArrayType MF>
140 requires (std::same_as<typename MF::value_type, HYPRE_Real>)
149 template <FabArrayType MF>
150 requires (std::same_as<typename MF::value_type, HYPRE_Real>)
163 std::string m_options_namespace;
179 HYPRE_Int m_nrows_proc;
181 std::unique_ptr<HypreIJIface> m_hypre_ij;
184 HYPRE_IJMatrix m_A =
nullptr;
185 HYPRE_IJVector m_b =
nullptr;
186 HYPRE_IJVector m_x =
nullptr;
190template <
class Marker,
class Filler>
199 std::string a_options_namespace)
200 : m_nvars (
int(a_index_type.size())),
201 m_index_type (a_index_type),
205 m_verbose (a_verbose),
206 m_options_namespace(std::move(a_options_namespace))
214 m_grids.resize(m_nvars);
215 m_local_id.resize(m_nvars);
216 m_global_id.resize(m_nvars);
217 m_nrows_grid.resize(m_nvars);
218 m_id_offset.resize(m_nvars);
220 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
222 m_local_id [ivar].define(m_grids[ivar], m_dmap, 1, 0);
223 m_global_id [ivar].define(m_grids[ivar], m_dmap, 1, m_nghost);
224 m_nrows_grid[ivar].define(m_grids[0], m_dmap);
225 m_id_offset [ivar].define(m_grids[0], m_dmap);
226 nrows_max += m_grids[ivar].numPts();
228 m_global_id_vec.define(m_grids[0], m_dmap);
229 m_nrows.
define (m_grids[0], m_dmap);
231 "Need to configure Hypre with --enable-bigint");
233 m_owner_mask.resize(m_nvars);
234 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
239 m_cell_offset.define(m_grids[0], m_dmap);
256 if (nrows_allprocs.
size() > 1) {
257 MPI_Allgather(&m_nrows_proc,
sizeof(HYPRE_Int), MPI_CHAR,
258 nrows_allprocs.data(),
sizeof(HYPRE_Int), MPI_CHAR, m_comm);
262 nrows_allprocs[0] = m_nrows_proc;
265 HYPRE_Int proc_begin = 0;
266 for (
int i = 0; i < myproc; ++i) {
267 proc_begin += nrows_allprocs[i];
270 HYPRE_Int proc_end = proc_begin;
272 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
273 m_id_offset[ivar][mfi] = proc_end;
274 proc_end += m_nrows_grid[ivar][mfi];
284 using AtomicInt = std::conditional_t<
sizeof(HYPRE_Int) == 4,
285 HYPRE_Int,
unsigned long long>;
286 fill_global_id<AtomicInt>();
289 HYPRE_Int ilower = proc_begin;
290 HYPRE_Int iupper = proc_end-1;
291 m_hypre_ij = std::make_unique<HypreIJIface>(m_comm, ilower, iupper, m_verbose);
292 m_hypre_ij->parse_inputs(m_options_namespace);
295 m_A = m_hypre_ij->A();
296 m_b = m_hypre_ij->b();
297 m_x = m_hypre_ij->x();
303template <
class Marker>
316 int boxno = mfi.LocalIndex();
318 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
322 m_cell_offset[mfi].resize(npts_tot);
324 int* p_cell_offset = m_cell_offset[mfi].data();
325 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
327 auto const& lid = m_local_id[ivar].array(mfi);
328 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
330 const auto npts =
static_cast<int>(bx.
numPts());
331 int npts_box = amrex::Scan::PrefixSum<int>(npts,
335 int id = (owner ( cell.
x,cell.
y,cell.
z ) &&
336 marker(boxno,cell.
x,cell.
y,cell.
z,ivar)) ? 1 : 0;
337 lid(cell.
x,cell.
y,cell.
z) = id;
343 if (lid(cell.
x,cell.
y,cell.
z)) {
344 lid(cell.
x,cell.
y,cell.
z) = ps;
345 p_cell_offset[ps] =
offset;
347 lid(cell.
x,cell.
y,cell.
z) = std::numeric_limits<int>::lowest();
351 m_nrows_grid[ivar][mfi] = npts_box;
352 npts_tot += npts_box;
353 p_cell_offset += npts_box;
355 m_cell_offset[mfi].resize(npts_tot);
364 int boxno = mfi.LocalIndex();
365 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
367 auto const& lid = m_local_id[ivar].array(mfi);
368 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
372 for (
int k = lo.z; k <= hi.z; ++k) {
373 for (
int j = lo.y; j <= hi.y; ++j) {
374 for (
int i = lo.x; i <= hi.x; ++i) {
375 if (owner(i,j,k) && marker(boxno,i,j,k,ivar)) {
378 lid(i,j,k) = std::numeric_limits<int>::lowest();
381 m_nrows_grid[ivar][mfi] = id;
389 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
390 nrows += m_nrows_grid[ivar][mfi];
392 m_nrows[mfi] = nrows;
393 m_nrows_proc += nrows;
398template <
typename AI>
407 if constexpr (std::is_same_v<HYPRE_Int,AI>) {
408 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
409 p_global_id.push_back(&(m_global_id[ivar]));
412 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
413 global_id_raii.emplace_back(m_global_id[ivar].
boxArray(),
414 m_global_id[ivar].DistributionMap(),
415 1, m_global_id[ivar].nGrowVect());
416 p_global_id.push_back(&(global_id_raii[ivar]));
421#pragma omp parallel if (Gpu::notInLaunchRegion())
424 auto& rows_vec = m_global_id_vec[mfi];
425 rows_vec.resize(m_nrows[mfi]);
428 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
429 HYPRE_Int
const os = m_id_offset[ivar][mfi];
430 Box bx = mfi.validbox();
432 Array4<AI> const& gid = p_global_id[ivar]->array(mfi);
433 auto const& lid = m_local_id[ivar].const_array(mfi);
434 HYPRE_Int* rows = rows_vec.data() + nrows;
435 nrows += m_nrows_grid[ivar][mfi];
438 if (lid.contains(i,j,k) && lid(i,j,k) >= 0) {
439 const auto id = lid(i,j,k) + os;
440 rows[lid(i,j,k)] = id;
441 gid(i,j,k) =
static_cast<AI
>(id);
443 gid(i,j,k) =
static_cast<AI
>
444 (std::numeric_limits<HYPRE_Int>::max());
450 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
452 m_geom.periodicity());
453 p_global_id[ivar]->FillBoundary(m_geom.periodicity());
455 if constexpr (!std::is_same<HYPRE_Int, AI>()) {
456 auto const& dst = m_global_id[ivar].arrays();
457 auto const& src = p_global_id[ivar]->const_arrays();
461 dst[b](i,j,k) =
static_cast<HYPRE_Int
>(src[b](i,j,k));
476 auto* p_cols_tmp = cols_tmp.
data();
477 auto* p_mat_tmp = mat_tmp.
data();
478 auto const* p_cols = cols.
data();
479 auto const* p_mat = mat.
data();
481 Scan::PrefixSum<T>(N,
484 return static_cast<T
>(p_cols[i] >= 0);
488 if (p_cols[i] >= 0) {
489 p_cols_tmp[s] = p_cols[i];
490 p_mat_tmp[s] = p_mat[i];
494 std::swap(cols_tmp, cols);
495 std::swap(mat_tmp, mat);
502template <
class Filler>
504 Array4<HYPRE_Int const>
const*,
505 HYPRE_Int&, HYPRE_Int*,
518 for (
MFIter mfi(m_local_id[0],mfitinfo); mfi.
isValid(); ++mfi)
520 int boxno = mfi.LocalIndex();
521 const HYPRE_Int nrows = m_nrows[mfi];
526 HYPRE_Int* ncols = ncols_vec.
data();
530 HYPRE_Int* cols = cols_vec.
data();
534 HYPRE_Real* mat = mat_vec.
data();
537 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
538 gid_v[ivar] = m_global_id[ivar].const_array(mfi);
543 (gid_v.data(), gid_v.
size());
544 auto const* pgid = gid_buf.
data();
545 auto const* p_cell_offset = m_cell_offset[mfi].data();
547 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
548 const HYPRE_Int nrows_var = m_nrows_grid[ivar][mfi];
551 ntot += Reduce::Sum<Long>(nrows_var,
555 filler(boxno, cell.
x, cell.
y, cell.
z, ivar, pgid,
560 p_cell_offset += nrows_var;
562 cols +=
Long(nrows_var)*MSS;
563 mat +=
Long(nrows_var)*MSS;
568 if (ntot >=
Long(std::numeric_limits<int>::max())) {
569 detail::pack_matrix_gpu<Long>(cols_tmp, mat_tmp, cols_vec, mat_vec);
571 detail::pack_matrix_gpu<int>(cols_tmp, mat_tmp, cols_vec, mat_vec);
574 auto* pgid = gid_v.data();
575 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
576 if (m_nrows_grid[ivar][mfi] > 0) {
577 auto const& lid = m_local_id[ivar].const_array(mfi);
579 [=,&ncols,&cols,&mat] (
int i,
int j,
int k)
581 if (lid(i,j,k) >= 0) {
582 filler(boxno, i, j, k, ivar, pgid, *ncols, cols, mat);
592 const auto& rows_vec = m_global_id_vec[mfi];
593 HYPRE_Int
const* rows = rows_vec.data();
596 HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.
data(), rows,
598 Gpu::hypreSynchronize();
601 HYPRE_IJMatrixAssemble(m_A);
605template <FabArrayType MF>
606requires (std::same_as<typename MF::value_type, HYPRE_Real>)
610 HYPRE_Real rel_tol, HYPRE_Real abs_tol,
int max_iter)
616 HYPRE_IJVectorInitialize(m_b);
617 HYPRE_IJVectorInitialize(m_x);
619 load_vectors(a_soln, a_rhs);
621 HYPRE_IJVectorAssemble(m_x);
622 HYPRE_IJVectorAssemble(m_b);
624 m_hypre_ij->solve(rel_tol, abs_tol, max_iter);
626 get_solution(a_soln);
630template <FabArrayType MF>
631requires (std::same_as<typename MF::value_type, HYPRE_Real>)
645 const HYPRE_Int nrows = m_nrows[mfi];
652 auto* xp = xvec.
data();
653 auto* bp = bvec.
data();
655 HYPRE_Int
const* rows = m_global_id_vec[mfi].data();
658 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
659 if (m_nrows_grid[ivar][mfi] > 0) {
660 auto const& xfab = a_soln[ivar]->const_array(mfi);
661 auto const& bfab = a_rhs [ivar]->const_array(mfi);
662 auto const& lid = m_local_id[ivar].const_array(mfi);
664 HYPRE_Real* b = bp +
offset;
668 if (lid(i,j,k) >= 0) {
669 x[lid(i,j,k)] = xfab(i,j,k);
670 b[lid(i,j,k)] = bfab(i,j,k);
673 offset += m_nrows_grid[ivar][mfi];
677 Gpu::streamSynchronize();
678 HYPRE_IJVectorSetValues(m_x, nrows, rows, xp);
679 HYPRE_IJVectorSetValues(m_b, nrows, rows, bp);
680 Gpu::hypreSynchronize();
686template <FabArrayType MF>
687requires (std::same_as<typename MF::value_type, HYPRE_Real>)
699 const HYPRE_Int nrows = m_nrows[mfi];
704 auto* xp = xvec.
data();
706 HYPRE_Int
const* rows = m_global_id_vec[mfi].data();
708 HYPRE_IJVectorGetValues(m_x, nrows, rows, xp);
709 Gpu::hypreSynchronize();
712 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
713 if (m_nrows_grid[ivar][mfi] > 0) {
714 auto const& xfab = a_soln[ivar]->array(mfi);
715 auto const& lid = m_local_id[ivar].const_array(mfi);
720 if (lid(i,j,k) >= 0) {
721 xfab(i,j,k) =
x[lid(i,j,k)];
724 offset += m_nrows_grid[ivar][mfi];
727 Gpu::streamSynchronize();
731 for (
int ivar = 0; ivar < m_nvars; ++ivar) {
733 m_geom.periodicity());
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:564
__host__ __device__ BoxND & grow(int i) noexcept
Definition AMReX_Box.H:649
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:364
__host__ __device__ BoxND & convert(IndexTypeND< dim > typ) noexcept
Convert the BoxND from the current type into the argument type. This may change the BoxND coordinates...
Definition AMReX_Box.H:982
__host__ __device__ IntVectND< dim > atOffset(Long offset) const noexcept
Given the offset, compute IntVectND<dim>
Definition AMReX_Box.H:1079
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:75
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:361
Definition AMReX_GpuBuffer.H:24
T const * data() const noexcept
Definition AMReX_GpuBuffer.H:51
Solve Ax = b using HYPRE's generic IJ matrix format where A is a sparse matrix specified using the co...
Definition AMReX_HypreSolver.H:31
HypreSolver(Vector< IndexType > const &a_index_type, IntVect const &a_nghost, Geometry const &a_geom, BoxArray const &a_grids, DistributionMapping const &a_dmap, Marker &&a_marker, Filler &&a_filler, int a_verbose=0, std::string a_options_namespace="hypre")
Definition AMReX_HypreSolver.H:191
void load_vectors(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs)
Copy AMReX RHS/initial guess data into the IJ vectors.
Definition AMReX_HypreSolver.H:633
int getNumIters() const
Number of iterations from the last solve().
Definition AMReX_HypreSolver.H:87
void solve(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs, HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
Solve Ax=b after the constructor assembled the IJ matrix.
Definition AMReX_HypreSolver.H:608
HYPRE_IJVector getx() const
Access the IJ solution handle (non-owning).
Definition AMReX_HypreSolver.H:99
HYPRE_IJMatrix getA() const
Access the assembled IJ matrix handle (non-owning).
Definition AMReX_HypreSolver.H:95
void fill_matrix(Filler const &filler)
Fill each CSR row using the supplied filler functor.
Definition AMReX_HypreSolver.H:508
void fill_local_id(Marker const &marker)
Assign local ids to each owned DOF by invoking marker.
Definition AMReX_HypreSolver.H:309
void get_solution(Vector< MF * > const &a_soln)
Copy IJ solution entries back into AMReX storage.
Definition AMReX_HypreSolver.H:689
void fill_global_id()
Convert the local ids to globally unique ids visible to HYPRE.
Definition AMReX_HypreSolver.H:400
HYPRE_Real getFinalResidualNorm() const
Final residual norm from the last solve().
Definition AMReX_HypreSolver.H:90
HYPRE_IJVector getb() const
Access the assembled IJ RHS handle (non-owning).
Definition AMReX_HypreSolver.H:97
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
void define(const BoxArray &a_grids, const DistributionMapping &a_dm)
Definition AMReX_LayoutData.H:25
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:172
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
void resize(size_type a_new_size, GrowthStrategy strategy=GrowthStrategy::Poisson)
Definition AMReX_PODVector.H:728
void clear() noexcept
Definition AMReX_PODVector.H:646
T * data() noexcept
Definition AMReX_PODVector.H:666
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:29
Long size() const noexcept
Definition AMReX_Vector.H:54
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1359
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1345
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
static constexpr struct amrex::Scan::Type::Exclusive exclusive
static constexpr RetSum noRetSum
Definition AMReX_Scan.H:34
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_COMM_NULL
Definition AMReX_ccse-mpi.H:59
Definition AMReX_Amr.cpp:50
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Return a BoxND with different type.
Definition AMReX_Box.H:1567
IntVect nGrowVect(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2857
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
std::unique_ptr< iMultiFab > OwnerMask(FabArrayBase const &mf, const Periodicity &period, const IntVect &ngrow)
Definition AMReX_iMultiFab.cpp:699
const int[]
Definition AMReX_BLProfiler.cpp:1664
__host__ __device__ void Loop(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:127
void OverrideSync(FabArray< FAB > &fa, FabArray< IFAB > const &msk, const Periodicity &period)
Definition AMReX_FabArrayUtility.H:1379
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2862
A multidimensional array accessor.
Definition AMReX_Array4.H:285
Definition AMReX_Dim3.H:13
int x
Definition AMReX_Dim3.H:13
int z
Definition AMReX_Dim3.H:13
int y
Definition AMReX_Dim3.H:13
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:214
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:208
Definition AMReX_MFIter.H:20
MFItInfo & DisableDeviceSync() noexcept
Definition AMReX_MFIter.H:47
MFItInfo & UseDefaultStream() noexcept
Definition AMReX_MFIter.H:75