Block-Structured AMR Software Framework
AMReX_HypreSolver.H
#ifndef AMREX_HYPRE_SOLVER_H_
#define AMREX_HYPRE_SOLVER_H_

#include <AMReX_Geometry.H>
#include <AMReX_iMultiFab.H>
#include <AMReX_HypreIJIface.H>
#include <AMReX_BLProfiler.H>

#include "HYPRE.h"
#include "_hypre_utilities.h"

#include <string>

namespace amrex
{

template <int MSS>
class HypreSolver
{
public:

    template <class Marker, class Filler>
    HypreSolver (Vector<IndexType> const& a_index_type,
                 IntVect const& a_nghost,
                 Geometry const& a_geom,
                 BoxArray const& a_grids,
                 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
                 Marker && a_marker,
                 Filler && a_filler,
                 int a_verbose = 0,
                 std::string a_options_namespace = "hypre");

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void solve (Vector<MF *> const& a_soln,
                Vector<MF const*> const& a_rhs,
                HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter);

    int getNumIters () const { return m_hypre_ij->getNumIters(); }

    HYPRE_Real getFinalResidualNorm () const {
        return m_hypre_ij->getFinalResidualNorm();
    }

    HYPRE_IJMatrix getA () const { return m_hypre_ij->A(); }
    HYPRE_IJVector getb () const { return m_hypre_ij->b(); }
    HYPRE_IJVector getx () const { return m_hypre_ij->x(); }

// public: // for cuda

    template <class Marker>
#ifdef AMREX_USE_CUDA
    std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
#else
    std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
#endif
    fill_local_id (Marker const& marker);

    void fill_global_id ();

    template <class Filler,
              std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
                                          Array4<HYPRE_Int const> const*,
                                          HYPRE_Int&, HYPRE_Int*,
                                          HYPRE_Real*>::value,int> FOO = 0>
    void fill_matrix (Filler const& filler);

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void load_vectors (Vector<MF *> const& a_soln,
                       Vector<MF const*> const& a_rhs);

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void get_solution (Vector<MF*> const& a_soln);

private:

    int                                      m_nvars;
    Vector<IndexType>                        m_index_type;
    IntVect                                  m_nghost;
    Geometry                                 m_geom;
    Vector<BoxArray>                         m_grids;
    DistributionMapping                      m_dmap;

    int                                      m_verbose;
    std::string                              m_options_namespace;

    MPI_Comm                                 m_comm = MPI_COMM_NULL;

    Vector<std::unique_ptr<iMultiFab>>       m_owner_mask;
    Vector<iMultiFab>                        m_local_id;
    Vector<FabArray<BaseFab<HYPRE_Int>>>     m_global_id;
    LayoutData<Gpu::DeviceVector<HYPRE_Int>> m_global_id_vec;

#ifdef AMREX_USE_GPU
    LayoutData<Gpu::DeviceVector<int>>       m_cell_offset;
#endif

    Vector<LayoutData<HYPRE_Int>>            m_nrows_grid;
    Vector<LayoutData<HYPRE_Int>>            m_id_offset;
    LayoutData<HYPRE_Int>                    m_nrows;
    HYPRE_Int                                m_nrows_proc;

    std::unique_ptr<HypreIJIface>            m_hypre_ij;

    // Non-owning references to Hypre matrix, rhs, and solution data
    HYPRE_IJMatrix m_A = nullptr;
    HYPRE_IJVector m_b = nullptr;
    HYPRE_IJVector m_x = nullptr;
};
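
// -----------------------------------------------------------------------------
// A minimal usage sketch, for illustration only (not part of the original
// header). It assumes one cell-centered variable in 3D on a fully periodic
// domain, so every stencil neighbor has a valid global id, and that
// amrex::Real matches HYPRE_Real; `geom`, `grids`, `dmap`, `soln` and `rhs`
// are hypothetical objects set up by the caller.
//
//     constexpr int max_stencil_size = 7; // 7-point Laplacian in 3D
//     HypreSolver<max_stencil_size> solver(
//         {IndexType::TheCellType()}, IntVect(1), geom, grids, dmap,
//         // Marker: returns true for every cell that is an unknown.
//         [=] AMREX_GPU_DEVICE (int /*boxno*/, int, int, int, int /*ivar*/)
//             -> bool { return true; },
//         // Filler: writes one matrix row. gid[ivar] maps (i,j,k) to the
//         // global row id; ncols, cols and mat receive the row's entries.
//         [=] AMREX_GPU_DEVICE (int /*boxno*/, int i, int j, int k,
//                               int /*ivar*/, Array4<HYPRE_Int const> const* gid,
//                               HYPRE_Int& ncols, HYPRE_Int* cols, HYPRE_Real* mat)
//         {
//             ncols = 0;
//             cols[ncols] = gid[0](i,j,k);   mat[ncols] = -6.0; ++ncols;
//             cols[ncols] = gid[0](i-1,j,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i+1,j,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j-1,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j+1,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j,k-1); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j,k+1); mat[ncols] =  1.0; ++ncols;
//         });
//     solver.solve({&soln}, {&rhs}, 1.e-6, 0.0, 100); // rel_tol, abs_tol, max_iter
// -----------------------------------------------------------------------------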

template <int MSS>
template <class Marker, class Filler>
HypreSolver<MSS>::HypreSolver (Vector<IndexType> const& a_index_type,
                               IntVect const& a_nghost,
                               Geometry const& a_geom,
                               BoxArray const& a_grids,
                               DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
                               Marker && a_marker,
                               Filler && a_filler,
                               int a_verbose,
                               std::string a_options_namespace)
    : m_nvars (int(a_index_type.size())),
      m_index_type (a_index_type),
      m_nghost (a_nghost),
      m_geom (a_geom),
      m_dmap (a_dmap),
      m_verbose (a_verbose),
      m_options_namespace(std::move(a_options_namespace))
{
    BL_PROFILE("HypreSolver()");

#ifdef AMREX_USE_MPI
    m_comm = ParallelContext::CommunicatorSub(); // NOLINT(cppcoreguidelines-prefer-member-initializer)
#endif

    m_grids.resize(m_nvars);
    m_local_id.resize(m_nvars);
    m_global_id.resize(m_nvars);
    m_nrows_grid.resize(m_nvars);
    m_id_offset.resize(m_nvars);
    Long nrows_max = 0;
    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        m_grids     [ivar] = amrex::convert(a_grids,m_index_type[ivar]);
        m_local_id  [ivar].define(m_grids[ivar], m_dmap, 1, 0);
        m_global_id [ivar].define(m_grids[ivar], m_dmap, 1, m_nghost);
        m_nrows_grid[ivar].define(m_grids[0], m_dmap);
        m_id_offset [ivar].define(m_grids[0], m_dmap);
        nrows_max += m_grids[ivar].numPts();
    }
    m_global_id_vec.define(m_grids[0], m_dmap);
    m_nrows.define(m_grids[0], m_dmap);
    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nrows_max < static_cast<Long>(std::numeric_limits<HYPRE_Int>::max()),
                                     "Need to configure Hypre with --enable-bigint");

    m_owner_mask.resize(m_nvars);
    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        m_owner_mask[ivar] = amrex::OwnerMask(m_local_id[ivar], m_geom.periodicity());
    }

#ifdef AMREX_USE_GPU
    m_cell_offset.define(m_grids[0], m_dmap);
#endif

    fill_local_id(std::forward<Marker>(a_marker));

    // At this point, m_local_id stores the ids local to each box.
    // m_nrows_grid stores the number of unique points in each box.
    // m_nrows_proc is the number of rows for all variables on this MPI
    // process. If a point is invalid, its id is invalid (i.e., a very
    // negative number). Note that the data type of the local id is int,
    // not HYPRE_Int, for performance on GPU.

    const int nprocs = ParallelContext::NProcsSub();
    const int myproc = ParallelContext::MyProcSub();

    Vector<HYPRE_Int> nrows_allprocs(nprocs);
#ifdef AMREX_USE_MPI
    if (nrows_allprocs.size() > 1) {
        // Gather as raw bytes so this works whether HYPRE_Int is int or long long.
        MPI_Allgather(&m_nrows_proc, sizeof(HYPRE_Int), MPI_CHAR,
                      nrows_allprocs.data(), sizeof(HYPRE_Int), MPI_CHAR, m_comm);
    } else
#endif
    {
        nrows_allprocs[0] = m_nrows_proc;
    }

    HYPRE_Int proc_begin = 0;
    for (int i = 0; i < myproc; ++i) {
        proc_begin += nrows_allprocs[i];
    }

    HYPRE_Int proc_end = proc_begin;
    for (MFIter mfi(m_nrows_grid[0]); mfi.isValid(); ++mfi) {
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            m_id_offset[ivar][mfi] = proc_end;
            proc_end += m_nrows_grid[ivar][mfi];
        }
    }
    AMREX_ASSERT(proc_end == proc_begin + m_nrows_proc);

    fill_global_id();

    // Create and initialize A, b & x
    HYPRE_Int ilower = proc_begin;
    HYPRE_Int iupper = proc_end-1;
    m_hypre_ij = std::make_unique<HypreIJIface>(m_comm, ilower, iupper, m_verbose);
    m_hypre_ij->parse_inputs(m_options_namespace);

    // Obtain non-owning references to the matrix, rhs, and solution data
    m_A = m_hypre_ij->A();
    m_b = m_hypre_ij->b();
    m_x = m_hypre_ij->x();

    fill_matrix(std::forward<Filler>(a_filler));
}
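
// A worked example of the global row numbering above, with hypothetical
// numbers: if 3 MPI ranks own 100, 80 and 120 rows, nrows_allprocs is
// {100,80,120}, and rank 1 computes proc_begin = 100 and proc_end = 180,
// so it passes ilower = 100 and iupper = 179 to HypreIJIface. The
// m_id_offset entries then partition [100,180) box by box and variable by
// variable in MFIter order, and fill_global_id adds each point's local id
// to its box's offset to obtain the global row id.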

template <int MSS>
template <class Marker>
#ifdef AMREX_USE_CUDA
std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
#else
std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
#endif
HypreSolver<MSS>::fill_local_id (Marker const& marker)
{
    BL_PROFILE("HypreSolver::fill_local_id()");

#ifdef AMREX_USE_GPU

    for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
        int boxno = mfi.LocalIndex();
        Long npts_tot = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            npts_tot += bx.numPts();
        }
        m_cell_offset[mfi].resize(npts_tot);
        npts_tot = 0;
        int* p_cell_offset = m_cell_offset[mfi].data();
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            auto const& lid = m_local_id[ivar].array(mfi);
            auto const& owner = m_owner_mask[ivar]->const_array(mfi);
            AMREX_ASSERT(bx.numPts() < static_cast<Long>(std::numeric_limits<int>::max()));
            const auto npts = static_cast<int>(bx.numPts());
            int npts_box = amrex::Scan::PrefixSum<int>(npts,
                [=] AMREX_GPU_DEVICE (int offset) noexcept -> int
                {
                    const Dim3 cell = bx.atOffset(offset).dim3();
                    int id = (owner (      cell.x,cell.y,cell.z      ) &&
                              marker(boxno,cell.x,cell.y,cell.z,ivar)) ? 1 : 0;
                    lid(cell.x,cell.y,cell.z) = id;
                    return id;
                },
                [=] AMREX_GPU_DEVICE (int offset, int ps) noexcept
                {
                    const Dim3 cell = bx.atOffset(offset).dim3();
                    if (lid(cell.x,cell.y,cell.z)) {
                        lid(cell.x,cell.y,cell.z) = ps;
                        p_cell_offset[ps] = offset;
                    } else {
                        lid(cell.x,cell.y,cell.z) = std::numeric_limits<int>::lowest();
                    }
                },
                Scan::Type::exclusive, Scan::retSum);
            m_nrows_grid[ivar][mfi] = npts_box;
            npts_tot += npts_box;
            p_cell_offset += npts_box;
        }
        m_cell_offset[mfi].resize(npts_tot);
    }

#else

#ifdef AMREX_USE_OMP
#pragma omp parallel
#endif
    for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
        int boxno = mfi.LocalIndex();
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            auto const& lid = m_local_id[ivar].array(mfi);
            auto const& owner = m_owner_mask[ivar]->const_array(mfi);
            int id = 0;
            const auto lo = amrex::lbound(bx);
            const auto hi = amrex::ubound(bx);
            for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
            for (int i = lo.x; i <= hi.x; ++i) {
                if (owner(i,j,k) && marker(boxno,i,j,k,ivar)) {
                    lid(i,j,k) = id++;
                } else {
                    lid(i,j,k) = std::numeric_limits<int>::lowest();
                }
            }}}
            m_nrows_grid[ivar][mfi] = id;
        }
    }
#endif

    m_nrows_proc = 0;
    for (MFIter mfi(m_nrows); mfi.isValid(); ++mfi) {
        int nrows = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            nrows += m_nrows_grid[ivar][mfi];
        }
        m_nrows[mfi] = nrows;
        m_nrows_proc += nrows;
    }
}
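
// -----------------------------------------------------------------------------
// A minimal sketch of the Scan::PrefixSum pattern used in the GPU branch
// above, on a plain flag array (assumes an AMReX GPU build; `flags`, `ids`
// and `n` are hypothetical device data). The first lambda supplies each
// element's contribution (1 if marked); the second receives that element's
// exclusive partial sum, which is exactly the compact row id that
// fill_local_id stores in m_local_id.
//
//     int nmarked = amrex::Scan::PrefixSum<int>(n,
//         [=] AMREX_GPU_DEVICE (int i) -> int { return flags[i] ? 1 : 0; },
//         [=] AMREX_GPU_DEVICE (int i, int ps) {
//             ids[i] = flags[i] ? ps : -1; // marked cells get 0,1,2,...
//         },
//         amrex::Scan::Type::exclusive, amrex::Scan::retSum);
// -----------------------------------------------------------------------------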

template <int MSS>
void
HypreSolver<MSS>::fill_global_id ()
{
    BL_PROFILE("HypreSolver::fill_global_id()");

    // To generate global ids for Hypre, we need to remove duplicates on
    // nodes shared by multiple Boxes with OverrideSync. So we need to use
    // a type that supports atomicAdd. HYPRE_Int is either int or long
    // long. The latter does not have native atomicAdd support in CUDA/HIP.
    using AtomicInt = std::conditional_t<sizeof(HYPRE_Int) == 4,
                                         HYPRE_Int, unsigned long long>;

    Vector<FabArray<BaseFab<AtomicInt>>> global_id_raii;
    Vector<FabArray<BaseFab<AtomicInt>>*> p_global_id;
    if constexpr (std::is_same<HYPRE_Int, AtomicInt>()) {
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            p_global_id.push_back(&(m_global_id[ivar]));
        }
    } else {
        global_id_raii.resize(m_nvars); // allocate one FabArray per variable
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            global_id_raii[ivar].define(m_global_id[ivar].boxArray(),
                                        m_global_id[ivar].DistributionMap(),
                                        1, m_global_id[ivar].nGrowVect());
            p_global_id.push_back(&(global_id_raii[ivar]));
        }
    }

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    for (MFIter mfi(m_global_id[0]); mfi.isValid(); ++mfi) {
        auto& rows_vec = m_global_id_vec[mfi];
        rows_vec.resize(m_nrows[mfi]);

        HYPRE_Int nrows = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            HYPRE_Int const os = m_id_offset[ivar][mfi];
            Box bx = mfi.validbox();
            bx.convert(m_index_type[ivar]).grow(m_nghost);
            Array4<AtomicInt> const& gid = p_global_id[ivar]->array(mfi);
            auto const& lid = m_local_id[ivar].const_array(mfi);
            HYPRE_Int* rows = rows_vec.data() + nrows;
            nrows += m_nrows_grid[ivar][mfi];
            amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
            {
                if (lid.contains(i,j,k) && lid(i,j,k) >= 0) {
                    const auto id = lid(i,j,k) + os;
                    rows[lid(i,j,k)] = id;
                    gid(i,j,k) = static_cast<AtomicInt>(id);
                } else {
                    gid(i,j,k) = static_cast<AtomicInt>
                        (std::numeric_limits<HYPRE_Int>::max());
                }
            });
        }
    }

    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        amrex::OverrideSync(*p_global_id[ivar], *m_owner_mask[ivar],
                            m_geom.periodicity());
        p_global_id[ivar]->FillBoundary(m_geom.periodicity());

        if constexpr (!std::is_same<HYPRE_Int, AtomicInt>()) {
            auto const& dst = m_global_id[ivar].arrays();
            auto const& src = p_global_id[ivar]->const_arrays();
            amrex::ParallelFor(m_global_id[ivar], m_global_id[ivar].nGrowVect(),
                               [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
            {
                dst[b](i,j,k) = static_cast<HYPRE_Int>(src[b](i,j,k));
            });
        }
    }
}
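
// Note on the synchronization above: points on faces and edges shared by
// multiple boxes are stored once per box. OverrideSync broadcasts the
// owner's global id to the other copies so all boxes agree, and
// FillBoundary then copies ids into ghost cells so a Filler can look up
// the global ids of stencil neighbors that live in adjacent boxes.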

#ifdef AMREX_USE_GPU
namespace detail {
template <typename T>
void pack_matrix_gpu (Gpu::DeviceVector<HYPRE_Int>& cols_tmp,
                      Gpu::DeviceVector<HYPRE_Real>& mat_tmp,
                      Gpu::DeviceVector<HYPRE_Int>& cols,
                      Gpu::DeviceVector<HYPRE_Real>& mat)
{
    auto* p_cols_tmp = cols_tmp.data();
    auto* p_mat_tmp = mat_tmp.data();
    auto const* p_cols = cols.data();
    auto const* p_mat = mat.data();
    const auto N = Long(cols.size());
    Scan::PrefixSum<T>(N,
        [=] AMREX_GPU_DEVICE (Long i) -> T
        {
            return static_cast<T>(p_cols[i] >= 0);
        },
        [=] AMREX_GPU_DEVICE (Long i, T s)
        {
            if (p_cols[i] >= 0) {
                p_cols_tmp[s] = p_cols[i];
                p_mat_tmp[s] = p_mat[i];
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);
    std::swap(cols_tmp, cols);
    std::swap(mat_tmp, mat);
}
}
#endif
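
// -----------------------------------------------------------------------------
// For reference, a host-side equivalent of detail::pack_matrix_gpu (a sketch,
// not used by the solver): it drops the unused column slots, which were
// initialized to -1, while keeping cols and mat aligned. The GPU version does
// the same stable compaction with one exclusive scan instead of a serial loop.
//
//     Long m = 0;
//     for (Long i = 0; i < Long(cols.size()); ++i) {
//         if (cols[i] >= 0) { cols[m] = cols[i]; mat[m] = mat[i]; ++m; }
//     }
// -----------------------------------------------------------------------------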

template <int MSS>
template <class Filler,
          std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
                                      Array4<HYPRE_Int const> const*,
                                      HYPRE_Int&, HYPRE_Int*,
                                      HYPRE_Real*>::value,int> FOO>
void
HypreSolver<MSS>::fill_matrix (Filler const& filler)
{
    BL_PROFILE("HypreSolver::fill_matrix()");

    Gpu::DeviceVector<HYPRE_Int> ncols_vec;
    Gpu::DeviceVector<HYPRE_Int> cols_vec;
    Gpu::DeviceVector<HYPRE_Real> mat_vec;

489 
490  MFItInfo mfitinfo;
491  mfitinfo.UseDefaultStream().DisableDeviceSync();
492  for (MFIter mfi(m_local_id[0],mfitinfo); mfi.isValid(); ++mfi)
493  {
494  int boxno = mfi.LocalIndex();
495  const HYPRE_Int nrows = m_nrows[mfi];
496  if (nrows > 0)
497  {
498  ncols_vec.clear();
499  ncols_vec.resize(nrows);
500  HYPRE_Int* ncols = ncols_vec.data();
501 
502  cols_vec.clear();
503  cols_vec.resize(Long(nrows)*MSS, -1);
504  HYPRE_Int* cols = cols_vec.data();
505 
506  mat_vec.clear();
507  mat_vec.resize(Long(nrows)*MSS);
508  HYPRE_Real* mat = mat_vec.data();
509 
510  Vector<Array4<HYPRE_Int const>> gid_v(m_nvars);
511  for (int ivar = 0; ivar < m_nvars; ++ivar) {
512  gid_v[ivar] = m_global_id[ivar].const_array(mfi);
513  }
514 
#ifdef AMREX_USE_GPU
            Gpu::Buffer<Array4<HYPRE_Int const>> gid_buf
                (gid_v.data(), gid_v.size());
            auto const* pgid = gid_buf.data();
            auto const* p_cell_offset = m_cell_offset[mfi].data();
            Long ntot = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                const HYPRE_Int nrows_var = m_nrows_grid[ivar][mfi];
                if (nrows_var > 0) {
                    Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    ntot += Reduce::Sum<Long>(nrows_var,
                        [=] AMREX_GPU_DEVICE (HYPRE_Int offset)
                        {
                            const Dim3 cell = bx.atOffset(p_cell_offset[offset]).dim3();
                            filler(boxno, cell.x, cell.y, cell.z, ivar, pgid,
                                   ncols[offset], cols+Long(offset)*MSS,
                                   mat+Long(offset)*MSS);
                            return ncols[offset];
                        });
                    p_cell_offset += nrows_var;
                    ncols += nrows_var;
                    cols += Long(nrows_var)*MSS;
                    mat += Long(nrows_var)*MSS;
                }
            }
            Gpu::DeviceVector<HYPRE_Int> cols_tmp(ntot);
            Gpu::DeviceVector<HYPRE_Real> mat_tmp(ntot);
            if (ntot >= Long(std::numeric_limits<int>::max())) {
                detail::pack_matrix_gpu<Long>(cols_tmp, mat_tmp, cols_vec, mat_vec);
            } else {
                detail::pack_matrix_gpu<int>(cols_tmp, mat_tmp, cols_vec, mat_vec);
            }
#else
            auto* pgid = gid_v.data();
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    amrex::Loop(amrex::convert(mfi.validbox(),m_index_type[ivar]),
                    [=,&ncols,&cols,&mat] (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            filler(boxno, i, j, k, ivar, pgid, *ncols, cols, mat);
                            cols += (*ncols);
                            mat += (*ncols);
                            ++ncols;
                        }
                    });
                }
            }
#endif

            const auto& rows_vec = m_global_id_vec[mfi];
            HYPRE_Int const* rows = rows_vec.data();

            Gpu::streamSynchronize(); // make sure the filled data are ready for HYPRE
            HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.data(), rows,
                                    cols_vec.data(), mat_vec.data());
            Gpu::hypreSynchronize();
        }
    }
    HYPRE_IJMatrixAssemble(m_A);
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::solve (Vector<MF *> const& a_soln,
                         Vector<MF const*> const& a_rhs,
                         HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
{
    BL_PROFILE("HypreSolver::solve()");

    AMREX_ASSERT(a_soln.size() == m_nvars && a_rhs.size() == m_nvars);

    HYPRE_IJVectorInitialize(m_b);
    HYPRE_IJVectorInitialize(m_x);

    load_vectors(a_soln, a_rhs);

    HYPRE_IJVectorAssemble(m_x);
    HYPRE_IJVectorAssemble(m_b);

    m_hypre_ij->solve(rel_tol, abs_tol, max_iter);

    get_solution(a_soln);
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::load_vectors (Vector<MF *> const& a_soln,
                                Vector<MF const*> const& a_rhs)
{
    BL_PROFILE("HypreSolver::load_vectors()");

    MFItInfo mfitinfo;
    mfitinfo.UseDefaultStream().DisableDeviceSync();

    Gpu::DeviceVector<HYPRE_Real> xvec;
    Gpu::DeviceVector<HYPRE_Real> bvec;
    for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
    {
        const HYPRE_Int nrows = m_nrows[mfi];
        if (nrows > 0)
        {
            xvec.clear();
            xvec.resize(nrows);
            bvec.clear();
            bvec.resize(nrows);
            auto* xp = xvec.data();
            auto* bp = bvec.data();

            HYPRE_Int const* rows = m_global_id_vec[mfi].data();

            HYPRE_Int offset = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& xfab = a_soln[ivar]->const_array(mfi);
                    auto const& bfab = a_rhs [ivar]->const_array(mfi);
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    HYPRE_Real* x = xp + offset;
                    HYPRE_Real* b = bp + offset;
                    Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            x[lid(i,j,k)] = xfab(i,j,k);
                            b[lid(i,j,k)] = bfab(i,j,k);
                        }
                    });
                    offset += m_nrows_grid[ivar][mfi];
                }
            }

            Gpu::streamSynchronize(); // xvec/bvec must be filled before HYPRE reads them
            HYPRE_IJVectorSetValues(m_x, nrows, rows, xp);
            HYPRE_IJVectorSetValues(m_b, nrows, rows, bp);
            Gpu::hypreSynchronize();
        }
    }
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::get_solution (Vector<MF*> const& a_soln)
{
    BL_PROFILE("HypreSolver::get_solution()");

    MFItInfo mfitinfo;
    mfitinfo.UseDefaultStream().DisableDeviceSync();

    Gpu::DeviceVector<HYPRE_Real> xvec;
    for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
    {
        const HYPRE_Int nrows = m_nrows[mfi];
        if (nrows > 0)
        {
            xvec.clear();
            xvec.resize(nrows);
            auto* xp = xvec.data();

            HYPRE_Int const* rows = m_global_id_vec[mfi].data();

            HYPRE_IJVectorGetValues(m_x, nrows, rows, xp);
            Gpu::hypreSynchronize();

            HYPRE_Int offset = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& xfab = a_soln[ivar]->array(mfi);
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    HYPRE_Real* x = xp + offset;
                    Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            xfab(i,j,k) = x[lid(i,j,k)];
                        }
                    });
                    offset += m_nrows_grid[ivar][mfi];
                }
            }
            Gpu::streamSynchronize(); // the kernels above read xvec asynchronously
        }
    }

    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        amrex::OverrideSync(*a_soln[ivar], *m_owner_mask[ivar],
                            m_geom.periodicity());
    }
}

}

#endif