Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_HypreSolver.H
Go to the documentation of this file.
1
5#ifndef AMREX_HYPRE_SOLVER_H_
6#define AMREX_HYPRE_SOLVER_H_
7
8#include <AMReX_Geometry.H>
9#include <AMReX_iMultiFab.H>
10#include <AMReX_HypreIJIface.H>
11#include <AMReX_BLProfiler.H>
12
13#include "HYPRE.h"
14#include "_hypre_utilities.h"
15
16#include <string>
17
18namespace amrex
19{
20
29template <int MSS>
31{
32public:
33
60 template <class Marker, class Filler>
61 HypreSolver (Vector<IndexType> const& a_index_type,
62 IntVect const& a_nghost,
63 Geometry const& a_geom,
64 BoxArray const& a_grids,
65 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
66 Marker && a_marker,
67 Filler && a_filler,
68 int a_verbose = 0,
69 std::string a_options_namespace = "hypre");
70
71 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
72 std::is_same_v<typename MF::value_type,
73 HYPRE_Real>, int> = 0>
83 void solve (Vector<MF *> const& a_soln,
84 Vector<MF const*> const& a_rhs,
85 HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter);
86
88 int getNumIters () const { return m_hypre_ij->getNumIters(); }
89
91 HYPRE_Real getFinalResidualNorm () const {
92 return m_hypre_ij->getFinalResidualNorm();
93 }
94
96 HYPRE_IJMatrix getA () const { return m_hypre_ij->A(); }
98 HYPRE_IJVector getb () const { return m_hypre_ij->b(); }
100 HYPRE_IJVector getx () const { return m_hypre_ij->x(); }
101
102// public: // for cuda
103
107 template <class Marker>
108#ifdef AMREX_USE_CUDA
109 std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
110#else
111 std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
112#endif
113 fill_local_id (Marker const& marker);
114
116 template <typename AI>
117 void fill_global_id ();
118
127 template <class Filler,
128 std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
130 HYPRE_Int&, HYPRE_Int*,
131 HYPRE_Real*>::value,int> FOO = 0>
132 void fill_matrix (Filler const& filler);
133
140 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
141 std::is_same_v<typename MF::value_type,
142 HYPRE_Real>, int> = 0>
143 void load_vectors (Vector<MF *> const& a_soln,
144 Vector<MF const*> const& a_rhs);
145
151 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
152 std::is_same_v<typename MF::value_type,
153 HYPRE_Real>, int> = 0>
154 void get_solution (Vector<MF*> const& a_soln);
155
156private:
157
158 int m_nvars;
159 Vector<IndexType> m_index_type;
160 IntVect m_nghost;
161 Geometry m_geom;
162 Vector<BoxArray> m_grids;
163 DistributionMapping m_dmap;
164
165 int m_verbose;
166 std::string m_options_namespace;
167
168 MPI_Comm m_comm = MPI_COMM_NULL;
169
171 Vector<iMultiFab> m_local_id;
174
175#ifdef AMREX_USE_GPU
177#endif
178
179 Vector<LayoutData<HYPRE_Int>> m_nrows_grid;
180 Vector<LayoutData<HYPRE_Int>> m_id_offset;
181 LayoutData<HYPRE_Int> m_nrows;
182 HYPRE_Int m_nrows_proc;
183
184 std::unique_ptr<HypreIJIface> m_hypre_ij;
185
186 // Non-owning references to HYPRE matrix, rhs, and solution data
187 HYPRE_IJMatrix m_A = nullptr;
188 HYPRE_IJVector m_b = nullptr;
189 HYPRE_IJVector m_x = nullptr;
190};
191
192template <int MSS>
193template <class Marker, class Filler>
195 IntVect const& a_nghost,
196 Geometry const& a_geom,
197 BoxArray const& a_grids,
198 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
199 Marker && a_marker,
200 Filler && a_filler,
201 int a_verbose,
202 std::string a_options_namespace)
203 : m_nvars (int(a_index_type.size())),
204 m_index_type (a_index_type),
205 m_nghost (a_nghost),
206 m_geom (a_geom),
207 m_dmap (a_dmap),
208 m_verbose (a_verbose),
209 m_options_namespace(std::move(a_options_namespace))
210{
211 BL_PROFILE("HypreSolver()");
212
213#ifdef AMREX_USE_MPI
214 m_comm = ParallelContext::CommunicatorSub(); // NOLINT(cppcoreguidelines-prefer-member-initializer)
215#endif
216
217 m_grids.resize(m_nvars);
218 m_local_id.resize(m_nvars);
219 m_global_id.resize(m_nvars);
220 m_nrows_grid.resize(m_nvars);
221 m_id_offset.resize(m_nvars);
222 Long nrows_max = 0;
223 for (int ivar = 0; ivar < m_nvars; ++ivar) {
224 m_grids [ivar] = amrex::convert(a_grids,m_index_type[ivar]);
225 m_local_id [ivar].define(m_grids[ivar], m_dmap, 1, 0);
226 m_global_id [ivar].define(m_grids[ivar], m_dmap, 1, m_nghost);
227 m_nrows_grid[ivar].define(m_grids[0], m_dmap);
228 m_id_offset [ivar].define(m_grids[0], m_dmap);
229 nrows_max += m_grids[ivar].numPts();
230 }
231 m_global_id_vec.define(m_grids[0], m_dmap);
232 m_nrows.define (m_grids[0], m_dmap);
233 AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nrows_max < static_cast<Long>(std::numeric_limits<HYPRE_Int>::max()-1),
234 "Need to configure Hypre with --enable-bigint");
235
236 m_owner_mask.resize(m_nvars);
237 for (int ivar = 0; ivar < m_nvars; ++ivar) {
238 m_owner_mask[ivar] = amrex::OwnerMask(m_local_id[ivar], m_geom.periodicity());
239 }
240
241#ifdef AMREX_USE_GPU
242 m_cell_offset.define(m_grids[0], m_dmap);
243#endif
244
245 fill_local_id(std::forward<Marker>(a_marker));
246
247 // At this point, m_local_id stores the ids local to each box.
248 // m_nrows_grid stores the number of unique points in each box.
249 // m_nrows_proc is the number of rowss for all variables on this MPI
250 // process. If a point is invalid, its id is invalid (i.e., a very
251 // negative number). Note that the data type of local_node_id is int,
252 // not HYPRE_Int for performance on GPU.
253
254 const int nprocs = ParallelContext::NProcsSub();
255 const int myproc = ParallelContext::MyProcSub();
256
257 Vector<HYPRE_Int> nrows_allprocs(nprocs);
258#ifdef AMREX_USE_MPI
259 if (nrows_allprocs.size() > 1) {
260 MPI_Allgather(&m_nrows_proc, sizeof(HYPRE_Int), MPI_CHAR,
261 nrows_allprocs.data(), sizeof(HYPRE_Int), MPI_CHAR, m_comm);
262 } else
263#endif
264 {
265 nrows_allprocs[0] = m_nrows_proc;
266 }
267
268 HYPRE_Int proc_begin = 0;
269 for (int i = 0; i < myproc; ++i) {
270 proc_begin += nrows_allprocs[i];
271 }
272
273 HYPRE_Int proc_end = proc_begin;
274 for (MFIter mfi(m_nrows_grid[0]); mfi.isValid(); ++mfi) {
275 for (int ivar = 0; ivar < m_nvars; ++ivar) {
276 m_id_offset[ivar][mfi] = proc_end;
277 proc_end += m_nrows_grid[ivar][mfi];
278 }
279 }
280 AMREX_ASSERT(proc_end == proc_begin + m_nrows_proc);
281
282 // To generate global ids for HYPRE, we need to remove duplicates on
283 // nodes shared by multiple Boxes with OverrideSync. So we need to use
284 // a type that supports atomicAdd. HYPRE_Int is either int or long
285 // long. The latter (i.e., long long) does not have native atomicAdd
286 // support in CUDA/HIP, whereas unsigned long long has.
287 using AtomicInt = std::conditional_t<sizeof(HYPRE_Int) == 4,
288 HYPRE_Int, unsigned long long>;
289 fill_global_id<AtomicInt>();
290
291 // Create and initialize A, b & x
292 HYPRE_Int ilower = proc_begin;
293 HYPRE_Int iupper = proc_end-1;
294 m_hypre_ij = std::make_unique<HypreIJIface>(m_comm, ilower, iupper, m_verbose);
295 m_hypre_ij->parse_inputs(m_options_namespace);
296
297 // Obtain non-owning references to the matrix, rhs, and solution data
298 m_A = m_hypre_ij->A();
299 m_b = m_hypre_ij->b();
300 m_x = m_hypre_ij->x();
301
302 fill_matrix(std::forward<Filler>(a_filler));
303}
304
305template <int MSS>
306template <class Marker>
307#ifdef AMREX_USE_CUDA
308 std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
309#else
310 std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
311#endif
313{
314 BL_PROFILE("HypreSolver::fill_local_id()");
315
316#ifdef AMREX_USE_GPU
317
318 for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
319 int boxno = mfi.LocalIndex();
320 Long npts_tot = 0;
321 for (int ivar = 0; ivar < m_nvars; ++ivar) {
322 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
323 npts_tot += bx.numPts();
324 }
325 m_cell_offset[mfi].resize(npts_tot);
326 npts_tot = 0;
327 int* p_cell_offset = m_cell_offset[mfi].data();
328 for (int ivar = 0; ivar < m_nvars; ++ivar) {
329 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
330 auto const& lid = m_local_id[ivar].array(mfi);
331 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
332 AMREX_ASSERT(bx.numPts() < static_cast<Long>(std::numeric_limits<int>::max()));
333 const auto npts = static_cast<int>(bx.numPts());
334 int npts_box = amrex::Scan::PrefixSum<int>(npts,
335 [=] AMREX_GPU_DEVICE (int offset) noexcept -> int
336 {
337 const Dim3 cell = bx.atOffset(offset).dim3();
338 int id = (owner ( cell.x,cell.y,cell.z ) &&
339 marker(boxno,cell.x,cell.y,cell.z,ivar)) ? 1 : 0;
340 lid(cell.x,cell.y,cell.z) = id;
341 return id;
342 },
343 [=] AMREX_GPU_DEVICE (int offset, int ps) noexcept
344 {
345 const Dim3 cell = bx.atOffset(offset).dim3();
346 if (lid(cell.x,cell.y,cell.z)) {
347 lid(cell.x,cell.y,cell.z) = ps;
348 p_cell_offset[ps] = offset;
349 } else {
350 lid(cell.x,cell.y,cell.z) = std::numeric_limits<int>::lowest();
351 }
352 },
354 m_nrows_grid[ivar][mfi] = npts_box;
355 npts_tot += npts_box;
356 p_cell_offset += npts_box;
357 }
358 m_cell_offset[mfi].resize(npts_tot);
359 }
360
361#else
362
363#ifdef AMREX_USE_OMP
364#pragma omp parallel
365#endif
366 for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
367 int boxno = mfi.LocalIndex();
368 for (int ivar = 0; ivar < m_nvars; ++ivar) {
369 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
370 auto const& lid = m_local_id[ivar].array(mfi);
371 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
372 int id = 0;
373 const auto lo = amrex::lbound(bx);
374 const auto hi = amrex::ubound(bx);
375 for (int k = lo.z; k <= hi.z; ++k) {
376 for (int j = lo.y; j <= hi.y; ++j) {
377 for (int i = lo.x; i <= hi.x; ++i) {
378 if (owner(i,j,k) && marker(boxno,i,j,k,ivar)) {
379 lid(i,j,k) = id++;
380 } else {
381 lid(i,j,k) = std::numeric_limits<int>::lowest();
382 }
383 }}}
384 m_nrows_grid[ivar][mfi] = id;
385 }
386 }
387#endif
388
389 m_nrows_proc = 0;
390 for (MFIter mfi(m_nrows); mfi.isValid(); ++mfi) {
391 int nrows = 0;
392 for (int ivar = 0; ivar < m_nvars; ++ivar) {
393 nrows += m_nrows_grid[ivar][mfi];
394 }
395 m_nrows[mfi] = nrows;
396 m_nrows_proc += nrows;
397 }
398}
399
400template <int MSS>
401template <typename AI>
402void
404{
405 BL_PROFILE("HypreSolver::fill_global_id()");
406
407 Vector<FabArray<BaseFab<AI>>> global_id_raii;
408 Vector<FabArray<BaseFab<AI>>*> p_global_id;
409
410 if constexpr (std::is_same_v<HYPRE_Int,AI>) {
411 for (int ivar = 0; ivar < m_nvars; ++ivar) {
412 p_global_id.push_back(&(m_global_id[ivar]));
413 }
414 } else {
415 for (int ivar = 0; ivar < m_nvars; ++ivar) {
416 global_id_raii.emplace_back(m_global_id[ivar].boxArray(),
417 m_global_id[ivar].DistributionMap(),
418 1, m_global_id[ivar].nGrowVect());
419 p_global_id.push_back(&(global_id_raii[ivar]));
420 }
421 }
422
423#ifdef AMREX_USE_OMP
424#pragma omp parallel if (Gpu::notInLaunchRegion())
425#endif
426 for (MFIter mfi(m_global_id[0]); mfi.isValid(); ++mfi) {
427 auto& rows_vec = m_global_id_vec[mfi];
428 rows_vec.resize(m_nrows[mfi]);
429
430 HYPRE_Int nrows = 0;
431 for (int ivar = 0; ivar < m_nvars; ++ivar) {
432 HYPRE_Int const os = m_id_offset[ivar][mfi];
433 Box bx = mfi.validbox();
434 bx.convert(m_index_type[ivar]).grow(m_nghost);
435 Array4<AI> const& gid = p_global_id[ivar]->array(mfi);
436 auto const& lid = m_local_id[ivar].const_array(mfi);
437 HYPRE_Int* rows = rows_vec.data() + nrows;
438 nrows += m_nrows_grid[ivar][mfi];
439 amrex::ParallelFor(bx,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
440 {
441 if (lid.contains(i,j,k) && lid(i,j,k) >= 0) {
442 const auto id = lid(i,j,k) + os;
443 rows[lid(i,j,k)] = id;
444 gid(i,j,k) = static_cast<AI>(id);
445 } else {
446 gid(i,j,k) = static_cast<AI>
447 (std::numeric_limits<HYPRE_Int>::max());
448 }
449 });
450 }
451 }
452
453 for (int ivar = 0; ivar < m_nvars; ++ivar) {
454 amrex::OverrideSync(*p_global_id[ivar], *m_owner_mask[ivar],
455 m_geom.periodicity());
456 p_global_id[ivar]->FillBoundary(m_geom.periodicity());
457
458 if constexpr (!std::is_same<HYPRE_Int, AI>()) {
459 auto const& dst = m_global_id[ivar].arrays();
460 auto const& src = p_global_id[ivar]->const_arrays();
461 amrex::ParallelFor(m_global_id[ivar], m_global_id[ivar].nGrowVect(),
462 [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
463 {
464 dst[b](i,j,k) = static_cast<HYPRE_Int>(src[b](i,j,k));
465 });
466 }
467 }
468}
469
#ifdef AMREX_USE_GPU
namespace detail {
// Compact the (cols, mat) arrays on the device by dropping entries whose
// column id is negative (the -1 padding left by fill_matrix), using a
// stream-compaction prefix sum. T is the scan's index type (int or Long,
// chosen by the caller based on the total entry count).
// NOTE(review): parameter list reconstructed from the body; confirm against
// the repository.
template <typename T>
void pack_matrix_gpu (Gpu::DeviceVector<HYPRE_Int>& cols_tmp,
                      Gpu::DeviceVector<HYPRE_Real>& mat_tmp,
                      Gpu::DeviceVector<HYPRE_Int>& cols,
                      Gpu::DeviceVector<HYPRE_Real>& mat)
{
    auto* p_cols_tmp = cols_tmp.data();
    auto* p_mat_tmp = mat_tmp.data();
    auto const* p_cols = cols.data();
    auto const* p_mat = mat.data();
    const auto N = Long(cols.size());
    Scan::PrefixSum<T>(N,
        [=] AMREX_GPU_DEVICE (Long i) -> T
        {
            return static_cast<T>(p_cols[i] >= 0);
        },
        [=] AMREX_GPU_DEVICE (Long i, T s)
        {
            if (p_cols[i] >= 0) {
                p_cols_tmp[s] = p_cols[i];
                p_mat_tmp[s] = p_mat[i];
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);
    // Hand the compacted buffers back to the caller.
    std::swap(cols_tmp, cols);
    std::swap(mat_tmp, mat);
}
}
#endif
503
504template <int MSS>
505template <class Filler,
506 std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
507 Array4<HYPRE_Int const> const*,
508 HYPRE_Int&, HYPRE_Int*,
509 HYPRE_Real*>::value,int> FOO>
510void
511HypreSolver<MSS>::fill_matrix (Filler const& filler)
512{
513 BL_PROFILE("HypreSolver::fill_matrix()");
514
518
519 MFItInfo mfitinfo;
521 for (MFIter mfi(m_local_id[0],mfitinfo); mfi.isValid(); ++mfi)
522 {
523 int boxno = mfi.LocalIndex();
524 const HYPRE_Int nrows = m_nrows[mfi];
525 if (nrows > 0)
526 {
527 ncols_vec.clear();
528 ncols_vec.resize(nrows);
529 HYPRE_Int* ncols = ncols_vec.data();
530
531 cols_vec.clear();
532 cols_vec.resize(Long(nrows)*MSS, -1);
533 HYPRE_Int* cols = cols_vec.data();
534
535 mat_vec.clear();
536 mat_vec.resize(Long(nrows)*MSS);
537 HYPRE_Real* mat = mat_vec.data();
538
539 Vector<Array4<HYPRE_Int const>> gid_v(m_nvars);
540 for (int ivar = 0; ivar < m_nvars; ++ivar) {
541 gid_v[ivar] = m_global_id[ivar].const_array(mfi);
542 }
543
544#ifdef AMREX_USE_GPU
546 (gid_v.data(), gid_v.size());
547 auto const* pgid = gid_buf.data();
548 auto const* p_cell_offset = m_cell_offset[mfi].data();
549 Long ntot = 0;
550 for (int ivar = 0; ivar < m_nvars; ++ivar) {
551 const HYPRE_Int nrows_var = m_nrows_grid[ivar][mfi];
552 if (nrows_var > 0) {
553 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
554 ntot += Reduce::Sum<Long>(nrows_var,
555 [=] AMREX_GPU_DEVICE (HYPRE_Int offset)
556 {
557 const Dim3 cell = bx.atOffset(p_cell_offset[offset]).dim3();
558 filler(boxno, cell.x, cell.y, cell.z, ivar, pgid,
559 ncols[offset], cols+Long(offset)*MSS,
560 mat+Long(offset)*MSS);
561 return ncols[offset];
562 });
563 p_cell_offset += nrows_var;
564 ncols += nrows_var;
565 cols += Long(nrows_var)*MSS;
566 mat += Long(nrows_var)*MSS;
567 }
568 }
569 Gpu::DeviceVector<HYPRE_Int> cols_tmp(ntot);
570 Gpu::DeviceVector<HYPRE_Real> mat_tmp(ntot);
571 if (ntot >= Long(std::numeric_limits<int>::max())) {
572 detail::pack_matrix_gpu<Long>(cols_tmp, mat_tmp, cols_vec, mat_vec);
573 } else {
574 detail::pack_matrix_gpu<int>(cols_tmp, mat_tmp, cols_vec, mat_vec);
575 }
576#else
577 auto* pgid = gid_v.data();
578 for (int ivar = 0; ivar < m_nvars; ++ivar) {
579 if (m_nrows_grid[ivar][mfi] > 0) {
580 auto const& lid = m_local_id[ivar].const_array(mfi);
581 amrex::Loop(amrex::convert(mfi.validbox(),m_index_type[ivar]),
582 [=,&ncols,&cols,&mat] (int i, int j, int k)
583 {
584 if (lid(i,j,k) >= 0) {
585 filler(boxno, i, j, k, ivar, pgid, *ncols, cols, mat);
586 cols += (*ncols);
587 mat += (*ncols);
588 ++ncols;
589 }
590 });
591 }
592 }
593#endif
594
595 const auto& rows_vec = m_global_id_vec[mfi];
596 HYPRE_Int const* rows = rows_vec.data();
597
599 HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.data(), rows,
600 cols_vec.data(), mat_vec.data());
601 Gpu::hypreSynchronize();
602 }
603 }
604 HYPRE_IJMatrixAssemble(m_A);
605}
606
607template <int MSS>
608template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
609 std::is_same_v<typename MF::value_type,
610 HYPRE_Real>, int> FOO>
611void
613 Vector<MF const*> const& a_rhs,
614 HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
615{
616 BL_PROFILE("HypreSolver::solve()");
617
618 AMREX_ASSERT(a_soln.size() == m_nvars && a_rhs.size() == m_nvars);
619
620 HYPRE_IJVectorInitialize(m_b);
621 HYPRE_IJVectorInitialize(m_x);
622
623 load_vectors(a_soln, a_rhs);
624
625 HYPRE_IJVectorAssemble(m_x);
626 HYPRE_IJVectorAssemble(m_b);
627
628 m_hypre_ij->solve(rel_tol, abs_tol, max_iter);
629
630 get_solution(a_soln);
631}
632
633template <int MSS>
634template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
635 std::is_same_v<typename MF::value_type,
636 HYPRE_Real>, int> FOO>
637void
639 Vector<MF const*> const& a_rhs)
640{
641 BL_PROFILE("HypreSolver::load_vectors()");
642
643 MFItInfo mfitinfo;
645
648 for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
649 {
650 const HYPRE_Int nrows = m_nrows[mfi];
651 if (nrows > 0)
652 {
653 xvec.clear();
654 xvec.resize(nrows);
655 bvec.clear();
656 bvec.resize(nrows);
657 auto* xp = xvec.data();
658 auto* bp = bvec.data();
659
660 HYPRE_Int const* rows = m_global_id_vec[mfi].data();
661
662 HYPRE_Int offset = 0;
663 for (int ivar = 0; ivar < m_nvars; ++ivar) {
664 if (m_nrows_grid[ivar][mfi] > 0) {
665 auto const& xfab = a_soln[ivar]->const_array(mfi);
666 auto const& bfab = a_rhs [ivar]->const_array(mfi);
667 auto const& lid = m_local_id[ivar].const_array(mfi);
668 HYPRE_Real* x = xp + offset;
669 HYPRE_Real* b = bp + offset;
670 Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
671 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k)
672 {
673 if (lid(i,j,k) >= 0) {
674 x[lid(i,j,k)] = xfab(i,j,k);
675 b[lid(i,j,k)] = bfab(i,j,k);
676 }
677 });
678 offset += m_nrows_grid[ivar][mfi];
679 }
680 }
681
682 Gpu::streamSynchronize();
683 HYPRE_IJVectorSetValues(m_x, nrows, rows, xp);
684 HYPRE_IJVectorSetValues(m_b, nrows, rows, bp);
685 Gpu::hypreSynchronize();
686 }
687 }
688}
689
690template <int MSS>
691template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
692 std::is_same_v<typename MF::value_type,
693 HYPRE_Real>, int> FOO>
694void
696{
697 BL_PROFILE("HypreSolver::get_solution()");
698
699 MFItInfo mfitinfo;
701
703 for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
704 {
705 const HYPRE_Int nrows = m_nrows[mfi];
706 if (nrows > 0)
707 {
708 xvec.clear();
709 xvec.resize(nrows);
710 auto* xp = xvec.data();
711
712 HYPRE_Int const* rows = m_global_id_vec[mfi].data();
713
714 HYPRE_IJVectorGetValues(m_x, nrows, rows, xp);
715 Gpu::hypreSynchronize();
716
717 HYPRE_Int offset = 0;
718 for (int ivar = 0; ivar < m_nvars; ++ivar) {
719 if (m_nrows_grid[ivar][mfi] > 0) {
720 auto const& xfab = a_soln[ivar]->array(mfi);
721 auto const& lid = m_local_id[ivar].const_array(mfi);
722 HYPRE_Real* x = xp + offset;
723 Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
724 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k)
725 {
726 if (lid(i,j,k) >= 0) {
727 xfab(i,j,k) = x[lid(i,j,k)];
728 }
729 });
730 offset += m_nrows_grid[ivar][mfi];
731 }
732 }
733 Gpu::streamSynchronize();
734 }
735 }
736
737 for (int ivar = 0; ivar < m_nvars; ++ivar) {
738 amrex::OverrideSync(*a_soln[ivar], *m_owner_mask[ivar],
739 m_geom.periodicity());
740 }
741}
742
743}
744
745#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:568
__host__ __device__ BoxND & grow(int i) noexcept
Definition AMReX_Box.H:641
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:356
__host__ __device__ BoxND & convert(IndexTypeND< dim > typ) noexcept
Convert the BoxND from the current type into the argument type. This may change the BoxND coordinates...
Definition AMReX_Box.H:974
__host__ __device__ IntVectND< dim > atOffset(Long offset) const noexcept
Given the offset, compute IntVectND<dim>
Definition AMReX_Box.H:1071
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:43
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:74
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:356
Definition AMReX_GpuBuffer.H:18
T const * data() const noexcept
Definition AMReX_GpuBuffer.H:45
Solve Ax = b using HYPRE's generic IJ matrix format where A is a sparse matrix specified using the co...
Definition AMReX_HypreSolver.H:31
HypreSolver(Vector< IndexType > const &a_index_type, IntVect const &a_nghost, Geometry const &a_geom, BoxArray const &a_grids, DistributionMapping const &a_dmap, Marker &&a_marker, Filler &&a_filler, int a_verbose=0, std::string a_options_namespace="hypre")
Definition AMReX_HypreSolver.H:194
int getNumIters() const
Number of iterations from the last solve().
Definition AMReX_HypreSolver.H:88
std::enable_if_t< IsCallable< Marker, int, int, int, int, int >::value > fill_local_id(Marker const &marker)
Assign local ids to each owned DOF by invoking marker.
Definition AMReX_HypreSolver.H:312
void get_solution(Vector< MF * > const &a_soln)
Copy IJ solution entries back into AMReX storage.
Definition AMReX_HypreSolver.H:695
HYPRE_IJVector getx() const
Access the IJ solution handle (non-owning).
Definition AMReX_HypreSolver.H:100
void load_vectors(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs)
Copy AMReX RHS/initial guess data into the IJ vectors.
Definition AMReX_HypreSolver.H:638
HYPRE_IJMatrix getA() const
Access the assembled IJ matrix handle (non-owning).
Definition AMReX_HypreSolver.H:96
void fill_matrix(Filler const &filler)
Fill each CSR row using the supplied filler functor.
Definition AMReX_HypreSolver.H:511
void fill_global_id()
Convert the local ids to globally unique ids visible to HYPRE.
Definition AMReX_HypreSolver.H:403
void solve(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs, HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
Solve Ax=b after the constructor assembled the IJ matrix.
Definition AMReX_HypreSolver.H:612
HYPRE_Real getFinalResidualNorm() const
Final residual norm from the last solve().
Definition AMReX_HypreSolver.H:91
HYPRE_IJVector getb() const
Access the assembled IJ RHS handle (non-owning).
Definition AMReX_HypreSolver.H:98
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
void define(const BoxArray &a_grids, const DistributionMapping &a_dm)
Definition AMReX_LayoutData.H:25
Iterator for looping ever tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:85
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:169
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
void resize(size_type a_new_size, GrowthStrategy strategy=GrowthStrategy::Poisson)
Definition AMReX_PODVector.H:728
void clear() noexcept
Definition AMReX_PODVector.H:646
T * data() noexcept
Definition AMReX_PODVector.H:666
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Return the inclusive upper bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1331
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Return the inclusive lower bounds of an Array4 in Dim3 form.
Definition AMReX_Array4.H:1317
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
static constexpr struct amrex::Scan::Type::Exclusive exclusive
static constexpr RetSum noRetSum
Definition AMReX_Scan.H:33
int MPI_Comm
Definition AMReX_ccse-mpi.H:51
static constexpr int MPI_COMM_NULL
Definition AMReX_ccse-mpi.H:59
Definition AMReX_Amr.cpp:49
__host__ __device__ BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Return a BoxND with different type.
Definition AMReX_Box.H:1558
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
IntVect nGrowVect(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2856
void OverrideSync(FabArray< FAB > &fa, FabArray< IFAB > const &msk, const Periodicity &period)
Definition AMReX_FabArrayUtility.H:1442
std::unique_ptr< iMultiFab > OwnerMask(FabArrayBase const &mf, const Periodicity &period, const IntVect &ngrow)
Definition AMReX_iMultiFab.cpp:699
const int[]
Definition AMReX_BLProfiler.cpp:1664
__host__ __device__ void Loop(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:127
BoxArray const & boxArray(FabArrayBase const &fa)
Definition AMReX_FabArrayBase.cpp:2861
A multidimensional array accessor.
Definition AMReX_Array4.H:283
Definition AMReX_Dim3.H:12
int x
Definition AMReX_Dim3.H:12
int z
Definition AMReX_Dim3.H:12
int y
Definition AMReX_Dim3.H:12
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:213
Definition AMReX_MFIter.H:20
MFItInfo & DisableDeviceSync() noexcept
Definition AMReX_MFIter.H:44
MFItInfo & UseDefaultStream() noexcept
Definition AMReX_MFIter.H:72