Block-Structured AMR Software Framework
AMReX_HypreSolver.H
#ifndef AMREX_HYPRE_SOLVER_H_
#define AMREX_HYPRE_SOLVER_H_

#include <AMReX_Geometry.H>
#include <AMReX_iMultiFab.H>
#include <AMReX_HypreIJIface.H>
#include <AMReX_BLProfiler.H>

#include "HYPRE.h"
#include "_hypre_utilities.h"

#include <string>

namespace amrex
{

template <int MSS>
class HypreSolver
{
public:

    template <class Marker, class Filler>
    HypreSolver (Vector<IndexType> const& a_index_type,
                 IntVect const& a_nghost,
                 Geometry const& a_geom,
                 BoxArray const& a_grids,
                 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
                 Marker && a_marker,
                 Filler && a_filler,
                 int a_verbose = 0,
                 std::string a_options_namespace = "hypre");

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void solve (Vector<MF *> const& a_soln,
                Vector<MF const*> const& a_rhs,
                HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter);

    int getNumIters () const { return m_hypre_ij->getNumIters(); }

    HYPRE_Real getFinalResidualNorm () const {
        return m_hypre_ij->getFinalResidualNorm();
    }

    HYPRE_IJMatrix getA () const { return m_hypre_ij->A(); }
    HYPRE_IJVector getb () const { return m_hypre_ij->b(); }
    HYPRE_IJVector getx () const { return m_hypre_ij->x(); }

// public: // for cuda

    template <class Marker>
#ifdef AMREX_USE_CUDA
    std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
#else
    std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
#endif
    fill_local_id (Marker const& marker);

    void fill_global_id ();

    template <class Filler,
              std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
                                          Array4<HYPRE_Int const> const*,
                                          HYPRE_Int&, HYPRE_Int*,
                                          HYPRE_Real*>::value,int> FOO = 0>
    void fill_matrix (Filler const& filler);

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void load_vectors (Vector<MF *> const& a_soln,
                       Vector<MF const*> const& a_rhs);

    template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                         std::is_same_v<typename MF::value_type,
                                                        HYPRE_Real>, int> = 0>
    void get_solution (Vector<MF*> const& a_soln);

private:

    int                                      m_nvars;
    Vector<IndexType>                        m_index_type;
    IntVect                                  m_nghost;
    Geometry                                 m_geom;
    Vector<BoxArray>                         m_grids;
    DistributionMapping                      m_dmap;

    int                                      m_verbose;
    std::string                              m_options_namespace;

    MPI_Comm                                 m_comm = MPI_COMM_NULL;

    Vector<std::unique_ptr<iMultiFab>>       m_owner_mask;
    Vector<iMultiFab>                        m_local_id;
    Vector<FabArray<BaseFab<HYPRE_Int>>>     m_global_id;
    LayoutData<Gpu::DeviceVector<HYPRE_Int>> m_global_id_vec;

#ifdef AMREX_USE_GPU
    LayoutData<Gpu::DeviceVector<int>>       m_cell_offset;
#endif

    Vector<LayoutData<HYPRE_Int>>            m_nrows_grid;
    Vector<LayoutData<HYPRE_Int>>            m_id_offset;
    LayoutData<HYPRE_Int>                    m_nrows;
    HYPRE_Int                                m_nrows_proc;

    std::unique_ptr<HypreIJIface>            m_hypre_ij;

    // Non-owning references to Hypre matrix, rhs, and solution data
    HYPRE_IJMatrix m_A = nullptr;
    HYPRE_IJVector m_b = nullptr;
    HYPRE_IJVector m_x = nullptr;
};
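
// -----------------------------------------------------------------------------
// A minimal usage sketch, for illustration only (not part of the original
// header). It assumes one cell-centered variable in 3D on a fully periodic
// domain, so every stencil neighbor has a valid global id, and that
// amrex::Real matches HYPRE_Real; `geom`, `grids`, `dmap`, `soln` and `rhs`
// are hypothetical objects set up by the caller.
//
//     constexpr int max_stencil_size = 7; // 7-point Laplacian in 3D
//     HypreSolver<max_stencil_size> solver(
//         {IndexType::TheCellType()}, IntVect(1), geom, grids, dmap,
//         // Marker: returns true for every cell that is an unknown.
//         [=] AMREX_GPU_DEVICE (int /*boxno*/, int, int, int, int /*ivar*/)
//             -> bool { return true; },
//         // Filler: writes one matrix row. gid[ivar] maps (i,j,k) to the
//         // global row id; ncols, cols and mat receive the row's entries.
//         [=] AMREX_GPU_DEVICE (int /*boxno*/, int i, int j, int k,
//                               int /*ivar*/, Array4<HYPRE_Int const> const* gid,
//                               HYPRE_Int& ncols, HYPRE_Int* cols, HYPRE_Real* mat)
//         {
//             ncols = 0;
//             cols[ncols] = gid[0](i,j,k);   mat[ncols] = -6.0; ++ncols;
//             cols[ncols] = gid[0](i-1,j,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i+1,j,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j-1,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j+1,k); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j,k-1); mat[ncols] =  1.0; ++ncols;
//             cols[ncols] = gid[0](i,j,k+1); mat[ncols] =  1.0; ++ncols;
//         });
//     solver.solve({&soln}, {&rhs}, 1.e-6, 0.0, 100); // rel_tol, abs_tol, max_iter
// -----------------------------------------------------------------------------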

template <int MSS>
template <class Marker, class Filler>
HypreSolver<MSS>::HypreSolver (Vector<IndexType> const& a_index_type,
                               IntVect const& a_nghost,
                               Geometry const& a_geom,
                               BoxArray const& a_grids,
                               DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
                               Marker && a_marker,
                               Filler && a_filler,
                               int a_verbose,
                               std::string a_options_namespace)
    : m_nvars (int(a_index_type.size())),
      m_index_type (a_index_type),
      m_nghost (a_nghost),
      m_geom (a_geom),
      m_dmap (a_dmap),
      m_verbose (a_verbose),
      m_options_namespace(std::move(a_options_namespace))
{
    BL_PROFILE("HypreSolver()");

#ifdef AMREX_USE_MPI
    m_comm = ParallelContext::CommunicatorSub(); // NOLINT(cppcoreguidelines-prefer-member-initializer)
#endif

    m_grids.resize(m_nvars);
    m_local_id.resize(m_nvars);
    m_global_id.resize(m_nvars);
    m_nrows_grid.resize(m_nvars);
    m_id_offset.resize(m_nvars);
    Long nrows_max = 0;
    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        m_grids     [ivar] = amrex::convert(a_grids,m_index_type[ivar]);
        m_local_id  [ivar].define(m_grids[ivar], m_dmap, 1, 0);
        m_global_id [ivar].define(m_grids[ivar], m_dmap, 1, m_nghost);
        m_nrows_grid[ivar].define(m_grids[0], m_dmap);
        m_id_offset [ivar].define(m_grids[0], m_dmap);
        nrows_max += m_grids[ivar].numPts();
    }
    m_global_id_vec.define(m_grids[0], m_dmap);
    m_nrows.define(m_grids[0], m_dmap);
    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nrows_max < static_cast<Long>(std::numeric_limits<HYPRE_Int>::max()),
                                     "Need to configure Hypre with --enable-bigint");

    m_owner_mask.resize(m_nvars);
    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        m_owner_mask[ivar] = amrex::OwnerMask(m_local_id[ivar], m_geom.periodicity());
    }

#ifdef AMREX_USE_GPU
    m_cell_offset.define(m_grids[0], m_dmap);
#endif

    fill_local_id(std::forward<Marker>(a_marker));

    // At this point, m_local_id stores the ids local to each box.
    // m_nrows_grid stores the number of unique points in each box.
    // m_nrows_proc is the number of rows for all variables on this MPI
    // process. If a point is invalid, its id is invalid (i.e., a very
    // negative number). Note that the data type of the local id is int,
    // not HYPRE_Int, for performance on GPU.

    const int nprocs = ParallelContext::NProcsSub();
    const int myproc = ParallelContext::MyProcSub();

    Vector<HYPRE_Int> nrows_allprocs(nprocs);
#ifdef AMREX_USE_MPI
    if (nrows_allprocs.size() > 1) {
        // Gather as raw bytes so this works whether HYPRE_Int is int or long long.
        MPI_Allgather(&m_nrows_proc, sizeof(HYPRE_Int), MPI_CHAR,
                      nrows_allprocs.data(), sizeof(HYPRE_Int), MPI_CHAR, m_comm);
    } else
#endif
    {
        nrows_allprocs[0] = m_nrows_proc;
    }

    HYPRE_Int proc_begin = 0;
    for (int i = 0; i < myproc; ++i) {
        proc_begin += nrows_allprocs[i];
    }

    HYPRE_Int proc_end = proc_begin;
    for (MFIter mfi(m_nrows_grid[0]); mfi.isValid(); ++mfi) {
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            m_id_offset[ivar][mfi] = proc_end;
            proc_end += m_nrows_grid[ivar][mfi];
        }
    }
    AMREX_ASSERT(proc_end == proc_begin + m_nrows_proc);

    fill_global_id();

    // Create and initialize A, b & x
    HYPRE_Int ilower = proc_begin;
    HYPRE_Int iupper = proc_end-1;
    m_hypre_ij = std::make_unique<HypreIJIface>(m_comm, ilower, iupper, m_verbose);
    m_hypre_ij->parse_inputs(m_options_namespace);

    // Obtain non-owning references to the matrix, rhs, and solution data
    m_A = m_hypre_ij->A();
    m_b = m_hypre_ij->b();
    m_x = m_hypre_ij->x();

    fill_matrix(std::forward<Filler>(a_filler));
}
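
// A worked example of the global row numbering above, with hypothetical
// numbers: if 3 MPI ranks own 100, 80 and 120 rows, nrows_allprocs is
// {100,80,120}, and rank 1 computes proc_begin = 100 and proc_end = 180,
// so it passes ilower = 100 and iupper = 179 to HypreIJIface. The
// m_id_offset entries then partition [100,180) box by box and variable by
// variable in MFIter order, and fill_global_id adds each point's local id
// to its box's offset to obtain the global row id.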

template <int MSS>
template <class Marker>
#ifdef AMREX_USE_CUDA
std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
#else
std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
#endif
HypreSolver<MSS>::fill_local_id (Marker const& marker)
{
    BL_PROFILE("HypreSolver::fill_local_id()");

#ifdef AMREX_USE_GPU

    for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
        int boxno = mfi.LocalIndex();
        Long npts_tot = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            npts_tot += bx.numPts();
        }
        m_cell_offset[mfi].resize(npts_tot);
        npts_tot = 0;
        int* p_cell_offset = m_cell_offset[mfi].data();
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            auto const& lid = m_local_id[ivar].array(mfi);
            auto const& owner = m_owner_mask[ivar]->const_array(mfi);
            AMREX_ASSERT(bx.numPts() < static_cast<Long>(std::numeric_limits<int>::max()));
            const auto npts = static_cast<int>(bx.numPts());
            int npts_box = amrex::Scan::PrefixSum<int>(npts,
                [=] AMREX_GPU_DEVICE (int offset) noexcept -> int
                {
                    const Dim3 cell = bx.atOffset(offset).dim3();
                    int id = (owner (      cell.x,cell.y,cell.z      ) &&
                              marker(boxno,cell.x,cell.y,cell.z,ivar)) ? 1 : 0;
                    lid(cell.x,cell.y,cell.z) = id;
                    return id;
                },
                [=] AMREX_GPU_DEVICE (int offset, int ps) noexcept
                {
                    const Dim3 cell = bx.atOffset(offset).dim3();
                    if (lid(cell.x,cell.y,cell.z)) {
                        lid(cell.x,cell.y,cell.z) = ps;
                        p_cell_offset[ps] = offset;
                    } else {
                        lid(cell.x,cell.y,cell.z) = std::numeric_limits<int>::lowest();
                    }
                },
                Scan::Type::exclusive, Scan::retSum);
            m_nrows_grid[ivar][mfi] = npts_box;
            npts_tot += npts_box;
            p_cell_offset += npts_box;
        }
        m_cell_offset[mfi].resize(npts_tot);
    }

#else

#ifdef AMREX_USE_OMP
#pragma omp parallel
#endif
    for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
        int boxno = mfi.LocalIndex();
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
            auto const& lid = m_local_id[ivar].array(mfi);
            auto const& owner = m_owner_mask[ivar]->const_array(mfi);
            int id = 0;
            const auto lo = amrex::lbound(bx);
            const auto hi = amrex::ubound(bx);
            for (int k = lo.z; k <= hi.z; ++k) {
            for (int j = lo.y; j <= hi.y; ++j) {
            for (int i = lo.x; i <= hi.x; ++i) {
                if (owner(i,j,k) && marker(boxno,i,j,k,ivar)) {
                    lid(i,j,k) = id++;
                } else {
                    lid(i,j,k) = std::numeric_limits<int>::lowest();
                }
            }}}
            m_nrows_grid[ivar][mfi] = id;
        }
    }
#endif

    m_nrows_proc = 0;
    for (MFIter mfi(m_nrows); mfi.isValid(); ++mfi) {
        int nrows = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            nrows += m_nrows_grid[ivar][mfi];
        }
        m_nrows[mfi] = nrows;
        m_nrows_proc += nrows;
    }
}
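
// -----------------------------------------------------------------------------
// A minimal sketch of the Scan::PrefixSum pattern used in the GPU branch
// above, on a plain flag array (assumes an AMReX GPU build; `flags`, `ids`
// and `n` are hypothetical device data). The first lambda supplies each
// element's contribution (1 if marked); the second receives that element's
// exclusive partial sum, which is exactly the compact row id that
// fill_local_id stores in m_local_id.
//
//     int nmarked = amrex::Scan::PrefixSum<int>(n,
//         [=] AMREX_GPU_DEVICE (int i) -> int { return flags[i] ? 1 : 0; },
//         [=] AMREX_GPU_DEVICE (int i, int ps) {
//             ids[i] = flags[i] ? ps : -1; // marked cells get 0,1,2,...
//         },
//         amrex::Scan::Type::exclusive, amrex::Scan::retSum);
// -----------------------------------------------------------------------------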

template <int MSS>
void
HypreSolver<MSS>::fill_global_id ()
{
    BL_PROFILE("HypreSolver::fill_global_id()");

    // To generate global ids for Hypre, we need to remove duplicates on
    // nodes shared by multiple Boxes with OverrideSync. So we need to use
    // a type that supports atomicAdd. HYPRE_Int is either int or long
    // long. The latter does not have native atomicAdd support in CUDA/HIP.
    using AtomicInt = std::conditional_t<sizeof(HYPRE_Int) == 4,
                                         HYPRE_Int, unsigned long long>;

    Vector<FabArray<BaseFab<AtomicInt>>> global_id_raii;
    Vector<FabArray<BaseFab<AtomicInt>>*> p_global_id;
    if constexpr (std::is_same<HYPRE_Int, AtomicInt>()) {
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            p_global_id.push_back(&(m_global_id[ivar]));
        }
    } else {
        global_id_raii.resize(m_nvars); // allocate one FabArray per variable
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            global_id_raii[ivar].define(m_global_id[ivar].boxArray(),
                                        m_global_id[ivar].DistributionMap(),
                                        1, m_global_id[ivar].nGrowVect());
            p_global_id.push_back(&(global_id_raii[ivar]));
        }
    }

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
    for (MFIter mfi(m_global_id[0]); mfi.isValid(); ++mfi) {
        auto& rows_vec = m_global_id_vec[mfi];
        rows_vec.resize(m_nrows[mfi]);

        HYPRE_Int nrows = 0;
        for (int ivar = 0; ivar < m_nvars; ++ivar) {
            HYPRE_Int const os = m_id_offset[ivar][mfi];
            Box bx = mfi.validbox();
            bx.convert(m_index_type[ivar]).grow(m_nghost);
            Array4<AtomicInt> const& gid = p_global_id[ivar]->array(mfi);
            auto const& lid = m_local_id[ivar].const_array(mfi);
            HYPRE_Int* rows = rows_vec.data() + nrows;
            nrows += m_nrows_grid[ivar][mfi];
            amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
            {
                if (lid.contains(i,j,k) && lid(i,j,k) >= 0) {
                    const auto id = lid(i,j,k) + os;
                    rows[lid(i,j,k)] = id;
                    gid(i,j,k) = static_cast<AtomicInt>(id);
                } else {
                    gid(i,j,k) = static_cast<AtomicInt>
                        (std::numeric_limits<HYPRE_Int>::max());
                }
            });
        }
    }

    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        amrex::OverrideSync(*p_global_id[ivar], *m_owner_mask[ivar],
                            m_geom.periodicity());
        p_global_id[ivar]->FillBoundary(m_geom.periodicity());

        if constexpr (!std::is_same<HYPRE_Int, AtomicInt>()) {
            auto const& dst = m_global_id[ivar].arrays();
            auto const& src = p_global_id[ivar]->const_arrays();
            amrex::ParallelFor(m_global_id[ivar], m_global_id[ivar].nGrowVect(),
                               [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
            {
                dst[b](i,j,k) = static_cast<HYPRE_Int>(src[b](i,j,k));
            });
        }
    }
}
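
// Note on the synchronization above: points on faces and edges shared by
// multiple boxes are stored once per box. OverrideSync broadcasts the
// owner's global id to the other copies so all boxes agree, and
// FillBoundary then copies ids into ghost cells so a Filler can look up
// the global ids of stencil neighbors that live in adjacent boxes.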

#ifdef AMREX_USE_GPU
namespace detail {
template <typename T>
void pack_matrix_gpu (Gpu::DeviceVector<HYPRE_Int>& cols_tmp,
                      Gpu::DeviceVector<HYPRE_Real>& mat_tmp,
                      Gpu::DeviceVector<HYPRE_Int>& cols,
                      Gpu::DeviceVector<HYPRE_Real>& mat)
{
    auto* p_cols_tmp = cols_tmp.data();
    auto* p_mat_tmp = mat_tmp.data();
    auto const* p_cols = cols.data();
    auto const* p_mat = mat.data();
    const auto N = Long(cols.size());
    Scan::PrefixSum<T>(N,
        [=] AMREX_GPU_DEVICE (Long i) -> T
        {
            return static_cast<T>(p_cols[i] >= 0);
        },
        [=] AMREX_GPU_DEVICE (Long i, T s)
        {
            if (p_cols[i] >= 0) {
                p_cols_tmp[s] = p_cols[i];
                p_mat_tmp[s] = p_mat[i];
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);
    std::swap(cols_tmp, cols);
    std::swap(mat_tmp, mat);
}
}
#endif
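
// -----------------------------------------------------------------------------
// For reference, a host-side equivalent of detail::pack_matrix_gpu (a sketch,
// not used by the solver): it drops the unused column slots, which were
// initialized to -1, while keeping cols and mat aligned. The GPU version does
// the same stable compaction with one exclusive scan instead of a serial loop.
//
//     Long m = 0;
//     for (Long i = 0; i < Long(cols.size()); ++i) {
//         if (cols[i] >= 0) { cols[m] = cols[i]; mat[m] = mat[i]; ++m; }
//     }
// -----------------------------------------------------------------------------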

template <int MSS>
template <class Filler,
          std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
                                      Array4<HYPRE_Int const> const*,
                                      HYPRE_Int&, HYPRE_Int*,
                                      HYPRE_Real*>::value,int> FOO>
void
HypreSolver<MSS>::fill_matrix (Filler const& filler)
{
    BL_PROFILE("HypreSolver::fill_matrix()");

    Gpu::DeviceVector<HYPRE_Int> ncols_vec;
    Gpu::DeviceVector<HYPRE_Int> cols_vec;
    Gpu::DeviceVector<HYPRE_Real> mat_vec;

489 
490  MFItInfo mfitinfo;
491  mfitinfo.UseDefaultStream().DisableDeviceSync();
492  for (MFIter mfi(m_local_id[0],mfitinfo); mfi.isValid(); ++mfi)
493  {
494  int boxno = mfi.LocalIndex();
495  const HYPRE_Int nrows = m_nrows[mfi];
496  if (nrows > 0)
497  {
498  ncols_vec.clear();
499  ncols_vec.resize(nrows);
500  HYPRE_Int* ncols = ncols_vec.data();
501 
502  cols_vec.clear();
503  cols_vec.resize(Long(nrows)*MSS, -1);
504  HYPRE_Int* cols = cols_vec.data();
505 
506  mat_vec.clear();
507  mat_vec.resize(Long(nrows)*MSS);
508  HYPRE_Real* mat = mat_vec.data();
509 
510  Vector<Array4<HYPRE_Int const>> gid_v(m_nvars);
511  for (int ivar = 0; ivar < m_nvars; ++ivar) {
512  gid_v[ivar] = m_global_id[ivar].const_array(mfi);
513  }
514 
#ifdef AMREX_USE_GPU
            Gpu::Buffer<Array4<HYPRE_Int const>> gid_buf
                (gid_v.data(), gid_v.size());
            auto const* pgid = gid_buf.data();
            auto const* p_cell_offset = m_cell_offset[mfi].data();
            Long ntot = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                const HYPRE_Int nrows_var = m_nrows_grid[ivar][mfi];
                if (nrows_var > 0) {
                    Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    ntot += Reduce::Sum<Long>(nrows_var,
                        [=] AMREX_GPU_DEVICE (HYPRE_Int offset)
                        {
                            const Dim3 cell = bx.atOffset(p_cell_offset[offset]).dim3();
                            filler(boxno, cell.x, cell.y, cell.z, ivar, pgid,
                                   ncols[offset], cols+Long(offset)*MSS,
                                   mat+Long(offset)*MSS);
                            return ncols[offset];
                        });
                    p_cell_offset += nrows_var;
                    ncols += nrows_var;
                    cols += Long(nrows_var)*MSS;
                    mat += Long(nrows_var)*MSS;
                }
            }
            Gpu::DeviceVector<HYPRE_Int> cols_tmp(ntot);
            Gpu::DeviceVector<HYPRE_Real> mat_tmp(ntot);
            if (ntot >= Long(std::numeric_limits<int>::max())) {
                detail::pack_matrix_gpu<Long>(cols_tmp, mat_tmp, cols_vec, mat_vec);
            } else {
                detail::pack_matrix_gpu<int>(cols_tmp, mat_tmp, cols_vec, mat_vec);
            }
#else
            auto* pgid = gid_v.data();
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    amrex::Loop(amrex::convert(mfi.validbox(),m_index_type[ivar]),
                    [=,&ncols,&cols,&mat] (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            filler(boxno, i, j, k, ivar, pgid, *ncols, cols, mat);
                            cols += (*ncols);
                            mat += (*ncols);
                            ++ncols;
                        }
                    });
                }
            }
#endif

            const auto& rows_vec = m_global_id_vec[mfi];
            HYPRE_Int const* rows = rows_vec.data();

            Gpu::streamSynchronize(); // make sure the filled data are ready for HYPRE
            HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.data(), rows,
                                    cols_vec.data(), mat_vec.data());
            Gpu::hypreSynchronize();
        }
    }
    HYPRE_IJMatrixAssemble(m_A);
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::solve (Vector<MF *> const& a_soln,
                         Vector<MF const*> const& a_rhs,
                         HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
{
    BL_PROFILE("HypreSolver::solve()");

    AMREX_ASSERT(a_soln.size() == m_nvars && a_rhs.size() == m_nvars);

    HYPRE_IJVectorInitialize(m_b);
    HYPRE_IJVectorInitialize(m_x);

    load_vectors(a_soln, a_rhs);

    HYPRE_IJVectorAssemble(m_x);
    HYPRE_IJVectorAssemble(m_b);

    m_hypre_ij->solve(rel_tol, abs_tol, max_iter);

    get_solution(a_soln);
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::load_vectors (Vector<MF *> const& a_soln,
                                Vector<MF const*> const& a_rhs)
{
    BL_PROFILE("HypreSolver::load_vectors()");

    MFItInfo mfitinfo;
    mfitinfo.UseDefaultStream().DisableDeviceSync();

    Gpu::DeviceVector<HYPRE_Real> xvec;
    Gpu::DeviceVector<HYPRE_Real> bvec;
    for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
    {
        const HYPRE_Int nrows = m_nrows[mfi];
        if (nrows > 0)
        {
            xvec.clear();
            xvec.resize(nrows);
            bvec.clear();
            bvec.resize(nrows);
            auto* xp = xvec.data();
            auto* bp = bvec.data();

            HYPRE_Int const* rows = m_global_id_vec[mfi].data();

            HYPRE_Int offset = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& xfab = a_soln[ivar]->const_array(mfi);
                    auto const& bfab = a_rhs [ivar]->const_array(mfi);
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    HYPRE_Real* x = xp + offset;
                    HYPRE_Real* b = bp + offset;
                    Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            x[lid(i,j,k)] = xfab(i,j,k);
                            b[lid(i,j,k)] = bfab(i,j,k);
                        }
                    });
                    offset += m_nrows_grid[ivar][mfi];
                }
            }

            Gpu::streamSynchronize(); // xvec/bvec must be filled before HYPRE reads them
            HYPRE_IJVectorSetValues(m_x, nrows, rows, xp);
            HYPRE_IJVectorSetValues(m_b, nrows, rows, bp);
            Gpu::hypreSynchronize();
        }
    }
}

template <int MSS>
template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
                                     std::is_same_v<typename MF::value_type,
                                                    HYPRE_Real>, int> FOO>
void
HypreSolver<MSS>::get_solution (Vector<MF*> const& a_soln)
{
    BL_PROFILE("HypreSolver::get_solution()");

    MFItInfo mfitinfo;
    mfitinfo.UseDefaultStream().DisableDeviceSync();

    Gpu::DeviceVector<HYPRE_Real> xvec;
    for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
    {
        const HYPRE_Int nrows = m_nrows[mfi];
        if (nrows > 0)
        {
            xvec.clear();
            xvec.resize(nrows);
            auto* xp = xvec.data();

            HYPRE_Int const* rows = m_global_id_vec[mfi].data();

            HYPRE_IJVectorGetValues(m_x, nrows, rows, xp);
            Gpu::hypreSynchronize();

            HYPRE_Int offset = 0;
            for (int ivar = 0; ivar < m_nvars; ++ivar) {
                if (m_nrows_grid[ivar][mfi] > 0) {
                    auto const& xfab = a_soln[ivar]->array(mfi);
                    auto const& lid = m_local_id[ivar].const_array(mfi);
                    HYPRE_Real* x = xp + offset;
                    Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
                    amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                    {
                        if (lid(i,j,k) >= 0) {
                            xfab(i,j,k) = x[lid(i,j,k)];
                        }
                    });
                    offset += m_nrows_grid[ivar][mfi];
                }
            }
            Gpu::streamSynchronize(); // the kernels above read xvec asynchronously
        }
    }

    for (int ivar = 0; ivar < m_nvars; ++ivar) {
        amrex::OverrideSync(*a_soln[ivar], *m_owner_mask[ivar],
                            m_geom.periodicity());
    }
}

}

#endif