Block-Structured AMR Software Framework
 
Loading...
Searching...
No Matches
AMReX_HypreSolver.H
Go to the documentation of this file.
1
8#ifndef AMREX_HYPRE_SOLVER_H_
9#define AMREX_HYPRE_SOLVER_H_
10
11#include <AMReX_Geometry.H>
12#include <AMReX_iMultiFab.H>
13#include <AMReX_HypreIJIface.H>
14#include <AMReX_BLProfiler.H>
15
16#include "HYPRE.h"
17#include "_hypre_utilities.h"
18
19#include <string>
20
21namespace amrex
22{
23
32template <int MSS>
34{
35public:
36
63 template <class Marker, class Filler>
64 HypreSolver (Vector<IndexType> const& a_index_type,
65 IntVect const& a_nghost,
66 Geometry const& a_geom,
67 BoxArray const& a_grids,
68 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
69 Marker && a_marker,
70 Filler && a_filler,
71 int a_verbose = 0,
72 std::string a_options_namespace = "hypre");
73
74 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
75 std::is_same_v<typename MF::value_type,
76 HYPRE_Real>, int> = 0>
87 void solve (Vector<MF *> const& a_soln,
88 Vector<MF const*> const& a_rhs,
89 HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter);
90
91 int getNumIters () const { return m_hypre_ij->getNumIters(); }
92
93 HYPRE_Real getFinalResidualNorm () const {
94 return m_hypre_ij->getFinalResidualNorm();
95 }
96
97 HYPRE_IJMatrix getA () const { return m_hypre_ij->A(); }
98 HYPRE_IJVector getb () const { return m_hypre_ij->b(); }
99 HYPRE_IJVector getx () const { return m_hypre_ij->x(); }
100
101// public: // for cuda
102
103 template <class Marker>
104#ifdef AMREX_USE_CUDA
105 std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
106#else
107 std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
108#endif
109 fill_local_id (Marker const& marker);
110
111 template <typename AI>
112 void fill_global_id ();
113
114 template <class Filler,
115 std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
117 HYPRE_Int&, HYPRE_Int*,
118 HYPRE_Real*>::value,int> FOO = 0>
119 void fill_matrix (Filler const& filler);
120
121 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
122 std::is_same_v<typename MF::value_type,
123 HYPRE_Real>, int> = 0>
124 void load_vectors (Vector<MF *> const& a_soln,
125 Vector<MF const*> const& a_rhs);
126
127 template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
128 std::is_same_v<typename MF::value_type,
129 HYPRE_Real>, int> = 0>
130 void get_solution (Vector<MF*> const& a_soln);
131
132private:
133
140
143
145
150
151#ifdef AMREX_USE_GPU
153#endif
154
158 HYPRE_Int m_nrows_proc;
159
160 std::unique_ptr<HypreIJIface> m_hypre_ij;
161
162 // Non-owning references to Hypre matrix, rhs, and solution data
163 HYPRE_IJMatrix m_A = nullptr;
164 HYPRE_IJVector m_b = nullptr;
165 HYPRE_IJVector m_x = nullptr;
166};
167
168template <int MSS>
169template <class Marker, class Filler>
171 IntVect const& a_nghost,
172 Geometry const& a_geom,
173 BoxArray const& a_grids,
174 DistributionMapping const& a_dmap, // NOLINT(modernize-pass-by-value)
175 Marker && a_marker,
176 Filler && a_filler,
177 int a_verbose,
178 std::string a_options_namespace)
179 : m_nvars (int(a_index_type.size())),
180 m_index_type (a_index_type),
181 m_nghost (a_nghost),
182 m_geom (a_geom),
183 m_dmap (a_dmap),
184 m_verbose (a_verbose),
185 m_options_namespace(std::move(a_options_namespace))
186{
187 BL_PROFILE("HypreSolver()");
188
189#ifdef AMREX_USE_MPI
190 m_comm = ParallelContext::CommunicatorSub(); // NOLINT(cppcoreguidelines-prefer-member-initializer)
191#endif
192
193 m_grids.resize(m_nvars);
194 m_local_id.resize(m_nvars);
195 m_global_id.resize(m_nvars);
196 m_nrows_grid.resize(m_nvars);
197 m_id_offset.resize(m_nvars);
198 Long nrows_max = 0;
199 for (int ivar = 0; ivar < m_nvars; ++ivar) {
200 m_grids [ivar] = amrex::convert(a_grids,m_index_type[ivar]);
201 m_local_id [ivar].define(m_grids[ivar], m_dmap, 1, 0);
202 m_global_id [ivar].define(m_grids[ivar], m_dmap, 1, m_nghost);
203 m_nrows_grid[ivar].define(m_grids[0], m_dmap);
204 m_id_offset [ivar].define(m_grids[0], m_dmap);
205 nrows_max += m_grids[ivar].numPts();
206 }
207 m_global_id_vec.define(m_grids[0], m_dmap);
209 AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nrows_max < static_cast<Long>(std::numeric_limits<HYPRE_Int>::max()-1),
210 "Need to configure Hypre with --enable-bigint");
211
212 m_owner_mask.resize(m_nvars);
213 for (int ivar = 0; ivar < m_nvars; ++ivar) {
215 }
216
217#ifdef AMREX_USE_GPU
218 m_cell_offset.define(m_grids[0], m_dmap);
219#endif
220
221 fill_local_id(std::forward<Marker>(a_marker));
222
223 // At this point, m_local_id stores the ids local to each box.
224 // m_nrows_grid stores the number of unique points in each box.
225 // m_nrows_proc is the number of rowss for all variables on this MPI
226 // process. If a point is invalid, its id is invalid (i.e., a very
227 // negative number). Note that the data type of local_node_id is int,
228 // not HYPRE_Int for performance on GPU.
229
230 const int nprocs = ParallelContext::NProcsSub();
231 const int myproc = ParallelContext::MyProcSub();
232
233 Vector<HYPRE_Int> nrows_allprocs(nprocs);
234#ifdef AMREX_USE_MPI
235 if (nrows_allprocs.size() > 1) {
236 MPI_Allgather(&m_nrows_proc, sizeof(HYPRE_Int), MPI_CHAR,
237 nrows_allprocs.data(), sizeof(HYPRE_Int), MPI_CHAR, m_comm);
238 } else
239#endif
240 {
241 nrows_allprocs[0] = m_nrows_proc;
242 }
243
244 HYPRE_Int proc_begin = 0;
245 for (int i = 0; i < myproc; ++i) {
246 proc_begin += nrows_allprocs[i];
247 }
248
249 HYPRE_Int proc_end = proc_begin;
250 for (MFIter mfi(m_nrows_grid[0]); mfi.isValid(); ++mfi) {
251 for (int ivar = 0; ivar < m_nvars; ++ivar) {
252 m_id_offset[ivar][mfi] = proc_end;
253 proc_end += m_nrows_grid[ivar][mfi];
254 }
255 }
256 AMREX_ASSERT(proc_end == proc_begin + m_nrows_proc);
257
258 // To generate global ids for Hypre, we need to remove duplicates on
259 // nodes shared by multiple Boxes with OverrideSync. So we need to use
260 // a type that supports atomicAdd. HYPRE_Int is either int or long
261 // long. The latter (i.e., long long) does not have native atomicAdd
262 // support in CUDA/HIP, whereas unsigned long long has.
263 using AtomicInt = std::conditional_t<sizeof(HYPRE_Int) == 4,
264 HYPRE_Int, unsigned long long>;
265 fill_global_id<AtomicInt>();
266
267 // Create and initialize A, b & x
268 HYPRE_Int ilower = proc_begin;
269 HYPRE_Int iupper = proc_end-1;
270 m_hypre_ij = std::make_unique<HypreIJIface>(m_comm, ilower, iupper, m_verbose);
271 m_hypre_ij->parse_inputs(m_options_namespace);
272
273 // Obtain non-owning references to the matrix, rhs, and solution data
274 m_A = m_hypre_ij->A();
275 m_b = m_hypre_ij->b();
276 m_x = m_hypre_ij->x();
277
278 fill_matrix(std::forward<Filler>(a_filler));
279}
280
281template <int MSS>
282template <class Marker>
283#ifdef AMREX_USE_CUDA
284 std::enable_if_t<IsCallable<Marker,int,int,int,int,int>::value>
285#else
286 std::enable_if_t<IsCallableR<bool,Marker,int,int,int,int,int>::value>
287#endif
289{
290 BL_PROFILE("HypreSolver::fill_local_id()");
291
292#ifdef AMREX_USE_GPU
293
294 for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
295 int boxno = mfi.LocalIndex();
296 Long npts_tot = 0;
297 for (int ivar = 0; ivar < m_nvars; ++ivar) {
298 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
299 npts_tot += bx.numPts();
300 }
301 m_cell_offset[mfi].resize(npts_tot);
302 npts_tot = 0;
303 int* p_cell_offset = m_cell_offset[mfi].data();
304 for (int ivar = 0; ivar < m_nvars; ++ivar) {
305 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
306 auto const& lid = m_local_id[ivar].array(mfi);
307 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
308 AMREX_ASSERT(bx.numPts() < static_cast<Long>(std::numeric_limits<int>::max()));
309 const auto npts = static_cast<int>(bx.numPts());
310 int npts_box = amrex::Scan::PrefixSum<int>(npts,
311 [=] AMREX_GPU_DEVICE (int offset) noexcept -> int
312 {
313 const Dim3 cell = bx.atOffset(offset).dim3();
314 int id = (owner ( cell.x,cell.y,cell.z ) &&
315 marker(boxno,cell.x,cell.y,cell.z,ivar)) ? 1 : 0;
316 lid(cell.x,cell.y,cell.z) = id;
317 return id;
318 },
319 [=] AMREX_GPU_DEVICE (int offset, int ps) noexcept
320 {
321 const Dim3 cell = bx.atOffset(offset).dim3();
322 if (lid(cell.x,cell.y,cell.z)) {
323 lid(cell.x,cell.y,cell.z) = ps;
324 p_cell_offset[ps] = offset;
325 } else {
326 lid(cell.x,cell.y,cell.z) = std::numeric_limits<int>::lowest();
327 }
328 },
330 m_nrows_grid[ivar][mfi] = npts_box;
331 npts_tot += npts_box;
332 p_cell_offset += npts_box;
333 }
334 m_cell_offset[mfi].resize(npts_tot);
335 }
336
337#else
338
339#ifdef AMREX_USE_OMP
340#pragma omp parallel
341#endif
342 for (MFIter mfi(m_local_id[0]); mfi.isValid(); ++mfi) {
343 int boxno = mfi.LocalIndex();
344 for (int ivar = 0; ivar < m_nvars; ++ivar) {
345 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
346 auto const& lid = m_local_id[ivar].array(mfi);
347 auto const& owner = m_owner_mask[ivar]->const_array(mfi);
348 int id = 0;
349 const auto lo = amrex::lbound(bx);
350 const auto hi = amrex::ubound(bx);
351 for (int k = lo.z; k <= hi.z; ++k) {
352 for (int j = lo.y; j <= hi.y; ++j) {
353 for (int i = lo.x; i <= hi.x; ++i) {
354 if (owner(i,j,k) && marker(boxno,i,j,k,ivar)) {
355 lid(i,j,k) = id++;
356 } else {
357 lid(i,j,k) = std::numeric_limits<int>::lowest();
358 }
359 }}}
360 m_nrows_grid[ivar][mfi] = id;
361 }
362 }
363#endif
364
365 m_nrows_proc = 0;
366 for (MFIter mfi(m_nrows); mfi.isValid(); ++mfi) {
367 int nrows = 0;
368 for (int ivar = 0; ivar < m_nvars; ++ivar) {
369 nrows += m_nrows_grid[ivar][mfi];
370 }
371 m_nrows[mfi] = nrows;
372 m_nrows_proc += nrows;
373 }
374}
375
376template <int MSS>
377template <typename AI>
378void
380{
381 BL_PROFILE("HypreSolver::fill_global_id()");
382
383 Vector<FabArray<BaseFab<AI>>> global_id_raii;
384 Vector<FabArray<BaseFab<AI>>*> p_global_id;
385
386 if constexpr (std::is_same_v<HYPRE_Int,AI>) {
387 for (int ivar = 0; ivar < m_nvars; ++ivar) {
388 p_global_id.push_back(&(m_global_id[ivar]));
389 }
390 } else {
391 for (int ivar = 0; ivar < m_nvars; ++ivar) {
392 global_id_raii.emplace_back(m_global_id[ivar].boxArray(),
393 m_global_id[ivar].DistributionMap(),
394 1, m_global_id[ivar].nGrowVect());
395 p_global_id.push_back(&(global_id_raii[ivar]));
396 }
397 }
398
399#ifdef AMREX_USE_OMP
400#pragma omp parallel if (Gpu::notInLaunchRegion())
401#endif
402 for (MFIter mfi(m_global_id[0]); mfi.isValid(); ++mfi) {
403 auto& rows_vec = m_global_id_vec[mfi];
404 rows_vec.resize(m_nrows[mfi]);
405
406 HYPRE_Int nrows = 0;
407 for (int ivar = 0; ivar < m_nvars; ++ivar) {
408 HYPRE_Int const os = m_id_offset[ivar][mfi];
409 Box bx = mfi.validbox();
410 bx.convert(m_index_type[ivar]).grow(m_nghost);
411 Array4<AI> const& gid = p_global_id[ivar]->array(mfi);
412 auto const& lid = m_local_id[ivar].const_array(mfi);
413 HYPRE_Int* rows = rows_vec.data() + nrows;
414 nrows += m_nrows_grid[ivar][mfi];
415 amrex::ParallelFor(bx,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
416 {
417 if (lid.contains(i,j,k) && lid(i,j,k) >= 0) {
418 const auto id = lid(i,j,k) + os;
419 rows[lid(i,j,k)] = id;
420 gid(i,j,k) = static_cast<AI>(id);
421 } else {
422 gid(i,j,k) = static_cast<AI>
423 (std::numeric_limits<HYPRE_Int>::max());
424 }
425 });
426 }
427 }
428
429 for (int ivar = 0; ivar < m_nvars; ++ivar) {
430 amrex::OverrideSync(*p_global_id[ivar], *m_owner_mask[ivar],
431 m_geom.periodicity());
432 p_global_id[ivar]->FillBoundary(m_geom.periodicity());
433
434 if constexpr (!std::is_same<HYPRE_Int, AI>()) {
435 auto const& dst = m_global_id[ivar].arrays();
436 auto const& src = p_global_id[ivar]->const_arrays();
437 amrex::ParallelFor(m_global_id[ivar], m_global_id[ivar].nGrowVect(),
438 [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
439 {
440 dst[b](i,j,k) = static_cast<HYPRE_Int>(src[b](i,j,k));
441 });
442 }
443 }
444}
445
446#ifdef AMREX_USE_GPU
447namespace detail {
448template <typename T>
453{
454 auto* p_cols_tmp = cols_tmp.data();
455 auto* p_mat_tmp = mat_tmp.data();
456 auto const* p_cols = cols.data();
457 auto const* p_mat = mat.data();
458 const auto N = Long(cols.size());
459 Scan::PrefixSum<T>(N,
460 [=] AMREX_GPU_DEVICE (Long i) -> T
461 {
462 return static_cast<T>(p_cols[i] >= 0);
463 },
464 [=] AMREX_GPU_DEVICE (Long i, T s)
465 {
466 if (p_cols[i] >= 0) {
467 p_cols_tmp[s] = p_cols[i];
468 p_mat_tmp[s] = p_mat[i];
469 }
470 },
472 std::swap(cols_tmp, cols);
473 std::swap(mat_tmp, mat);
474}
475}
476#endif
477
478template <int MSS>
479template <class Filler,
480 std::enable_if_t<IsCallable<Filler,int,int,int,int,int,
481 Array4<HYPRE_Int const> const*,
482 HYPRE_Int&, HYPRE_Int*,
483 HYPRE_Real*>::value,int> FOO>
484void
485HypreSolver<MSS>::fill_matrix (Filler const& filler)
486{
487 BL_PROFILE("HypreSolver::fill_matrix()");
488
492
493 MFItInfo mfitinfo;
495 for (MFIter mfi(m_local_id[0],mfitinfo); mfi.isValid(); ++mfi)
496 {
497 int boxno = mfi.LocalIndex();
498 const HYPRE_Int nrows = m_nrows[mfi];
499 if (nrows > 0)
500 {
501 ncols_vec.clear();
502 ncols_vec.resize(nrows);
503 HYPRE_Int* ncols = ncols_vec.data();
504
505 cols_vec.clear();
506 cols_vec.resize(Long(nrows)*MSS, -1);
507 HYPRE_Int* cols = cols_vec.data();
508
509 mat_vec.clear();
510 mat_vec.resize(Long(nrows)*MSS);
511 HYPRE_Real* mat = mat_vec.data();
512
513 Vector<Array4<HYPRE_Int const>> gid_v(m_nvars);
514 for (int ivar = 0; ivar < m_nvars; ++ivar) {
515 gid_v[ivar] = m_global_id[ivar].const_array(mfi);
516 }
517
518#ifdef AMREX_USE_GPU
520 (gid_v.data(), gid_v.size());
521 auto const* pgid = gid_buf.data();
522 auto const* p_cell_offset = m_cell_offset[mfi].data();
523 Long ntot = 0;
524 for (int ivar = 0; ivar < m_nvars; ++ivar) {
525 const HYPRE_Int nrows_var = m_nrows_grid[ivar][mfi];
526 if (nrows_var > 0) {
527 Box const& bx = amrex::convert(mfi.validbox(),m_index_type[ivar]);
528 ntot += Reduce::Sum<Long>(nrows_var,
529 [=] AMREX_GPU_DEVICE (HYPRE_Int offset)
530 {
531 const Dim3 cell = bx.atOffset(p_cell_offset[offset]).dim3();
532 filler(boxno, cell.x, cell.y, cell.z, ivar, pgid,
533 ncols[offset], cols+Long(offset)*MSS,
534 mat+Long(offset)*MSS);
535 return ncols[offset];
536 });
537 p_cell_offset += nrows_var;
538 ncols += nrows_var;
539 cols += Long(nrows_var)*MSS;
540 mat += Long(nrows_var)*MSS;
541 }
542 }
543 Gpu::DeviceVector<HYPRE_Int> cols_tmp(ntot);
544 Gpu::DeviceVector<HYPRE_Real> mat_tmp(ntot);
545 if (ntot >= Long(std::numeric_limits<int>::max())) {
546 detail::pack_matrix_gpu<Long>(cols_tmp, mat_tmp, cols_vec, mat_vec);
547 } else {
548 detail::pack_matrix_gpu<int>(cols_tmp, mat_tmp, cols_vec, mat_vec);
549 }
550#else
551 auto* pgid = gid_v.data();
552 for (int ivar = 0; ivar < m_nvars; ++ivar) {
553 if (m_nrows_grid[ivar][mfi] > 0) {
554 auto const& lid = m_local_id[ivar].const_array(mfi);
555 amrex::Loop(amrex::convert(mfi.validbox(),m_index_type[ivar]),
556 [=,&ncols,&cols,&mat] (int i, int j, int k)
557 {
558 if (lid(i,j,k) >= 0) {
559 filler(boxno, i, j, k, ivar, pgid, *ncols, cols, mat);
560 cols += (*ncols);
561 mat += (*ncols);
562 ++ncols;
563 }
564 });
565 }
566 }
567#endif
568
569 const auto& rows_vec = m_global_id_vec[mfi];
570 HYPRE_Int const* rows = rows_vec.data();
571
573 HYPRE_IJMatrixSetValues(m_A, nrows, ncols_vec.data(), rows,
574 cols_vec.data(), mat_vec.data());
575 Gpu::hypreSynchronize();
576 }
577 }
578 HYPRE_IJMatrixAssemble(m_A);
579}
580
581template <int MSS>
582template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
583 std::is_same_v<typename MF::value_type,
584 HYPRE_Real>, int> FOO>
585void
587 Vector<MF const*> const& a_rhs,
588 HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
589{
590 BL_PROFILE("HypreSolver::solve()");
591
592 AMREX_ASSERT(a_soln.size() == m_nvars && a_rhs.size() == m_nvars);
593
594 HYPRE_IJVectorInitialize(m_b);
595 HYPRE_IJVectorInitialize(m_x);
596
597 load_vectors(a_soln, a_rhs);
598
599 HYPRE_IJVectorAssemble(m_x);
600 HYPRE_IJVectorAssemble(m_b);
601
602 m_hypre_ij->solve(rel_tol, abs_tol, max_iter);
603
604 get_solution(a_soln);
605}
606
607template <int MSS>
608template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
609 std::is_same_v<typename MF::value_type,
610 HYPRE_Real>, int> FOO>
611void
613 Vector<MF const*> const& a_rhs)
614{
615 BL_PROFILE("HypreSolver::load_vectors()");
616
617 MFItInfo mfitinfo;
619
622 for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
623 {
624 const HYPRE_Int nrows = m_nrows[mfi];
625 if (nrows > 0)
626 {
627 xvec.clear();
628 xvec.resize(nrows);
629 bvec.clear();
630 bvec.resize(nrows);
631 auto* xp = xvec.data();
632 auto* bp = bvec.data();
633
634 HYPRE_Int const* rows = m_global_id_vec[mfi].data();
635
636 HYPRE_Int offset = 0;
637 for (int ivar = 0; ivar < m_nvars; ++ivar) {
638 if (m_nrows_grid[ivar][mfi] > 0) {
639 auto const& xfab = a_soln[ivar]->const_array(mfi);
640 auto const& bfab = a_rhs [ivar]->const_array(mfi);
641 auto const& lid = m_local_id[ivar].const_array(mfi);
642 HYPRE_Real* x = xp + offset;
643 HYPRE_Real* b = bp + offset;
644 Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
645 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k)
646 {
647 if (lid(i,j,k) >= 0) {
648 x[lid(i,j,k)] = xfab(i,j,k);
649 b[lid(i,j,k)] = bfab(i,j,k);
650 }
651 });
652 offset += m_nrows_grid[ivar][mfi];
653 }
654 }
655
656 Gpu::streamSynchronize();
657 HYPRE_IJVectorSetValues(m_x, nrows, rows, xp);
658 HYPRE_IJVectorSetValues(m_b, nrows, rows, bp);
659 Gpu::hypreSynchronize();
660 }
661 }
662}
663
664template <int MSS>
665template <class MF, std::enable_if_t<IsFabArray<MF>::value &&
666 std::is_same_v<typename MF::value_type,
667 HYPRE_Real>, int> FOO>
668void
670{
671 BL_PROFILE("HypreSolver::get_solution()");
672
673 MFItInfo mfitinfo;
675
677 for (MFIter mfi(*a_soln[0],mfitinfo); mfi.isValid(); ++mfi)
678 {
679 const HYPRE_Int nrows = m_nrows[mfi];
680 if (nrows > 0)
681 {
682 xvec.clear();
683 xvec.resize(nrows);
684 auto* xp = xvec.data();
685
686 HYPRE_Int const* rows = m_global_id_vec[mfi].data();
687
688 HYPRE_IJVectorGetValues(m_x, nrows, rows, xp);
689 Gpu::hypreSynchronize();
690
691 HYPRE_Int offset = 0;
692 for (int ivar = 0; ivar < m_nvars; ++ivar) {
693 if (m_nrows_grid[ivar][mfi] > 0) {
694 auto const& xfab = a_soln[ivar]->array(mfi);
695 auto const& lid = m_local_id[ivar].const_array(mfi);
696 HYPRE_Real* x = xp + offset;
697 Box box = amrex::convert(mfi.validbox(),m_index_type[ivar]);
698 amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k)
699 {
700 if (lid(i,j,k) >= 0) {
701 xfab(i,j,k) = x[lid(i,j,k)];
702 }
703 });
704 offset += m_nrows_grid[ivar][mfi];
705 }
706 }
707 Gpu::streamSynchronize();
708 }
709 }
710
711 for (int ivar = 0; ivar < m_nvars; ++ivar) {
712 amrex::OverrideSync(*a_soln[ivar], *m_owner_mask[ivar],
713 m_geom.periodicity());
714 }
715}
716
717}
718
719#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:49
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
int MPI_Comm
Definition AMReX_ccse-mpi.H:47
static constexpr int MPI_COMM_NULL
Definition AMReX_ccse-mpi.H:55
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:550
AMREX_GPU_HOST_DEVICE IntVectND< dim > atOffset(Long offset) const noexcept
Given the offset, compute IntVectND<dim>
Definition AMReX_Box.H:1009
AMREX_GPU_HOST_DEVICE BoxND & convert(IndexTypeND< dim > typ) noexcept
Convert the BoxND from the current type into the argument type. This may change the BoxND coordinates...
Definition AMReX_Box.H:912
AMREX_GPU_HOST_DEVICE BoxND & grow(int i) noexcept
Definition AMReX_Box.H:627
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition AMReX_Box.H:346
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:41
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:73
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:355
Definition AMReX_GpuBuffer.H:17
T const * data() const noexcept
Definition AMReX_GpuBuffer.H:65
Solve Ax = b using HYPRE's generic IJ matrix format where A is a sparse matrix specified using the co...
Definition AMReX_HypreSolver.H:34
HypreSolver(Vector< IndexType > const &a_index_type, IntVect const &a_nghost, Geometry const &a_geom, BoxArray const &a_grids, DistributionMapping const &a_dmap, Marker &&a_marker, Filler &&a_filler, int a_verbose=0, std::string a_options_namespace="hypre")
Definition AMReX_HypreSolver.H:170
LayoutData< HYPRE_Int > m_nrows
Definition AMReX_HypreSolver.H:157
IntVect m_nghost
Definition AMReX_HypreSolver.H:136
Vector< LayoutData< HYPRE_Int > > m_nrows_grid
Definition AMReX_HypreSolver.H:155
int getNumIters() const
Definition AMReX_HypreSolver.H:91
Geometry m_geom
Definition AMReX_HypreSolver.H:137
LayoutData< Gpu::DeviceVector< HYPRE_Int > > m_global_id_vec
Definition AMReX_HypreSolver.H:149
std::enable_if_t< IsCallable< Marker, int, int, int, int, int >::value > fill_local_id(Marker const &marker)
Definition AMReX_HypreSolver.H:288
std::unique_ptr< HypreIJIface > m_hypre_ij
Definition AMReX_HypreSolver.H:160
int m_nvars
Definition AMReX_HypreSolver.H:134
Vector< LayoutData< HYPRE_Int > > m_id_offset
Definition AMReX_HypreSolver.H:156
MPI_Comm m_comm
Definition AMReX_HypreSolver.H:144
void get_solution(Vector< MF * > const &a_soln)
Definition AMReX_HypreSolver.H:669
HYPRE_IJVector getx() const
Definition AMReX_HypreSolver.H:99
void load_vectors(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs)
Definition AMReX_HypreSolver.H:612
HYPRE_IJMatrix getA() const
Definition AMReX_HypreSolver.H:97
std::string m_options_namespace
Definition AMReX_HypreSolver.H:142
void fill_matrix(Filler const &filler)
Definition AMReX_HypreSolver.H:485
HYPRE_IJMatrix m_A
Definition AMReX_HypreSolver.H:163
void fill_global_id()
Definition AMReX_HypreSolver.H:379
void solve(Vector< MF * > const &a_soln, Vector< MF const * > const &a_rhs, HYPRE_Real rel_tol, HYPRE_Real abs_tol, int max_iter)
Definition AMReX_HypreSolver.H:586
HYPRE_Int m_nrows_proc
Definition AMReX_HypreSolver.H:158
LayoutData< Gpu::DeviceVector< int > > m_cell_offset
Definition AMReX_HypreSolver.H:152
Vector< FabArray< BaseFab< HYPRE_Int > > > m_global_id
Definition AMReX_HypreSolver.H:148
HYPRE_Real getFinalResidualNorm() const
Definition AMReX_HypreSolver.H:93
Vector< std::unique_ptr< iMultiFab > > m_owner_mask
Definition AMReX_HypreSolver.H:146
HYPRE_IJVector m_b
Definition AMReX_HypreSolver.H:164
Vector< IndexType > m_index_type
Definition AMReX_HypreSolver.H:135
Vector< BoxArray > m_grids
Definition AMReX_HypreSolver.H:138
Vector< iMultiFab > m_local_id
Definition AMReX_HypreSolver.H:147
int m_verbose
Definition AMReX_HypreSolver.H:141
DistributionMapping m_dmap
Definition AMReX_HypreSolver.H:139
HYPRE_IJVector m_x
Definition AMReX_HypreSolver.H:165
HYPRE_IJVector getb() const
Definition AMReX_HypreSolver.H:98
a one-thingy-per-box distributed object
Definition AMReX_LayoutData.H:13
void define(const BoxArray &a_grids, const DistributionMapping &a_dm)
Definition AMReX_LayoutData.H:25
Definition AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:141
Definition AMReX_PODVector.H:262
size_type size() const noexcept
Definition AMReX_PODVector.H:591
void clear() noexcept
Definition AMReX_PODVector.H:589
void resize(size_type a_new_size)
Definition AMReX_PODVector.H:641
T * data() noexcept
Definition AMReX_PODVector.H:609
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:237
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
static constexpr struct amrex::Scan::Type::Exclusive exclusive
static constexpr RetSum noRetSum
Definition AMReX_Scan.H:30
void pack_matrix_gpu(Gpu::DeviceVector< HYPRE_Int > &cols_tmp, Gpu::DeviceVector< HYPRE_Real > mat_tmp, Gpu::DeviceVector< HYPRE_Int > &cols, Gpu::DeviceVector< HYPRE_Real > &mat)
Definition AMReX_HypreSolver.H:449
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
IntVect nGrowVect(FabArrayBase const &fa)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
Returns a BoxND with different type.
Definition AMReX_Box.H:1435
void OverrideSync(FabArray< FAB > &fa, FabArray< IFAB > const &msk, const Periodicity &period)
Definition AMReX_FabArrayUtility.H:1323
std::unique_ptr< iMultiFab > OwnerMask(FabArrayBase const &mf, const Periodicity &period, const IntVect &ngrow)
Definition AMReX_iMultiFab.cpp:637
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:308
const int[]
Definition AMReX_BLProfiler.cpp:1664
BoxArray const & boxArray(FabArrayBase const &fa)
AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR void Loop(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:125
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_Array4.H:61
Definition AMReX_Dim3.H:12
int x
Definition AMReX_Dim3.H:12
int z
Definition AMReX_Dim3.H:12
int y
Definition AMReX_Dim3.H:12
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:201
Definition AMReX_MFIter.H:20
MFItInfo & DisableDeviceSync() noexcept
Definition AMReX_MFIter.H:38
MFItInfo & UseDefaultStream() noexcept
Definition AMReX_MFIter.H:50