1#ifndef AMREX_MLNODETENSORLAP_3D_K_H_
2#define AMREX_MLNODETENSORLAP_3D_K_H_
3#include <AMReX_Config.H>
8namespace mlndts_detail {
11 Real ts_interp_line_x (Array4<Real const>
const&
crse,
12 int ic,
int jc,
int kc)
noexcept
18 Real ts_interp_line_y (Array4<Real const>
const&
crse,
19 int ic,
int jc,
int kc)
noexcept
25 Real ts_interp_line_z (Array4<Real const>
const&
crse,
26 int ic,
int jc,
int kc)
noexcept
32 Real ts_interp_face_xy (Array4<Real const>
const&
crse,
33 int ic,
int jc,
int kc)
noexcept
35 return (ts_interp_line_y(
crse,ic ,jc ,kc) +
36 ts_interp_line_y(
crse,ic+1,jc ,kc) +
37 ts_interp_line_x(
crse,ic ,jc ,kc) +
38 ts_interp_line_x(
crse,ic ,jc+1,kc)) *
Real(0.25);
42 Real ts_interp_face_xz (Array4<Real const>
const&
crse,
43 int ic,
int jc,
int kc)
noexcept
45 return (ts_interp_line_z(
crse,ic ,jc,kc ) +
46 ts_interp_line_z(
crse,ic+1,jc,kc ) +
47 ts_interp_line_x(
crse,ic ,jc,kc ) +
48 ts_interp_line_x(
crse,ic ,jc,kc+1)) *
Real(0.25);
52 Real ts_interp_face_yz (Array4<Real const>
const&
crse,
53 int ic,
int jc,
int kc)
noexcept
55 return (ts_interp_line_z(
crse,ic,jc ,kc ) +
56 ts_interp_line_z(
crse,ic,jc+1,kc ) +
57 ts_interp_line_y(
crse,ic,jc ,kc ) +
58 ts_interp_line_y(
crse,ic,jc ,kc+1)) *
Real(0.25);
65 Array4<Real const>
const&
crse, Array4<int const>
const& msk)
noexcept
67 using namespace mlndts_detail;
73 bool i_is_odd = (ic*2 != i);
74 bool j_is_odd = (jc*2 != j);
75 bool k_is_odd = (kc*2 != k);
76 if (i_is_odd && j_is_odd && k_is_odd) {
78 fine(i,j,k) += (ts_interp_face_yz(
crse,ic ,jc ,kc ) +
79 ts_interp_face_yz(
crse,ic+1,jc ,kc ) +
80 ts_interp_face_xz(
crse,ic ,jc ,kc ) +
81 ts_interp_face_xz(
crse,ic ,jc+1,kc ) +
82 ts_interp_face_xy(
crse,ic ,jc ,kc ) +
83 ts_interp_face_xy(
crse,ic ,jc ,kc+1)) *
Real(1./6.);
84 }
else if (j_is_odd && k_is_odd) {
86 fine(i,j,k) += ts_interp_face_yz(
crse,ic,jc,kc);
87 }
else if (i_is_odd && k_is_odd) {
89 fine(i,j,k) += ts_interp_face_xz(
crse,ic,jc,kc);
90 }
else if (i_is_odd && j_is_odd) {
92 fine(i,j,k) += ts_interp_face_xy(
crse,ic,jc,kc);
93 }
else if (i_is_odd) {
95 fine(i,j,k) += ts_interp_line_x(
crse,ic,jc,kc);
96 }
else if (j_is_odd) {
98 fine(i,j,k) += ts_interp_line_y(
crse,ic,jc,kc);
99 }
else if (k_is_odd) {
101 fine(i,j,k) += ts_interp_line_z(
crse,ic,jc,kc);
111 Array4<Real const>
const&
crse, Array4<int const>
const& msk,
112 int semi_dir)
noexcept
114 using namespace mlndts_detail;
120 bool j_is_odd = (jc*2 != j);
121 bool k_is_odd = (kc*2 != k);
122 if (j_is_odd && k_is_odd) {
124 fine(i,j,k) += ts_interp_face_yz(
crse,i,jc,kc);
125 }
else if (j_is_odd) {
127 fine(i,j,k) += ts_interp_line_y(
crse,i,jc,kc);
128 }
else if (k_is_odd) {
130 fine(i,j,k) += ts_interp_line_z(
crse,i,jc,kc);
135 }
else if (semi_dir == 1) {
138 bool i_is_odd = (ic*2 != i);
139 bool k_is_odd = (kc*2 != k);
140 if (i_is_odd && k_is_odd) {
142 fine(i,j,k) += ts_interp_face_xz(
crse,ic,j,kc);
143 }
else if (i_is_odd) {
145 fine(i,j,k) += ts_interp_line_x(
crse,ic,j,kc);
146 }
else if (k_is_odd) {
148 fine(i,j,k) += ts_interp_line_z(
crse,ic,j,kc);
156 bool i_is_odd = (ic*2 != i);
157 bool j_is_odd = (jc*2 != j);
158 if (i_is_odd && j_is_odd) {
160 fine(i,j,k) += ts_interp_face_xy(
crse,ic,jc,k);
161 }
else if (i_is_odd) {
163 fine(i,j,k) += ts_interp_line_x(
crse,ic,jc,k);
164 }
else if (j_is_odd) {
166 fine(i,j,k) += ts_interp_line_y(
crse,ic,jc,k);
180 y(i,j,k) =
Real(0.0);
182 y(i,j,k) = s[0] * (
x(i-1,j ,k ) +
x(i+1,j ,k ))
183 + s[3] * (
x(i ,j-1,k ) +
x(i ,j+1,k ))
184 + s[5] * (
x(i ,j ,k-1) +
x(i ,j ,k+1))
185 -
Real(2.)*(s[0]+s[3]+s[5]) *
x(i,j,k)
186 +
Real(0.5)*s[1] * (
x(i-1,j-1,k ) +
x(i+1,j+1,k ) -
x(i-1,j+1,k ) -
x(i+1,j-1,k ))
187 +
Real(0.5)*s[2] * (
x(i-1,j ,k-1) +
x(i+1,j ,k+1) -
x(i-1,j ,k+1) -
x(i+1,j ,k-1))
188 +
Real(0.5)*s[4] * (
x(i ,j-1,k-1) +
x(i ,j+1,k+1) -
x(i ,j-1,k+1) -
x(i ,j+1,k-1));
201 Real s0 =
Real(-2.)*(s[0]+s[3]+s[5]);
202 Real Ax = s[0] * (sol(i-1,j ,k ) + sol(i+1,j ,k ))
203 + s[3] * (sol(i ,j-1,k ) + sol(i ,j+1,k ))
204 + s[5] * (sol(i ,j ,k-1) + sol(i ,j ,k+1))
206 +
Real(0.5)*s[1] * (sol(i-1,j-1,k ) + sol(i+1,j+1,k ) - sol(i-1,j+1,k ) - sol(i+1,j-1,k ))
207 +
Real(0.5)*s[2] * (sol(i-1,j ,k-1) + sol(i+1,j ,k+1) - sol(i-1,j ,k+1) - sol(i+1,j ,k-1))
208 +
Real(0.5)*s[4] * (sol(i ,j-1,k-1) + sol(i ,j+1,k+1) - sol(i ,j-1,k+1) - sol(i ,j+1,k-1));
209 sol(i,j,k) += (rhs(i,j,k) - Ax) * (omega/s0);
213#if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1)
215template <
typename HypreInt,
typename AtomicInt>
216void mlndtslap_fill_ijmatrix_cpu (
Box const& ndbx,
217 Array4<AtomicInt const>
const& gid,
218 Array4<int const>
const& lid,
219 HypreInt*
const ncols, HypreInt*
const cols,
221 GpuArray<Real,6>
const& s)
noexcept
223 constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
229 HypreInt nelems_old = nelems;
231 cols[nelems] = gid(i,j,k);
232 mat[nelems] =
Real(-2.)*(s[0]+s[3]+s[5]);
235 if ( gid(i ,j-1,k-1) < gidmax) {
236 cols[nelems] = gid(i ,j-1,k-1);
237 mat[nelems] =
Real(0.5)*s[4];
241 if ( gid(i-1,j ,k-1) < gidmax) {
242 cols[nelems] = gid(i-1,j ,k-1);
243 mat[nelems] =
Real(0.5)*s[2];
247 if ( gid(i ,j ,k-1) < gidmax) {
248 cols[nelems] = gid(i ,j ,k-1);
253 if ( gid(i+1,j ,k-1) < gidmax) {
254 cols[nelems] = gid(i+1,j ,k-1);
255 mat[nelems] =
Real(-0.5)*s[2];
259 if ( gid(i ,j+1,k-1) < gidmax) {
260 cols[nelems] = gid(i ,j+1,k-1);
261 mat[nelems] =
Real(-0.5)*s[4];
265 if ( gid(i-1,j-1,k ) < gidmax) {
266 cols[nelems] = gid(i-1,j-1,k );
267 mat[nelems] =
Real(0.5)*s[1];
271 if ( gid(i ,j-1,k ) < gidmax) {
272 cols[nelems] = gid(i ,j-1,k );
277 if ( gid(i+1,j-1,k ) < gidmax) {
278 cols[nelems] = gid(i+1,j-1,k );
279 mat[nelems] =
Real(-0.5)*s[1];
283 if ( gid(i-1,j ,k ) < gidmax) {
284 cols[nelems] = gid(i-1,j ,k );
289 if ( gid(i+1,j ,k ) < gidmax) {
290 cols[nelems] = gid(i+1,j ,k );
295 if ( gid(i-1,j+1,k ) < gidmax) {
296 cols[nelems] = gid(i-1,j+1,k );
297 mat[nelems] =
Real(-0.5)*s[1];
301 if ( gid(i ,j+1,k ) < gidmax) {
302 cols[nelems] = gid(i ,j+1,k );
307 if ( gid(i+1,j+1,k ) < gidmax) {
308 cols[nelems] = gid(i+1,j+1,k );
309 mat[nelems] =
Real(0.5)*s[1];
313 if ( gid(i ,j-1,k+1) < gidmax) {
314 cols[nelems] = gid(i ,j-1,k+1);
315 mat[nelems] =
Real(-0.5)*s[4];
319 if ( gid(i-1,j ,k+1) < gidmax) {
320 cols[nelems] = gid(i-1,j ,k+1);
321 mat[nelems] =
Real(-0.5)*s[2];
325 if ( gid(i ,j ,k+1) < gidmax) {
326 cols[nelems] = gid(i ,j ,k+1);
331 if ( gid(i+1,j ,k+1) < gidmax) {
332 cols[nelems] = gid(i+1,j ,k+1);
333 mat[nelems] =
Real(0.5)*s[2];
337 if ( gid(i ,j+1,k+1) < gidmax) {
338 cols[nelems] = gid(i ,j+1,k+1);
339 mat[nelems] =
Real(0.5)*s[4];
343 ncols[lid(i,j,k)] = nelems - nelems_old;
350template <
typename HypreInt,
typename AtomicInt>
352void mlndtslap_fill_ijmatrix_gpu (
const int ps,
const int i,
const int j,
const int k,
354 Array4<AtomicInt const>
const& gid,
355 Array4<int const>
const& lid,
356 HypreInt*
const ncols, HypreInt*
const cols,
358 GpuArray<Real,6>
const& s)
noexcept
362 constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
365 cols[ps] = gid(i,j,k);
366 mat[ps] =
Real(-2.)*(s[0]+s[3]+s[5]);
368 if (gid(i-1,j-1,k-1) < gidmax) { ++nc; }
369 if (gid(i ,j-1,k-1) < gidmax) { ++nc; }
370 if (gid(i+1,j-1,k-1) < gidmax) { ++nc; }
371 if (gid(i-1,j ,k-1) < gidmax) { ++nc; }
372 if (gid(i ,j ,k-1) < gidmax) { ++nc; }
373 if (gid(i+1,j ,k-1) < gidmax) { ++nc; }
374 if (gid(i-1,j+1,k-1) < gidmax) { ++nc; }
375 if (gid(i ,j+1,k-1) < gidmax) { ++nc; }
376 if (gid(i+1,j+1,k-1) < gidmax) { ++nc; }
377 if (gid(i-1,j-1,k ) < gidmax) { ++nc; }
378 if (gid(i ,j-1,k ) < gidmax) { ++nc; }
379 if (gid(i+1,j-1,k ) < gidmax) { ++nc; }
380 if (gid(i-1,j ,k ) < gidmax) { ++nc; }
381 if (gid(i+1,j ,k ) < gidmax) { ++nc; }
382 if (gid(i-1,j+1,k ) < gidmax) { ++nc; }
383 if (gid(i ,j+1,k ) < gidmax) { ++nc; }
384 if (gid(i+1,j+1,k ) < gidmax) { ++nc; }
385 if (gid(i-1,j-1,k+1) < gidmax) { ++nc; }
386 if (gid(i ,j-1,k+1) < gidmax) { ++nc; }
387 if (gid(i+1,j-1,k+1) < gidmax) { ++nc; }
388 if (gid(i-1,j ,k+1) < gidmax) { ++nc; }
389 if (gid(i ,j ,k+1) < gidmax) { ++nc; }
390 if (gid(i+1,j ,k+1) < gidmax) { ++nc; }
391 if (gid(i-1,j+1,k+1) < gidmax) { ++nc; }
392 if (gid(i ,j+1,k+1) < gidmax) { ++nc; }
393 if (gid(i+1,j+1,k+1) < gidmax) { ++nc; }
394 ncols[lid(i,j,k)] = nc;
396 else if (
offset == 1 && gid(i-1,j-1,k-1) < gidmax) {
397 cols[ps] = gid(i-1,j-1,k-1);
400 else if (
offset == 2 && gid(i ,j-1,k-1) < gidmax) {
401 cols[ps] = gid(i ,j-1,k-1);
402 mat[ps] =
Real(0.5)*s[4];
404 else if (
offset == 3 && gid(i+1,j-1,k-1) < gidmax) {
405 cols[ps] = gid(i+1,j-1,k-1);
408 else if (
offset == 4 && gid(i-1,j ,k-1) < gidmax) {
409 cols[ps] = gid(i-1,j ,k-1);
410 mat[ps] =
Real(0.5)*s[2];
412 else if (
offset == 5 && gid(i ,j ,k-1) < gidmax) {
413 cols[ps] = gid(i ,j ,k-1);
416 else if (
offset == 6 && gid(i+1,j ,k-1) < gidmax) {
417 cols[ps] = gid(i+1,j ,k-1);
418 mat[ps] =
Real(-0.5)*s[2];
420 else if (
offset == 7 && gid(i-1,j+1,k-1) < gidmax) {
421 cols[ps] = gid(i-1,j+1,k-1);
424 else if (
offset == 8 && gid(i ,j+1,k-1) < gidmax) {
425 cols[ps] = gid(i ,j+1,k-1);
426 mat[ps] =
Real(-0.5)*s[4];
428 else if (
offset == 9 && gid(i+1,j+1,k-1) < gidmax) {
429 cols[ps] = gid(i+1,j+1,k-1);
432 else if (
offset == 10 && gid(i-1,j-1,k ) < gidmax) {
433 cols[ps] = gid(i-1,j-1,k );
434 mat[ps] =
Real(0.5)*s[1];
436 else if (
offset == 11 && gid(i ,j-1,k ) < gidmax) {
437 cols[ps] = gid(i ,j-1,k );
440 else if (
offset == 12 && gid(i+1,j-1,k ) < gidmax) {
441 cols[ps] = gid(i+1,j-1,k );
442 mat[ps] =
Real(-0.5)*s[1];
444 else if (
offset == 13 && gid(i-1,j ,k ) < gidmax) {
445 cols[ps] = gid(i-1,j ,k );
448 else if (
offset == 14 && gid(i+1,j ,k ) < gidmax) {
449 cols[ps] = gid(i+1,j ,k );
452 else if (
offset == 15 && gid(i-1,j+1,k ) < gidmax) {
453 cols[ps] = gid(i-1,j+1,k );
454 mat[ps] =
Real(-0.5)*s[1];
456 else if (
offset == 16 && gid(i ,j+1,k ) < gidmax) {
457 cols[ps] = gid(i ,j+1,k );
460 else if (
offset == 17 && gid(i+1,j+1,k ) < gidmax) {
461 cols[ps] = gid(i+1,j+1,k );
462 mat[ps] =
Real(0.5)*s[1];
464 else if (
offset == 18 && gid(i-1,j-1,k+1) < gidmax) {
465 cols[ps] = gid(i-1,j-1,k+1);
468 else if (
offset == 19 && gid(i ,j-1,k+1) < gidmax) {
469 cols[ps] = gid(i ,j-1,k+1);
470 mat[ps] =
Real(-0.5)*s[4];
472 else if (
offset == 20 && gid(i+1,j-1,k+1) < gidmax) {
473 cols[ps] = gid(i+1,j-1,k+1);
476 else if (
offset == 21 && gid(i-1,j ,k+1) < gidmax) {
477 cols[ps] = gid(i-1,j ,k+1);
478 mat[ps] =
Real(-0.5)*s[2];
480 else if (
offset == 22 && gid(i ,j ,k+1) < gidmax) {
481 cols[ps] = gid(i ,j ,k+1);
484 else if (
offset == 23 && gid(i+1,j ,k+1) < gidmax) {
485 cols[ps] = gid(i+1,j ,k+1);
486 mat[ps] =
Real(0.5)*s[2];
488 else if (
offset == 24 && gid(i-1,j+1,k+1) < gidmax) {
489 cols[ps] = gid(i-1,j+1,k+1);
492 else if (
offset == 25 && gid(i ,j+1,k+1) < gidmax) {
493 cols[ps] = gid(i ,j+1,k+1);
494 mat[ps] =
Real(0.5)*s[4];
496 else if (
offset == 26 && gid(i+1,j+1,k+1) < gidmax) {
497 cols[ps] = gid(i+1,j+1,k+1);
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
Array4< Real > fine
Definition AMReX_InterpFaceRegister.cpp:90
Array4< Real const > crse
Definition AMReX_InterpFaceRegister.cpp:92
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79
__host__ __device__ BoxND< dim > coarsen(const BoxND< dim > &b, int ref_ratio) noexcept
Coarsen BoxND by given (positive) coarsening ratio.
Definition AMReX_Box.H:1409
Definition AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndtslap_adotx(int i, int j, int k, Array4< Real > const &y, Array4< Real const > const &x, Array4< int const > const &msk, GpuArray< Real, 3 > const &s) noexcept
Definition AMReX_MLNodeTensorLap_2D_K.H:93
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:27
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndtslap_gauss_seidel(int i, int j, int k, Array4< Real > const &sol, Array4< Real const > const &rhs, Array4< int const > const &msk, GpuArray< Real, 3 > const &s) noexcept
Definition AMReX_MLNodeTensorLap_2D_K.H:107
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndtslap_semi_interpadd(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< int const > const &, int) noexcept
Definition AMReX_MLNodeTensorLap_1D_K.H:13
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndtslap_interpadd(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition AMReX_MLNodeTensorLap_1D_K.H:8
void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition AMReX_Loop.H:365
Definition AMReX_Array4.H:61
Fixed-size array that can be used on GPU.
Definition AMReX_Array.H:40