Block-Structured AMR Software Framework
AMReX_MLABecLap_3D_K.H
Go to the documentation of this file.
1 #ifndef AMREX_MLABECLAP_3D_K_H_
2 #define AMREX_MLABECLAP_3D_K_H_
3 #include <AMReX_Config.H>
4 
5 namespace amrex {
6 
7 template <typename T>
9 void mlabeclap_adotx (int i, int j, int k, int n, Array4<T> const& y,
10  Array4<T const> const& x,
11  Array4<T const> const& a,
12  Array4<T const> const& bX,
13  Array4<T const> const& bY,
14  Array4<T const> const& bZ,
15  GpuArray<T,AMREX_SPACEDIM> const& dxinv,
16  T alpha, T beta) noexcept
17 {
18  const T dhx = beta*dxinv[0]*dxinv[0];
19  const T dhy = beta*dxinv[1]*dxinv[1];
20  const T dhz = beta*dxinv[2]*dxinv[2];
21  y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n)
22  - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n))
23  - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n)))
24  - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n))
25  - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n)))
26  - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n))
27  - bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n)));
28 }
29 
30 template <typename T>
32 void mlabeclap_adotx_os (int i, int j, int k, int n, Array4<T> const& y,
33  Array4<T const> const& x,
34  Array4<T const> const& a,
35  Array4<T const> const& bX,
36  Array4<T const> const& bY,
37  Array4<T const> const& bZ,
38  Array4<int const> const& osm,
39  GpuArray<T,AMREX_SPACEDIM> const& dxinv,
40  T alpha, T beta) noexcept
41 {
42  const T dhx = beta*dxinv[0]*dxinv[0];
43  const T dhy = beta*dxinv[1]*dxinv[1];
44  const T dhz = beta*dxinv[2]*dxinv[2];
45  if (osm(i,j,k) == 0) {
46  y(i,j,k,n) = T(0.0);
47  } else {
48  y(i,j,k,n) = alpha*a(i,j,k)*x(i,j,k,n)
49  - dhx * (bX(i+1,j,k,n)*(x(i+1,j,k,n) - x(i ,j,k,n))
50  - bX(i ,j,k,n)*(x(i ,j,k,n) - x(i-1,j,k,n)))
51  - dhy * (bY(i,j+1,k,n)*(x(i,j+1,k,n) - x(i,j ,k,n))
52  - bY(i,j ,k,n)*(x(i,j ,k,n) - x(i,j-1,k,n)))
53  - dhz * (bZ(i,j,k+1,n)*(x(i,j,k+1,n) - x(i,j,k ,n))
54  - bZ(i,j,k ,n)*(x(i,j,k ,n) - x(i,j,k-1,n)));
55  }
56 }
57 
58 template <typename T>
60 void mlabeclap_normalize (int i, int j, int k, int n, Array4<T> const& x,
61  Array4<T const> const& a,
62  Array4<T const> const& bX,
63  Array4<T const> const& bY,
64  Array4<T const> const& bZ,
65  GpuArray<T,AMREX_SPACEDIM> const& dxinv,
66  T alpha, T beta) noexcept
67 {
68  const T dhx = beta*dxinv[0]*dxinv[0];
69  const T dhy = beta*dxinv[1]*dxinv[1];
70  const T dhz = beta*dxinv[2]*dxinv[2];
71  x(i,j,k,n) /= alpha*a(i,j,k)
72  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
73  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
74  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
75 }
76 
77 template <typename T>
79 void mlabeclap_flux_x (Box const& box, Array4<T> const& fx, Array4<T const> const& sol,
80  Array4<T const> const& bx, T fac, int ncomp) noexcept
81 {
82  const auto lo = amrex::lbound(box);
83  const auto hi = amrex::ubound(box);
84 
85  for (int n = 0; n < ncomp; ++n) {
86  for (int k = lo.z; k <= hi.z; ++k) {
87  for (int j = lo.y; j <= hi.y; ++j) {
89  for (int i = lo.x; i <= hi.x; ++i) {
90  fx(i,j,k,n) = -fac*bx(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n));
91  }
92  }
93  }
94  }
95 }
96 
97 template <typename T>
99 void mlabeclap_flux_xface (Box const& box, Array4<T> const& fx, Array4<T const> const& sol,
100  Array4<T const> const& bx, T fac, int xlen, int ncomp) noexcept
101 {
102  const auto lo = amrex::lbound(box);
103  const auto hi = amrex::ubound(box);
104 
105  for (int n = 0; n < ncomp; ++n) {
106  for (int k = lo.z; k <= hi.z; ++k) {
107  for (int j = lo.y; j <= hi.y; ++j) {
108  int i = lo.x;
109  fx(i,j,k,n) = -fac*bx(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n));
110  i += xlen;
111  fx(i,j,k,n) = -fac*bx(i,j,k,n)*(sol(i,j,k,n)-sol(i-1,j,k,n));
112  }
113  }
114  }
115 }
116 
117 template <typename T>
119 void mlabeclap_flux_y (Box const& box, Array4<T> const& fy, Array4<T const> const& sol,
120  Array4<T const> const& by, T fac, int ncomp) noexcept
121 {
122  const auto lo = amrex::lbound(box);
123  const auto hi = amrex::ubound(box);
124 
125  for (int n = 0; n < ncomp; ++n) {
126  for (int k = lo.z; k <= hi.z; ++k) {
127  for (int j = lo.y; j <= hi.y; ++j) {
129  for (int i = lo.x; i <= hi.x; ++i) {
130  fy(i,j,k,n) = -fac*by(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n));
131  }
132  }
133  }
134  }
135 }
136 
137 template <typename T>
139 void mlabeclap_flux_yface (Box const& box, Array4<T> const& fy, Array4<T const> const& sol,
140  Array4<T const> const& by, T fac, int ylen, int ncomp) noexcept
141 {
142  const auto lo = amrex::lbound(box);
143  const auto hi = amrex::ubound(box);
144 
145  for (int n = 0; n < ncomp; ++n) {
146  for (int k = lo.z; k <= hi.z; ++k) {
147  int j = lo.y;
149  for (int i = lo.x; i <= hi.x; ++i) {
150  fy(i,j,k,n) = -fac*by(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n));
151  }
152  j += ylen;
154  for (int i = lo.x; i <= hi.x; ++i) {
155  fy(i,j,k,n) = -fac*by(i,j,k,n)*(sol(i,j,k,n)-sol(i,j-1,k,n));
156  }
157  }
158  }
159 }
160 
161 template <typename T>
163 void mlabeclap_flux_z (Box const& box, Array4<T> const& fz, Array4<T const> const& sol,
164  Array4<T const> const& bz, T fac, int ncomp) noexcept
165 {
166  const auto lo = amrex::lbound(box);
167  const auto hi = amrex::ubound(box);
168 
169  for (int n = 0; n < ncomp; ++n) {
170  for (int k = lo.z; k <= hi.z; ++k) {
171  for (int j = lo.y; j <= hi.y; ++j) {
173  for (int i = lo.x; i <= hi.x; ++i) {
174  fz(i,j,k,n) = -fac*bz(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n));
175  }
176  }
177  }
178  }
179 }
180 
181 template <typename T>
183 void mlabeclap_flux_zface (Box const& box, Array4<T> const& fz, Array4<T const> const& sol,
184  Array4<T const> const& bz, T fac, int zlen, int ncomp) noexcept
185 {
186  const auto lo = amrex::lbound(box);
187  const auto hi = amrex::ubound(box);
188 
189  for (int n = 0; n < ncomp; ++n) {
190  int k = lo.z;
191  for (int j = lo.y; j <= hi.y; ++j) {
193  for (int i = lo.x; i <= hi.x; ++i) {
194  fz(i,j,k,n) = -fac*bz(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n));
195  }
196  }
197 
198  k += zlen;
199  for (int j = lo.y; j <= hi.y; ++j) {
201  for (int i = lo.x; i <= hi.x; ++i) {
202  fz(i,j,k,n) = -fac*bz(i,j,k,n)*(sol(i,j,k,n)-sol(i,j,k-1,n));
203  }
204  }
205  }
206 }
207 
208 template <typename T>
210 void abec_gsrb (int i, int j, int k, int n, Array4<T> const& phi, Array4<T const> const& rhs,
211  T alpha, Array4<T const> const& a,
212  T dhx, T dhy, T dhz,
213  Array4<T const> const& bX, Array4<T const> const& bY,
214  Array4<T const> const& bZ,
215  Array4<int const> const& m0, Array4<int const> const& m2,
216  Array4<int const> const& m4,
217  Array4<int const> const& m1, Array4<int const> const& m3,
218  Array4<int const> const& m5,
219  Array4<T const> const& f0, Array4<T const> const& f2,
220  Array4<T const> const& f4,
221  Array4<T const> const& f1, Array4<T const> const& f3,
222  Array4<T const> const& f5,
223  Box const& vbox, int redblack) noexcept
224 {
225  constexpr T omega = T(1.15);
226 
227  if ((i+j+k+redblack)%2 == 0) {
228  const auto vlo = amrex::lbound(vbox);
229  const auto vhi = amrex::ubound(vbox);
230 
231  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
232  ? f0(vlo.x,j,k,n) : T(0.0);
233  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
234  ? f1(i,vlo.y,k,n) : T(0.0);
235  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
236  ? f2(i,j,vlo.z,n) : T(0.0);
237  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
238  ? f3(vhi.x,j,k,n) : T(0.0);
239  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
240  ? f4(i,vhi.y,k,n) : T(0.0);
241  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
242  ? f5(i,j,vhi.z,n) : T(0.0);
243 
244  T gamma = alpha*a(i,j,k)
245  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
246  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
247  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
248 
249  T g_m_d = gamma
250  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
251  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
252  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
253 
254  T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n)
255  + bX(i+1,j,k,n)*phi(i+1,j,k,n) )
256  + dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n)
257  + bY(i,j+1,k,n)*phi(i,j+1,k,n) )
258  + dhz*( bZ(i,j,k ,n)*phi(i,j,k-1,n)
259  + bZ(i,j,k+1,n)*phi(i,j,k+1,n) );
260 
261  T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho);
262  phi(i,j,k,n) = phi(i,j,k,n) + omega/g_m_d * res;
263  }
264 }
265 
266 template <typename T>
268 void abec_gsrb_os (int i, int j, int k, int n,
269  Array4<T> const& phi, Array4<T const> const& rhs,
270  T alpha, Array4<T const> const& a,
271  T dhx, T dhy, T dhz,
272  Array4<T const> const& bX, Array4<T const> const& bY,
273  Array4<T const> const& bZ,
274  Array4<int const> const& m0, Array4<int const> const& m2,
275  Array4<int const> const& m4,
276  Array4<int const> const& m1, Array4<int const> const& m3,
277  Array4<int const> const& m5,
278  Array4<T const> const& f0, Array4<T const> const& f2,
279  Array4<T const> const& f4,
280  Array4<T const> const& f1, Array4<T const> const& f3,
281  Array4<T const> const& f5,
282  Array4<int const> const& osm,
283  Box const& vbox, int redblack) noexcept
284 {
285  constexpr T omega = T(1.15);
286 
287  if ((i+j+k+redblack)%2 == 0) {
288  if (osm(i,j,k) == 0) {
289  phi(i,j,k,n) = T(0.0);
290  } else {
291  const auto vlo = amrex::lbound(vbox);
292  const auto vhi = amrex::ubound(vbox);
293 
294  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
295  ? f0(vlo.x,j,k,n) : T(0.0);
296  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
297  ? f1(i,vlo.y,k,n) : T(0.0);
298  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
299  ? f2(i,j,vlo.z,n) : T(0.0);
300  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
301  ? f3(vhi.x,j,k,n) : T(0.0);
302  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
303  ? f4(i,vhi.y,k,n) : T(0.0);
304  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
305  ? f5(i,j,vhi.z,n) : T(0.0);
306 
307  T gamma = alpha*a(i,j,k)
308  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
309  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
310  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
311 
312  T g_m_d = gamma
313  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
314  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
315  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
316 
317  T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n)
318  + bX(i+1,j,k,n)*phi(i+1,j,k,n) )
319  + dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n)
320  + bY(i,j+1,k,n)*phi(i,j+1,k,n) )
321  + dhz*( bZ(i,j,k ,n)*phi(i,j,k-1,n)
322  + bZ(i,j,k+1,n)*phi(i,j,k+1,n) );
323 
324  T res = rhs(i,j,k,n) - (gamma*phi(i,j,k,n) - rho);
325  phi(i,j,k,n) = phi(i,j,k,n) + omega/g_m_d * res;
326  }
327  }
328 }
329 
330 template <typename T>
332 void abec_jacobi (int i, int j, int k, int n, Array4<T> const& phi,
333  Array4<T const> const& rhs, Array4<T const> const& Ax,
334  T alpha, Array4<T const> const& a,
335  T dhx, T dhy, T dhz,
336  Array4<T const> const& bX, Array4<T const> const& bY,
337  Array4<T const> const& bZ,
338  Array4<int const> const& m0, Array4<int const> const& m2,
339  Array4<int const> const& m4,
340  Array4<int const> const& m1, Array4<int const> const& m3,
341  Array4<int const> const& m5,
342  Array4<T const> const& f0, Array4<T const> const& f2,
343  Array4<T const> const& f4,
344  Array4<T const> const& f1, Array4<T const> const& f3,
345  Array4<T const> const& f5,
346  Box const& vbox) noexcept
347 {
348  const auto vlo = amrex::lbound(vbox);
349  const auto vhi = amrex::ubound(vbox);
350 
351  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
352  ? f0(vlo.x,j,k,n) : T(0.0);
353  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
354  ? f1(i,vlo.y,k,n) : T(0.0);
355  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
356  ? f2(i,j,vlo.z,n) : T(0.0);
357  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
358  ? f3(vhi.x,j,k,n) : T(0.0);
359  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
360  ? f4(i,vhi.y,k,n) : T(0.0);
361  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
362  ? f5(i,j,vhi.z,n) : T(0.0);
363 
364  T gamma = alpha*a(i,j,k)
365  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
366  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
367  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
368 
369  T g_m_d = gamma
370  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
371  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
372  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
373 
374  phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d;
375 }
376 
377 template <typename T>
379 void abec_jacobi_os (int i, int j, int k, int n,
380  Array4<T> const& phi, Array4<T const> const& rhs,
381  Array4<T const> const& Ax,
382  T alpha, Array4<T const> const& a,
383  T dhx, T dhy, T dhz,
384  Array4<T const> const& bX, Array4<T const> const& bY,
385  Array4<T const> const& bZ,
386  Array4<int const> const& m0, Array4<int const> const& m2,
387  Array4<int const> const& m4,
388  Array4<int const> const& m1, Array4<int const> const& m3,
389  Array4<int const> const& m5,
390  Array4<T const> const& f0, Array4<T const> const& f2,
391  Array4<T const> const& f4,
392  Array4<T const> const& f1, Array4<T const> const& f3,
393  Array4<T const> const& f5,
394  Array4<int const> const& osm,
395  Box const& vbox) noexcept
396 {
397  if (osm(i,j,k) == 0) {
398  phi(i,j,k,n) = T(0.0);
399  } else {
400  const auto vlo = amrex::lbound(vbox);
401  const auto vhi = amrex::ubound(vbox);
402 
403  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
404  ? f0(vlo.x,j,k,n) : T(0.0);
405  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
406  ? f1(i,vlo.y,k,n) : T(0.0);
407  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
408  ? f2(i,j,vlo.z,n) : T(0.0);
409  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
410  ? f3(vhi.x,j,k,n) : T(0.0);
411  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
412  ? f4(i,vhi.y,k,n) : T(0.0);
413  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
414  ? f5(i,j,vhi.z,n) : T(0.0);
415 
416  T gamma = alpha*a(i,j,k)
417  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
418  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
419  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
420 
421  T g_m_d = gamma
422  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
423  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
424  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
425 
426  phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d;
427  }
428 }
429 
430 template <typename T>
434  int ilen ) noexcept
435 {
436  T bet = b_ls(0);
437  u_ls(0) = r_ls(0) / bet;
438 
439  for (int i = 1; i <= ilen - 1; i++) {
440  gam(i) = c_ls(i-1) / bet;
441  bet = b_ls(i) - a_ls(i)*gam(i);
442  if (bet == 0) { amrex::Abort(">>>TRIDIAG FAILED"); }
443  u_ls(i) = (r_ls(i)-a_ls(i)*u_ls(i-1)) / bet;
444  }
445  for (int i = ilen-2; i >= 0; i--) {
446  u_ls(i) = u_ls(i) - gam(i+1)*u_ls(i+1);
447  }
448 }
449 
450 template <typename T>
453  Box const& box, Array4<T> const& phi, Array4<T const> const& rhs,
454  T alpha, Array4<T const> const& a,
455  T dhx, T dhy, T dhz,
456  Array4<T const> const& bX, Array4<T const> const& bY,
457  Array4<T const> const& bZ,
458  Array4<int const> const& m0, Array4<int const> const& m2,
459  Array4<int const> const& m4,
460  Array4<int const> const& m1, Array4<int const> const& m3,
461  Array4<int const> const& m5,
462  Array4<T const> const& f0, Array4<T const> const& f2,
463  Array4<T const> const& f4,
464  Array4<T const> const& f1, Array4<T const> const& f3,
465  Array4<T const> const& f5,
466  Box const& vbox, int redblack, int nc) noexcept
467 {
468  const auto lo = amrex::lbound(box);
469  const auto hi = amrex::ubound(box);
470  const auto vlo = amrex::lbound(vbox);
471  const auto vhi = amrex::ubound(vbox);
472 
473  // idir is the direction in which we will do the tridiagonal solve --
474  // it should be the direction in which the mesh spacing is much larger
475  // than in the other directions
476  int idir = 2;
477  int ilen = hi.z - lo.z + 1;
478 
479  if ( (dhx <= dhy) && (dhz <= dhy) ) {
480  idir = 1;
481  ilen = hi.y - lo.y + 1;
482  }
483  if ( (dhy <= dhx) && (dhz <= dhx) ) {
484  idir = 0;
485  ilen = hi.x - lo.x + 1;
486  }
487 
488  // This assertion should be moved outside the kernel for performance!
489  if (ilen > 32) { amrex::Abort("abec_gsrb_with_line_solve is hard-wired to be no longer than 32"); }
490 
491  Array1D<T,0,31> a_ls;
492  Array1D<T,0,31> b_ls;
493  Array1D<T,0,31> c_ls;
494  Array1D<T,0,31> r_ls;
495  Array1D<T,0,31> u_ls;
496  Array1D<T,0,31> gam;
497 
498  if (idir == 2) {
499  for (int n = 0; n < nc; ++n) {
500  for (int j = lo.y; j <= hi.y; ++j) {
502  for (int i = lo.x; i <= hi.x; ++i) {
503  if ((i+j+redblack)%2 == 0) {
504 
505  for (int k = lo.z; k <= hi.z; ++k)
506  {
507  T gamma = alpha*a(i,j,k)
508  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
509  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
510  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
511 
512  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
513  ? f0(vlo.x,j,k,n) : T(0.0);
514  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
515  ? f1(i,vlo.y,k,n) : T(0.0);
516  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
517  ? f2(i,j,vlo.z,n) : T(0.0);
518  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
519  ? f3(vhi.x,j,k,n) : T(0.0);
520  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
521  ? f4(i,vhi.y,k,n) : T(0.0);
522  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
523  ? f5(i,j,vhi.z,n) : T(0.0);
524 
525  T g_m_d = gamma
526  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
527  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
528  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
529 
530  T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n)
531  + bX(i+1,j,k,n)*phi(i+1,j,k,n) )
532  + dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n)
533  + bY(i,j+1,k,n)*phi(i,j+1,k,n) );
534 
535  // We have already accounted for this external boundary in the coefficient of phi(i,j,k,n)
536  if (i == vlo.x && m0(vlo.x-1,j,k) > 0) {
537  rho -= dhx*bX(i ,j,k,n)*phi(i-1,j,k,n);
538  }
539  if (i == vhi.x && m3(vhi.x+1,j,k) > 0) {
540  rho -= dhx*bX(i+1,j,k,n)*phi(i+1,j,k,n);
541  }
542  if (j == vlo.y && m1(i,vlo.y-1,k) > 0) {
543  rho -= dhy*bY(i,j ,k,n)*phi(i,j-1,k,n);
544  }
545  if (j == vhi.y && m4(i,vhi.y+1,k) > 0) {
546  rho -= dhy*bY(i,j+1,k,n)*phi(i,j+1,k,n);
547  }
548 
549  a_ls(k-lo.z) = -dhz*bZ(i,j,k,n);
550  b_ls(k-lo.z) = g_m_d;
551  c_ls(k-lo.z) = -dhz*bZ(i,j,k+1,n);
552  u_ls(k-lo.z) = T(0.);
553  r_ls(k-lo.z) = rhs(i,j,k,n) + rho;
554  // r_ls(k-lo.z) = g_m_d*phi(i,j,k,n) -gamma*phi(i,j,k,n) + rhs(i,j,k,n) + rho;
555 
556  if (k == lo.z)
557  {
558  a_ls(k-lo.z) = T(0.);
559  if (!(m2(i,j,vlo.z-1) > 0)) { r_ls(k-lo.z) += dhz*bZ(i,j,k,n)*phi(i,j,k-1,n); }
560  }
561  if (k == hi.z)
562  {
563  c_ls(k-lo.z) = T(0.);
564  if (!(m5(i,j,vhi.z+1) > 0)) { r_ls(k-lo.z) += dhz*bZ(i,j,k+1,n)*phi(i,j,k+1,n); }
565  }
566  }
567 
568  tridiagonal_solve(a_ls, b_ls, c_ls, r_ls, u_ls, gam, ilen);
569 
570  for (int k = lo.z; k <= hi.z; ++k)
571  {
572  phi(i,j,k,n) = u_ls(k-lo.z);
573  }
574  }
575  }
576  }
577  }
578  } else if (idir == 1) {
579  for (int n = 0; n < nc; ++n) {
580  for (int i = lo.x; i <= hi.x; ++i) {
582  for (int k = lo.z; k <= hi.z; ++k) {
583  if ((i+k+redblack)%2 == 0) {
584 
585  for (int j = lo.y; j <= hi.y; ++j)
586  {
587  T gamma = alpha*a(i,j,k)
588  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
589  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
590  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
591 
592  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
593  ? f0(vlo.x,j,k,n) : T(0.0);
594  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
595  ? f1(i,vlo.y,k,n) : T(0.0);
596  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
597  ? f2(i,j,vlo.z,n) : T(0.0);
598  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
599  ? f3(vhi.x,j,k,n) : T(0.0);
600  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
601  ? f4(i,vhi.y,k,n) : T(0.0);
602  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
603  ? f5(i,j,vhi.z,n) : T(0.0);
604 
605  T g_m_d = gamma
606  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
607  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
608  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
609 
610  T rho = dhx*( bX(i ,j,k,n)*phi(i-1,j,k,n)
611  + bX(i+1,j,k,n)*phi(i+1,j,k,n) )
612  + dhz*( bZ(i,j ,k,n)*phi(i,j,k-1,n)
613  + bZ(i,j,k+1,n)*phi(i,j,k+1,n) );
614 
615  // We have already accounted for this external boundary in the coefficient of phi(i,j,k,n)
616  if (i == vlo.x && m0(vlo.x-1,j,k) > 0) {
617  rho -= dhx*bX(i ,j,k,n)*phi(i-1,j,k,n);
618  }
619  if (i == vhi.x && m3(vhi.x+1,j,k) > 0) {
620  rho -= dhx*bX(i+1,j,k,n)*phi(i+1,j,k,n);
621  }
622  if (k == vlo.z && m2(i,j,vlo.z-1) > 0) {
623  rho -= dhz*bZ(i,j ,k,n)*phi(i,j,k-1,n);
624  }
625  if (k == vhi.z && m5(i,j,vhi.z+1) > 0) {
626  rho -= dhz*bZ(i,j,k+1,n)*phi(i,j,k+1,n);
627  }
628 
629  a_ls(j-lo.y) = -dhy*bY(i,j,k,n);
630  b_ls(j-lo.y) = g_m_d;
631  c_ls(j-lo.y) = -dhy*bY(i,j+1,k,n);
632  u_ls(j-lo.y) = T(0.);
633  r_ls(j-lo.y) = rhs(i,j,k,n) + rho;
634 
635  if (j == lo.y)
636  {
637  a_ls(j-lo.y) = T(0.);
638  if (!(m1(i,vlo.y-1,k) > 0)) { r_ls(j-lo.y) += dhy*bY(i,j,k,n)*phi(i,j-1,k,n); }
639  }
640  if (j == hi.y)
641  {
642  c_ls(j-lo.y) = T(0.);
643  if (!(m4(i,vhi.y+1,k) > 0)) { r_ls(j-lo.y) += dhy*bY(i,j+1,k,n)*phi(i,j+1,k,n); }
644  }
645  }
646 
647  tridiagonal_solve(a_ls, b_ls, c_ls, r_ls, u_ls, gam, ilen);
648 
649  for (int j = lo.y; j <= hi.y; ++j)
650  {
651  phi(i,j,k,n) = u_ls(j-lo.y);
652  }
653  }
654  }
655  }
656  }
657  } else if (idir == 0) {
658  for (int n = 0; n < nc; ++n) {
659  for (int j = lo.y; j <= hi.y; ++j) {
661  for (int k = lo.z; k <= hi.z; ++k) {
662  if ((j+k+redblack)%2 == 0) {
663 
664  for (int i = lo.x; i <= hi.x; ++i)
665  {
666  T gamma = alpha*a(i,j,k)
667  + dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
668  + dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
669  + dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
670 
671  T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
672  ? f0(vlo.x,j,k,n) : T(0.0);
673  T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
674  ? f1(i,vlo.y,k,n) : T(0.0);
675  T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
676  ? f2(i,j,vlo.z,n) : T(0.0);
677  T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
678  ? f3(vhi.x,j,k,n) : T(0.0);
679  T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
680  ? f4(i,vhi.y,k,n) : T(0.0);
681  T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
682  ? f5(i,j,vhi.z,n) : T(0.0);
683 
684  T g_m_d = gamma
685  - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
686  + dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
687  + dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
688 
689  T rho = dhy*( bY(i,j ,k,n)*phi(i,j-1,k,n)
690  + bY(i,j+1,k,n)*phi(i,j+1,k,n) )
691  + dhz*( bZ(i,j ,k,n)*phi(i,j,k-1,n)
692  + bZ(i,j,k+1,n)*phi(i,j,k+1,n) );
693 
694  // We have already accounted for this external boundary in the coefficient of phi(i,j,k,n)
695  if (j == vlo.y && m1(i,vlo.y-1,k) > 0) {
696  rho -= dhy*bY(i,j ,k,n)*phi(i,j-1,k,n);
697  }
698  if (j == vhi.y && m4(i,vhi.y+1,k) > 0) {
699  rho -= dhy*bY(i,j+1,k,n)*phi(i,j+1,k,n);
700  }
701  if (k == vlo.z && m2(i,j,vlo.z-1) > 0) {
702  rho -= dhz*bZ(i,j ,k,n)*phi(i,j,k-1,n);
703  }
704  if (k == vhi.z && m5(i,j,vhi.z+1) > 0) {
705  rho -= dhz*bZ(i,j,k+1,n)*phi(i,j,k+1,n);
706  }
707 
708  a_ls(i-lo.x) = -dhx*bX(i,j,k,n);
709  b_ls(i-lo.x) = g_m_d;
710  c_ls(i-lo.x) = -dhx*bX(i+1,j,k,n);
711  u_ls(i-lo.x) = T(0.);
712  r_ls(i-lo.x) = rhs(i,j,k,n) + rho;
713 
714  if (i == lo.x)
715  {
716  a_ls(i-lo.x) = T(0.);
717  if (!(m0(vlo.x-1,j,k) > 0)) { r_ls(i-lo.x) += dhx*bX(i,j,k,n)*phi(i-1,j,k,n); }
718  }
719  if (i == hi.x)
720  {
721  c_ls(i-lo.x) = T(0.);
722  if (!(m3(vhi.x+1,j,k) > 0)) { r_ls(i-lo.x) += dhx*bX(i+1,j,k,n)*phi(i+1,j,k,n); }
723  }
724  }
725 
726  tridiagonal_solve(a_ls, b_ls, c_ls, r_ls, u_ls, gam, ilen);
727 
728  for (int i = lo.x; i <= hi.x; ++i)
729  {
730  phi(i,j,k,n) = u_ls(i-lo.x);
731  }
732  }
733  }
734  }
735  }
736  }
737 }
738 
739 template <typename T>
741 void overset_rescale_bcoef_x (Box const& box, Array4<T> const& bX, Array4<int const> const& osm,
742  int ncomp, T osfac) noexcept
743 {
744  const auto lo = amrex::lbound(box);
745  const auto hi = amrex::ubound(box);
746  for (int n = 0; n < ncomp; ++n) {
747  for (int k = lo.z; k <= hi.z; ++k) {
748  for (int j = lo.y; j <= hi.y; ++j) {
749  for (int i = lo.x; i <= hi.x; ++i) {
750  if ((osm(i-1,j,k)+osm(i,j,k)) == 1) {
751  bX(i,j,k,n) *= osfac;
752  }
753  }}}
754  }
755 }
756 
757 template <typename T>
759 void overset_rescale_bcoef_y (Box const& box, Array4<T> const& bY, Array4<int const> const& osm,
760  int ncomp, T osfac) noexcept
761 {
762  const auto lo = amrex::lbound(box);
763  const auto hi = amrex::ubound(box);
764  for (int n = 0; n < ncomp; ++n) {
765  for (int k = lo.z; k <= hi.z; ++k) {
766  for (int j = lo.y; j <= hi.y; ++j) {
767  for (int i = lo.x; i <= hi.x; ++i) {
768  if ((osm(i,j-1,k)+osm(i,j,k)) == 1) {
769  bY(i,j,k,n) *= osfac;
770  }
771  }}}
772  }
773 }
774 
775 template <typename T>
777 void overset_rescale_bcoef_z (Box const& box, Array4<T> const& bZ, Array4<int const> const& osm,
778  int ncomp, T osfac) noexcept
779 {
780  const auto lo = amrex::lbound(box);
781  const auto hi = amrex::ubound(box);
782  for (int n = 0; n < ncomp; ++n) {
783  for (int k = lo.z; k <= hi.z; ++k) {
784  for (int j = lo.y; j <= hi.y; ++j) {
785  for (int i = lo.x; i <= hi.x; ++i) {
786  if ((osm(i,j,k-1)+osm(i,j,k)) == 1) {
787  bZ(i,j,k,n) *= osfac;
788  }
789  }}}
790  }
791 }
792 
793 }
794 #endif
#define AMREX_PRAGMA_SIMD
Definition: AMReX_Extension.H:80
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
int idir
Definition: AMReX_HypreMLABecLap.cpp:1093
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void overset_rescale_bcoef_x(Box const &box, Array4< T > const &bX, Array4< int const > const &osm, int ncomp, T osfac) noexcept
Definition: AMReX_MLABecLap_1D_K.H:239
BoxND< AMREX_SPACEDIM > Box
Definition: AMReX_BaseFwd.H:27
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_adotx_os(int i, int, int, int n, Array4< T > const &y, Array4< T const > const &x, Array4< T const > const &a, Array4< T const > const &bX, Array4< int const > const &osm, GpuArray< T, AMREX_SPACEDIM > const &dxinv, T alpha, T beta) noexcept
Definition: AMReX_MLABecLap_1D_K.H:24
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_x(Box const &box, Array4< T > const &fx, Array4< T const > const &sol, Array4< T const > const &bx, T fac, int ncomp) noexcept
Definition: AMReX_MLABecLap_1D_K.H:56
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_z(Box const &box, Array4< T > const &fz, Array4< T const > const &sol, Array4< T const > const &bz, T fac, int ncomp) noexcept
Definition: AMReX_MLABecLap_3D_K.H:163
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_y(Box const &box, Array4< T > const &fy, Array4< T const > const &sol, Array4< T const > const &by, T fac, int ncomp) noexcept
Definition: AMReX_MLABecLap_2D_K.H:104
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void abec_gsrb_os(int i, int, int, int n, Array4< T > const &phi, Array4< T const > const &rhs, T alpha, Array4< T const > const &a, T dhx, Array4< T const > const &bX, Array4< int const > const &m0, Array4< int const > const &m1, Array4< T const > const &f0, Array4< T const > const &f1, Array4< int const > const &osm, Box const &vbox, int redblack) noexcept
Definition: AMReX_MLABecLap_1D_K.H:121
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void overset_rescale_bcoef_y(Box const &box, Array4< T > const &bY, Array4< int const > const &osm, int ncomp, T osfac) noexcept
Definition: AMReX_MLABecLap_2D_K.H:437
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void abec_jacobi(int i, int, int, int n, Array4< T > const &phi, Array4< T const > const &rhs, Array4< T const > const &Ax, T alpha, Array4< T const > const &a, T dhx, Array4< T const > const &bX, Array4< int const > const &m0, Array4< int const > const &m1, Array4< T const > const &f0, Array4< T const > const &f1, Box const &vbox) noexcept
Definition: AMReX_MLABecLap_1D_K.H:160
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void abec_gsrb(int i, int, int, int n, Array4< T > const &phi, Array4< T const > const &rhs, T alpha, Array4< T const > const &a, T dhx, Array4< T const > const &bX, Array4< int const > const &m0, Array4< int const > const &m1, Array4< T const > const &f0, Array4< T const > const &f1, Box const &vbox, int redblack) noexcept
Definition: AMReX_MLABecLap_1D_K.H:87
AMREX_FORCE_INLINE void abec_gsrb_with_line_solve(Box const &, Array4< T > const &, Array4< T const > const &, T, Array4< T const > const &, T, Array4< T const > const &, Array4< int const > const &, Array4< int const > const &, Array4< T const > const &, Array4< T const > const &, Box const &, int, int) noexcept
Definition: AMReX_MLABecLap_1D_K.H:223
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_yface(Box const &box, Array4< T > const &fy, Array4< T const > const &sol, Array4< T const > const &by, T fac, int ylen, int ncomp) noexcept
Definition: AMReX_MLABecLap_2D_K.H:122
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_normalize(int i, int, int, int n, Array4< T > const &x, Array4< T const > const &a, Array4< T const > const &bX, GpuArray< T, AMREX_SPACEDIM > const &dxinv, T alpha, T beta) noexcept
Definition: AMReX_MLABecLap_1D_K.H:44
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_zface(Box const &box, Array4< T > const &fz, Array4< T const > const &sol, Array4< T const > const &bz, T fac, int zlen, int ncomp) noexcept
Definition: AMReX_MLABecLap_3D_K.H:183
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void abec_jacobi_os(int i, int, int, int n, Array4< T > const &phi, Array4< T const > const &rhs, Array4< T const > const &Ax, T alpha, Array4< T const > const &a, T dhx, Array4< T const > const &bX, Array4< int const > const &m0, Array4< int const > const &m1, Array4< T const > const &f0, Array4< T const > const &f1, Array4< int const > const &osm, Box const &vbox) noexcept
Definition: AMReX_MLABecLap_1D_K.H:189
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_flux_xface(Box const &box, Array4< T > const &fx, Array4< T const > const &sol, Array4< T const > const &bx, T fac, int xlen, int ncomp) noexcept
Definition: AMReX_MLABecLap_1D_K.H:72
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void overset_rescale_bcoef_z(Box const &box, Array4< T > const &bZ, Array4< int const > const &osm, int ncomp, T osfac) noexcept
Definition: AMReX_MLABecLap_3D_K.H:777
AMREX_FORCE_INLINE void tridiagonal_solve(Array1D< T, 0, 31 > &a_ls, Array1D< T, 0, 31 > &b_ls, Array1D< T, 0, 31 > &c_ls, Array1D< T, 0, 31 > &r_ls, Array1D< T, 0, 31 > &u_ls, Array1D< T, 0, 31 > &gam, int ilen) noexcept
Definition: AMReX_MLABecLap_3D_K.H:432
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlabeclap_adotx(int i, int, int, int n, Array4< T > const &y, Array4< T const > const &x, Array4< T const > const &a, Array4< T const > const &bX, GpuArray< T, AMREX_SPACEDIM > const &dxinv, T alpha, T beta) noexcept
Definition: AMReX_MLABecLap_1D_K.H:9
Definition: AMReX_Array.H:161
Definition: AMReX_Array4.H:61
Definition: AMReX_Array.H:34