Block-Structured AMR Software Framework
AMReX_MLNodeLap_2D_K.H
Go to the documentation of this file.
1 #ifndef AMREX_MLNODELAP_2D_K_H_
2 #define AMREX_MLNODELAP_2D_K_H_
3 #include <AMReX_Config.H>
4 
5 namespace amrex {
6 
8 void mlndlap_zero_fine (int i, int j, int, Array4<Real> const& phi,
9  Array4<int const> const& msk, int fine_flag) noexcept
10 {
11  // Testing if the node is covered by a fine level in computing
12  // coarse sync residual
13  if (msk(i-1,j-1,0) == fine_flag &&
14  msk(i ,j-1,0) == fine_flag &&
15  msk(i-1,j ,0) == fine_flag &&
16  msk(i ,j ,0) == fine_flag)
17  {
18  phi(i,j,0) = Real(0.0);
19  }
20 }
21 
22 //
23 // coeffs
24 //
25 
27 void mlndlap_avgdown_coeff_x (int i, int j, int k, Array4<Real> const& crse,
28  Array4<Real const> const& fine) noexcept
29 {
30  Real a = fine(2*i ,2*j,k) + fine(2*i ,2*j+1,k);
31  Real b = fine(2*i+1,2*j,k) + fine(2*i+1,2*j+1,k);
32  crse(i,j,k) = a*b/(a+b);
33 }
34 
36 void mlndlap_avgdown_coeff_y (int i, int j, int k, Array4<Real> const& crse,
37  Array4<Real const> const& fine) noexcept
38 {
39  Real a = fine(2*i,2*j ,k) + fine(2*i+1,2*j ,k);
40  Real b = fine(2*i,2*j+1,k) + fine(2*i+1,2*j+1,k);
41  crse(i,j,k) = a*b/(a+b);
42 }
43 
45 void mlndlap_semi_avgdown_coeff (int i, int j, int k, Array4<Real> const& crse,
46  Array4<Real const> const& fine, int idir) noexcept
47 {
48  if (idir == 1) {
49  Real a = fine(2*i ,j,k);
50  Real b = fine(2*i+1,j,k);
51  crse(i,j,k) = Real(2.0)*a*b/(a+b);
52  } else {
53  Real a = fine(i,2*j ,k);
54  Real b = fine(i,2*j+1,k);
55  crse(i,j,k) = Real(2.0)*a*b/(a+b);
56  }
57 }
58 
59 //
60 // operator
61 //
62 
64 Real mlndlap_adotx_ha (int i, int j, int k, Array4<Real const> const& x,
65  Array4<Real const> const& sx, Array4<Real const> const& sy,
66  Array4<int const> const& msk, bool is_rz,
67  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
68 {
69  if (msk(i,j,k)) {
70  return Real(0.0);
71  } else {
72  Real facx = Real(1./6.)*dxinv[0]*dxinv[0];
73  Real facy = Real(1./6.)*dxinv[1]*dxinv[1];
74  Real y = x(i-1,j-1,k)*(facx*sx(i-1,j-1,k)+facy*sy(i-1,j-1,k))
75  + x(i+1,j-1,k)*(facx*sx(i ,j-1,k)+facy*sy(i ,j-1,k))
76  + x(i-1,j+1,k)*(facx*sx(i-1,j ,k)+facy*sy(i-1,j ,k))
77  + x(i+1,j+1,k)*(facx*sx(i ,j ,k)+facy*sy(i ,j ,k))
78  + x(i-1,j,k)*(Real(2.0)*facx*(sx(i-1,j-1,k)+sx(i-1,j,k))
79  - facy*(sy(i-1,j-1,k)+sy(i-1,j,k)))
80  + x(i+1,j,k)*(Real(2.0)*facx*(sx(i ,j-1,k)+sx(i ,j,k))
81  - facy*(sy(i ,j-1,k)+sy(i ,j,k)))
82  + x(i,j-1,k)*( -facx*(sx(i-1,j-1,k)+sx(i,j-1,k))
83  +Real(2.0)*facy*(sy(i-1,j-1,k)+sy(i,j-1,k)))
84  + x(i,j+1,k)*( -facx*(sx(i-1,j ,k)+sx(i,j ,k))
85  +Real(2.0)*facy*(sy(i-1,j ,k)+sy(i,j ,k)))
86  + x(i,j,k)*(-Real(2.0))*(facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
87  +facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
88  if (is_rz) {
89  Real fp = facy / static_cast<Real>(2*i+1);
90  Real fm = facy / static_cast<Real>(2*i-1);
91  y += (fm*sy(i-1,j ,k)-fp*sy(i,j ,k))*(x(i,j+1,k)-x(i,j,k))
92  + (fm*sy(i-1,j-1,k)-fp*sy(i,j-1,k))*(x(i,j-1,k)-x(i,j,k));
93  }
94  return y;
95  }
96 }
97 
99 Real mlndlap_adotx_aa (int i, int j, int k, Array4<Real const> const& x,
100  Array4<Real const> const& sig, Array4<int const> const& msk,
101  bool is_rz, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
102 {
103  if (msk(i,j,k)) {
104  return Real(0.0);
105  } else {
106  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
107  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
108  Real fxy = facx + facy;
109  Real f2xmy = Real(2.0)*facx - facy;
110  Real fmx2y = Real(2.0)*facy - facx;
111  Real y = x(i-1,j-1,k)*fxy*sig(i-1,j-1,k)
112  + x(i+1,j-1,k)*fxy*sig(i ,j-1,k)
113  + x(i-1,j+1,k)*fxy*sig(i-1,j ,k)
114  + x(i+1,j+1,k)*fxy*sig(i ,j ,k)
115  + x(i-1,j,k)*f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k))
116  + x(i+1,j,k)*f2xmy*(sig(i ,j-1,k)+sig(i ,j,k))
117  + x(i,j-1,k)*fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k))
118  + x(i,j+1,k)*fmx2y*(sig(i-1,j ,k)+sig(i,j ,k))
119  + x(i,j,k)*(-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)
120  +sig(i-1,j,k)+sig(i,j,k));
121  if (is_rz) {
122  Real fp = facy / static_cast<Real>(2*i+1);
123  Real fm = facy / static_cast<Real>(2*i-1);
124  y += (fm*sig(i-1,j ,k)-fp*sig(i,j ,k))*(x(i,j+1,k)-x(i,j,k))
125  + (fm*sig(i-1,j-1,k)-fp*sig(i,j-1,k))*(x(i,j-1,k)-x(i,j,k));
126  }
127  return y;
128  }
129 }
130 
132 Real mlndlap_adotx_c (int i, int j, int k, Array4<Real const> const& x,
133  Real sigma, Array4<int const> const& msk,
134  bool is_rz, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
135 {
136  if (msk(i,j,k)) {
137  return Real(0.0);
138  } else {
139  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
140  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
141  Real fxy = facx + facy;
142  Real f2xmy = Real(2.0)*facx - facy;
143  Real fmx2y = Real(2.0)*facy - facx;
144  Real y = (x(i-1,j-1,k)*fxy
145  + x(i+1,j-1,k)*fxy
146  + x(i-1,j+1,k)*fxy
147  + x(i+1,j+1,k)*fxy
148  + x(i-1,j,k)*f2xmy*Real(2.)
149  + x(i+1,j,k)*f2xmy*Real(2.)
150  + x(i,j-1,k)*fmx2y*Real(2.)
151  + x(i,j+1,k)*fmx2y*Real(2.)
152  + x(i,j,k)*(-Real(2.0))*fxy*Real(4.));
153  if (is_rz) {
154  Real fp = facy / static_cast<Real>(2*i+1);
155  Real fm = facy / static_cast<Real>(2*i-1);
156  y += ((fm-fp)*(x(i,j+1,k)-x(i,j,k))
157  + (fm-fp)*(x(i,j-1,k)-x(i,j,k)));
158  }
159  return y * sigma;
160  }
161 }
162 
164 void mlndlap_normalize_ha (int i, int j, int k, Array4<Real> const& x, Array4<Real const> const& sx,
165  Array4<Real const> const& sy, Array4<int const> const& msk,
166  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
167 {
168  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
169  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
170 
171  if (!msk(i,j,k)) {
172  x(i,j,k) /= (-Real(2.0))*(facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
173  +facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
174  }
175 }
176 
178 void mlndlap_normalize_aa (int i, int j, int k, Array4<Real> const& x, Array4<Real const> const& sig,
179  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
180 {
181  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
182  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
183  Real fxy = facx + facy;
184 
185  if (!msk(i,j,k)) {
186  x(i,j,k) /= (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k));
187  }
188 }
189 
191 void mlndlap_jacobi_ha (int i, int j, int k, Array4<Real> const& sol, Real Ax,
192  Array4<Real const> const& rhs, Array4<Real const> const& sx,
193  Array4<Real const> const& sy, Array4<int const> const& msk,
194  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
195 {
196  Real facx = -Real(2.0/6.0)*dxinv[0]*dxinv[0];
197  Real facy = -Real(2.0/6.0)*dxinv[1]*dxinv[1];
198 
199  if (msk(i,j,k)) {
200  sol(i,j,k) = Real(0.0);
201  } else {
202  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax)
203  / (facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
204  + facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
205  }
206 }
207 
208 inline
209 void mlndlap_jacobi_ha (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
210  Array4<Real const> const& rhs, Array4<Real const> const& sx,
211  Array4<Real const> const& sy, Array4<int const> const& msk,
212  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
213 {
214  Real facx = -Real(2.0/6.0)*dxinv[0]*dxinv[0];
215  Real facy = -Real(2.0/6.0)*dxinv[1]*dxinv[1];
216 
217  amrex::LoopConcurrentOnCpu(bx, [&] (int i, int j, int k) noexcept
218  {
219  if (msk(i,j,k)) {
220  sol(i,j,k) = Real(0.0);
221  } else {
222  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax(i,j,k))
223  / (facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
224  + facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
225  }
226  });
227 }
228 
230 void mlndlap_jacobi_aa (int i, int j, int k, Array4<Real> const& sol, Real Ax,
231  Array4<Real const> const& rhs, Array4<Real const> const& sig,
232  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
233 {
234  Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]);
235 
236  if (msk(i,j,k)) {
237  sol(i,j,k) = Real(0.0);
238  } else {
239  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax)
240  / (fac*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k)));
241  }
242 }
243 
245 void mlndlap_jacobi_c (int i, int j, int k, Array4<Real> const& sol, Real Ax,
246  Array4<Real const> const& rhs, Real sig,
247  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
248 {
249  Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]);
250 
251  if (msk(i,j,k)) {
252  sol(i,j,k) = Real(0.0);
253  } else {
254  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax)
255  / (fac*Real(4.)*sig);
256  }
257 }
258 
259 inline
260 void mlndlap_jacobi_aa (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
261  Array4<Real const> const& rhs, Array4<Real const> const& sig,
262  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
263 {
264  Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]);
265 
266  amrex::LoopConcurrentOnCpu(bx, [&] (int i, int j, int k) noexcept
267  {
268  if (msk(i,j,k)) {
269  sol(i,j,k) = Real(0.0);
270  } else {
271  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax(i,j,k))
272  / (fac*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k)));
273  }
274  });
275 }
276 
277 inline
278 void mlndlap_jacobi_c (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
279  Array4<Real const> const& rhs, Real sig,
280  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
281 {
282  Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]);
283 
284  amrex::LoopConcurrentOnCpu(bx, [&] (int i, int j, int k) noexcept
285  {
286  if (msk(i,j,k)) {
287  sol(i,j,k) = Real(0.0);
288  } else {
289  sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax(i,j,k))
290  / (fac*Real(4.)*sig);
291  }
292  });
293 }
294 
295 inline
296 void mlndlap_gauss_seidel_ha (Box const& bx, Array4<Real> const& sol,
297  Array4<Real const> const& rhs, Array4<Real const> const& sx,
298  Array4<Real const> const& sy, Array4<int const> const& msk,
299  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
300  bool is_rz) noexcept
301 {
302  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
303  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
304 
305  amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept
306  {
307  if (msk(i,j,k)) {
308  sol(i,j,k) = Real(0.0);
309  } else {
310  Real s0 = Real(-2.0)*(facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
311  +facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
312 
313  Real Ax = sol(i-1,j-1,k)*(facx*sx(i-1,j-1,k)+facy*sy(i-1,j-1,k))
314  + sol(i+1,j-1,k)*(facx*sx(i ,j-1,k)+facy*sy(i ,j-1,k))
315  + sol(i-1,j+1,k)*(facx*sx(i-1,j ,k)+facy*sy(i-1,j ,k))
316  + sol(i+1,j+1,k)*(facx*sx(i ,j ,k)+facy*sy(i ,j ,k))
317  + sol(i-1,j,k)*(Real(2.0)*facx*(sx(i-1,j-1,k)+sx(i-1,j,k))
318  - facy*(sy(i-1,j-1,k)+sy(i-1,j,k)))
319  + sol(i+1,j,k)*(Real(2.0)*facx*(sx(i ,j-1,k)+sx(i ,j,k))
320  - facy*(sy(i ,j-1,k)+sy(i ,j,k)))
321  + sol(i,j-1,k)*( -facx*(sx(i-1,j-1,k)+sx(i,j-1,k))
322  +Real(2.0)*facy*(sy(i-1,j-1,k)+sy(i,j-1,k)))
323  + sol(i,j+1,k)*( -facx*(sx(i-1,j ,k)+sx(i,j ,k))
324  +Real(2.0)*facy*(sy(i-1,j ,k)+sy(i,j ,k)))
325  + sol(i,j,k)*s0;
326 
327  if (is_rz) {
328  Real fp = facy / static_cast<Real>(2*i+1);
329  Real fm = facy / static_cast<Real>(2*i-1);
330  Real frzlo = fm*sy(i-1,j-1,k)-fp*sy(i,j-1,k);
331  Real frzhi = fm*sy(i-1,j ,k)-fp*sy(i,j ,k);
332  s0 += - frzhi - frzlo;
333  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
334  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
335  }
336 
337  sol(i,j,k) += (rhs(i,j,k) - Ax) / s0;
338  }
339  });
340 }
341 
342 inline
343 void mlndlap_gauss_seidel_aa (Box const& bx, Array4<Real> const& sol,
344  Array4<Real const> const& rhs, Array4<Real const> const& sig,
345  Array4<int const> const& msk,
346  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
347  bool is_rz) noexcept
348 {
349  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
350  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
351  Real fxy = facx + facy;
352  Real f2xmy = Real(2.0)*facx - facy;
353  Real fmx2y = Real(2.0)*facy - facx;
354 
355  amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept
356  {
357  if (msk(i,j,k)) {
358  sol(i,j,k) = Real(0.0);
359  } else {
360  Real s0 = (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k));
361  Real Ax = sol(i-1,j-1,k)*fxy*sig(i-1,j-1,k)
362  + sol(i+1,j-1,k)*fxy*sig(i ,j-1,k)
363  + sol(i-1,j+1,k)*fxy*sig(i-1,j ,k)
364  + sol(i+1,j+1,k)*fxy*sig(i ,j ,k)
365  + sol(i-1,j,k)*f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k))
366  + sol(i+1,j,k)*f2xmy*(sig(i ,j-1,k)+sig(i ,j,k))
367  + sol(i,j-1,k)*fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k))
368  + sol(i,j+1,k)*fmx2y*(sig(i-1,j ,k)+sig(i,j ,k))
369  + sol(i,j,k)*s0;
370 
371  if (is_rz) {
372  Real fp = facy / static_cast<Real>(2*i+1);
373  Real fm = facy / static_cast<Real>(2*i-1);
374  Real frzlo = fm*sig(i-1,j-1,k)-fp*sig(i,j-1,k);
375  Real frzhi = fm*sig(i-1,j ,k)-fp*sig(i,j ,k);
376  s0 += - frzhi - frzlo;
377  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
378  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
379  }
380 
381  sol(i,j,k) += (rhs(i,j,k) - Ax) / s0;
382  }
383  });
384 }
385 
386 inline
387 void mlndlap_gauss_seidel_c (Box const& bx, Array4<Real> const& sol,
388  Array4<Real const> const& rhs, Real sig,
389  Array4<int const> const& msk,
390  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
391  bool is_rz) noexcept
392 {
393  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
394  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
395  Real fxy = facx + facy;
396  Real f2xmy = Real(2.0)*facx - facy;
397  Real fmx2y = Real(2.0)*facy - facx;
398 
399  amrex::LoopOnCpu(bx, [&] (int i, int j, int k) noexcept
400  {
401  if (msk(i,j,k)) {
402  sol(i,j,k) = Real(0.0);
403  } else {
404  Real s0 = (-Real(2.0))*fxy*Real(4.);
405  Real Ax = sol(i-1,j-1,k)*fxy
406  + sol(i+1,j-1,k)*fxy
407  + sol(i-1,j+1,k)*fxy
408  + sol(i+1,j+1,k)*fxy
409  + sol(i-1,j,k)*f2xmy*Real(2.)
410  + sol(i+1,j,k)*f2xmy*Real(2.)
411  + sol(i,j-1,k)*fmx2y*Real(2.)
412  + sol(i,j+1,k)*fmx2y*Real(2.)
413  + sol(i,j,k)*s0;
414 
415  if (is_rz) {
416  Real fp = facy / static_cast<Real>(2*i+1);
417  Real fm = facy / static_cast<Real>(2*i-1);
418  Real frzlo = fm-fp;
419  Real frzhi = fm-fp;
420  s0 += - frzhi - frzlo;
421  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
422  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
423  }
424 
425  sol(i,j,k) += (rhs(i,j,k) - Ax*sig) / (s0*sig);
426  }
427  });
428 }
429 
433  int ilen ) noexcept
434 {
435  Real bet = b_ls(0);
436  u_ls(0) = r_ls(0) / bet;
437 
438  for (int i = 1; i <= ilen - 1; i++) {
439  gam(i) = c_ls(i-1) / bet;
440  bet = b_ls(i) - a_ls(i)*gam(i);
441  if (bet == 0) { amrex::Abort(">>>TRIDIAG FAILED"); }
442  u_ls(i) = (r_ls(i)-a_ls(i)*u_ls(i-1)) / bet;
443  }
444  for (int i = ilen-2; i >= 0; i--) {
445  u_ls(i) = u_ls(i) - gam(i+1)*u_ls(i+1);
446  }
447 }
448 
449 inline
451  Array4<Real const> const& rhs, Array4<Real const> const& sig,
452  Array4<int const> const& msk,
453  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
454  bool is_rz) noexcept
455 {
456  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
457  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
458  Real fxy = facx + facy;
459  Real f2xmy = Real(2.0)*facx - facy;
460  Real fmx2y = Real(2.0)*facy - facx;
461 
462  if (is_rz) {
463  amrex::Abort("mlndlap_gauss_seidel_with_line_solve_aa is not implemented in r-z 2D ");
464  }
465 
466  const auto lo = amrex::lbound(bx);
467  const auto hi = amrex::ubound(bx);
468 
469  int idir = -1;
470  int ilen = 33;
471  int k = 0;
472  if (dxinv[0] <= dxinv[1]) {
473  idir = 1;
474  ilen = hi.y - lo.y + 1;
475  }
476  if (dxinv[1] <= dxinv[0]) {
477  idir = 0;
478  ilen = hi.x - lo.x + 1;
479  }
480 
481  if (ilen > 32) {
482  amrex::Abort("mlndlap_gauss_seidel_with_line_solve_aa is hard-wired to be no longer than 32");
483  }
484 
485  Array1D<Real,0,31> a_ls,b_ls,c_ls,u_ls,r_ls,gam;
486 
487  if (idir == 1) {
488  for (int i = lo.x; i <= hi.x; ++i)
489  {
490  for (int j = lo.y; j <= hi.y; ++j)
491  {
492  if (msk(i,j,k)) {
493  a_ls(j-lo.y) = 0.;
494  b_ls(j-lo.y) = 1.;
495  c_ls(j-lo.y) = 0.;
496  u_ls(j-lo.y) = 0.;
497  r_ls(j-lo.y) = 0.;
498  }
499  else
500  {
501  Real s0 = (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k));
502 
503  Real Ax = sol(i-1,j-1,k)*fxy*sig(i-1,j-1,k)
504  + sol(i+1,j-1,k)*fxy*sig(i ,j-1,k)
505  + sol(i-1,j+1,k)*fxy*sig(i-1,j ,k)
506  + sol(i+1,j+1,k)*fxy*sig(i ,j ,k)
507  + sol(i-1,j,k)*f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k))
508  + sol(i+1,j,k)*f2xmy*(sig(i ,j-1,k)+sig(i ,j,k));
509 
510  a_ls(j-lo.y) = fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k));
511  b_ls(j-lo.y) = s0;
512  c_ls(j-lo.y) = fmx2y*(sig(i-1,j ,k)+sig(i,j ,k));
513  u_ls(j-lo.y) = 0.;
514  r_ls(j-lo.y) = rhs(i,j,k) - Ax;
515  }
516  }
517  tridiagonal_solve(a_ls, b_ls, c_ls, r_ls, u_ls, gam, ilen);
518 
519  for (int j = lo.y; j <= hi.y; ++j)
520  {
521  sol(i,j,k) = u_ls(j-lo.y);
522  }
523  }
524  } else if (idir == 0) {
525  for (int j = lo.y ;j <= hi.y; ++j)
526  {
527  for (int i = lo.x; i <= hi.x; ++i)
528  {
529  if (msk(i,j,k)) {
530  a_ls(i-lo.x) = 0.;
531  b_ls(i-lo.x) = 1.;
532  c_ls(i-lo.x) = 0.;
533  u_ls(i-lo.x) = 0.;
534  r_ls(i-lo.x) = 0.;
535  }
536  else
537  {
538  Real s0 = (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k));
539 
540  Real Ax = sol(i-1,j-1,k)*fxy*sig(i-1,j-1,k)
541  + sol(i+1,j-1,k)*fxy*sig(i ,j-1,k)
542  + sol(i-1,j+1,k)*fxy*sig(i-1,j ,k)
543  + sol(i+1,j+1,k)*fxy*sig(i ,j ,k)
544  + sol(i,j-1,k)*fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k))
545  + sol(i,j+1,k)*fmx2y*(sig(i-1,j ,k)+sig(i,j ,k));
546 
547  a_ls(i-lo.x) = f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k));
548  b_ls(i-lo.x) = s0;
549  c_ls(i-lo.x) = f2xmy*(sig(i ,j-1,k)+sig(i ,j,k));
550  u_ls(i-lo.x) = 0.;
551  r_ls(i-lo.x) = rhs(i,j,k) - Ax;
552 
553  }
554  }
555  tridiagonal_solve(a_ls, b_ls, c_ls, r_ls, u_ls, gam, ilen);
556 
557  for (int i = lo.x; i <= hi.x; ++i)
558  {
559  sol(i,j,k) = u_ls(i-lo.x);
560  }
561  }
562  } else {
563  amrex::Abort("mlndlap_gauss_seidel_with_line_solve_aa is wrong direction.");
564  }
565 
566 }
567 
568 //
569 // interpolation
570 //
571 
574  int i, int j, int ic, int jc) noexcept
575  {
576  Real w1 = sig(i-1,j-1,0) + sig(i-1,j,0);
577  Real w2 = sig(i ,j-1,0) + sig(i ,j,0);
578  return (w1*crse(ic,jc,0)+w2*crse(ic+1,jc,0))/(w1+w2);
579  }
580 
583  int i, int j, int ic, int jc) noexcept
584  {
585  Real w1 = sig(i-1,j-1,0) + sig(i,j-1,0);
586  Real w2 = sig(i-1,j ,0) + sig(i,j ,0);
587  return (w1*crse(ic,jc,0)+w2*crse(ic,jc+1,0))/(w1+w2);
588  }
589 
592  int i, int j, int ic, int jc) noexcept
593  {
594  Real w1 = sig(i-1,j-1,0) + sig(i-1,j,0);
595  Real w2 = sig(i ,j-1,0) + sig(i ,j,0);
596  Real w3 = sig(i-1,j-1,0) + sig(i,j-1,0);
597  Real w4 = sig(i-1,j ,0) + sig(i,j ,0);
598  return (w1 * aa_interp_line_y(crse,sig,i-1,j ,ic ,jc ) +
599  w2 * aa_interp_line_y(crse,sig,i+1,j ,ic+1,jc ) +
600  w3 * aa_interp_line_x(crse,sig,i ,j-1,ic ,jc ) +
601  w4 * aa_interp_line_x(crse,sig,i ,j+1,ic ,jc+1)) / (w1+w2+w3+w4);
602  }
603 
605 void mlndlap_interpadd_c (int i, int j, int, Array4<Real> const& fine,
606  Array4<Real const> const& crse,
607  Array4<int const> const& msk) noexcept
608 {
609  if (!msk(i,j,0)) {
610  int ic = amrex::coarsen(i,2);
611  int jc = amrex::coarsen(j,2);
612  bool i_is_odd = (ic*2 != i);
613  bool j_is_odd = (jc*2 != j);
614  if (i_is_odd && j_is_odd) {
615  // Node on a X-Y face
616  fine(i,j,0) += Real(0.25) * (crse(ic ,jc ,0) +
617  crse(ic+1,jc ,0) +
618  crse(ic ,jc+1,0) +
619  crse(ic+1,jc+1,0));
620  } else if (i_is_odd) {
621  // Node on X line
622  fine(i,j,0) += Real(0.5) * (crse(ic,jc,0)+crse(ic+1,jc,0));
623  } else if (j_is_odd) {
624  // Node on Y line
625  fine(i,j,0) += Real(0.5) * (crse(ic,jc,0)+crse(ic,jc+1,0));
626  } else {
627  // Node coincident with coarse node
628  fine(i,j,0) += crse(ic,jc,0);
629  }
630  }
631 }
632 
634 void mlndlap_interpadd_aa (int i, int j, int, Array4<Real> const& fine,
635  Array4<Real const> const& crse, Array4<Real const> const& sig,
636  Array4<int const> const& msk) noexcept
637 {
638  if (!msk(i,j,0)) {
639  int ic = amrex::coarsen(i,2);
640  int jc = amrex::coarsen(j,2);
641  bool i_is_odd = (ic*2 != i);
642  bool j_is_odd = (jc*2 != j);
643  if (i_is_odd && j_is_odd) {
644  // Node on a X-Y face
645  fine(i,j,0) += aa_interp_face_xy(crse,sig,i,j,ic,jc);
646  } else if (i_is_odd) {
647  // Node on X line
648  fine(i,j,0) += aa_interp_line_x(crse,sig,i,j,ic,jc);
649  } else if (j_is_odd) {
650  // Node on Y line
651  fine(i,j,0) += aa_interp_line_y(crse,sig,i,j,ic,jc);
652  } else {
653  // Node coincident with coarse node
654  fine(i,j,0) += crse(ic,jc,0);
655  }
656  }
657 }
658 
660 void mlndlap_semi_interpadd_aa (int i, int j, int, Array4<Real> const& fine,
661  Array4<Real const> const& crse, Array4<Real const> const& sig,
662  Array4<int const> const& msk, int idir) noexcept
663 {
664  if (idir == 1) {
665  if (!msk(i,j,0)) {
666  int ic = amrex::coarsen(i,2);
667  int jc = j;
668  bool i_is_odd = (ic*2 != i);
669  if (i_is_odd) {
670  // Node on X line
671  fine(i,j,0) += aa_interp_line_x(crse,sig,i,j,ic,jc);
672  } else {
673  //Node coincident with coarse node
674  fine(i,j,0) += crse(ic,jc,0);
675  }
676  }
677  } else if (idir == 0 ) {
678  if (!msk(i,j,0)) {
679  int ic = i;
680  int jc = amrex::coarsen(j,2);
681  bool j_is_odd = (ic*2 != i);
682  if (j_is_odd) {
683  // Node on Y line
684  fine(i,j,0) += aa_interp_line_y(crse,sig,i,j,ic,jc);
685  } else {
686  //Node coincident with coarse node
687  fine(i,j,0) += crse(ic,jc,0);
688  }
689  }
690  }
691 }
692 
695  Array4<Real const> const& sigx, Array4<Real const> const& sigy,
696  int i, int j, int ic, int jc) noexcept
697  {
698  Real w1 = sigx(i-1,j-1,0) + sigx(i-1,j,0);
699  Real w2 = sigx(i ,j-1,0) + sigx(i ,j,0);
700  Real w3 = sigy(i-1,j-1,0) + sigy(i,j-1,0);
701  Real w4 = sigy(i-1,j ,0) + sigy(i,j ,0);
702  return (w1 * aa_interp_line_y(crse,sigy,i-1,j ,ic ,jc ) +
703  w2 * aa_interp_line_y(crse,sigy,i+1,j ,ic+1,jc ) +
704  w3 * aa_interp_line_x(crse,sigx,i ,j-1,ic ,jc ) +
705  w4 * aa_interp_line_x(crse,sigx,i ,j+1,ic ,jc+1)) / (w1+w2+w3+w4);
706  }
707 
709 void mlndlap_interpadd_ha (int i, int j, int,
710  Array4<Real> const& fine, Array4<Real const> const& crse,
711  Array4<Real const> const& sigx, Array4<Real const> const& sigy,
712  Array4<int const> const& msk) noexcept
713 {
714  if (!msk(i,j,0)) {
715  int ic = amrex::coarsen(i,2);
716  int jc = amrex::coarsen(j,2);
717  bool i_is_odd = (ic*2 != i);
718  bool j_is_odd = (jc*2 != j);
719  if (i_is_odd && j_is_odd) {
720  // Node on a X-Y face
721  fine(i,j,0) += ha_interp_face_xy(crse,sigx,sigy,i,j,ic,jc);
722  } else if (i_is_odd) {
723  // Node on X line
724  fine(i,j,0) += aa_interp_line_x(crse,sigx,i,j,ic,jc);
725  } else if (j_is_odd) {
726  // Node on Y line
727  fine(i,j,0) += aa_interp_line_y(crse,sigy,i,j,ic,jc);
728  } else {
729  // Node coincident with coarse node
730  fine(i,j,0) += crse(ic,jc,0);
731  }
732  }
733 }
734 
735 //
736 // rhs & u
737 //
738 
740 void mlndlap_divu (int i, int j, int k, Array4<Real> const& rhs, Array4<Real const> const& vel,
741  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
742  Box const& nodal_domain,
745  bool is_rz) noexcept
746 {
747  Real facx = Real(0.5)*dxinv[0];
748  Real facy = Real(0.5)*dxinv[1];
749 
750  const auto domlo = amrex::lbound(nodal_domain);
751  const auto domhi = amrex::ubound(nodal_domain);
752 
753  if (msk(i,j,k)) {
754  rhs(i,j,k) = Real(0.0);
755  } else {
756 
757  Real zero_ilo = Real(1.0);
758  Real zero_ihi = Real(1.0);
759  Real zero_jlo = Real(1.0);
760  Real zero_jhi = Real(1.0);
761 
762  // The nodal divergence operator should not see the tangential velocity
763  // at an inflow face
764  if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
765  && i == domlo.x)
766  {
767  zero_ilo = Real(0.0);
768  }
769  if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
770  && i == domhi.x)
771  {
772  zero_ihi = Real(0.0);
773  }
774  if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
775  && j == domlo.y)
776  {
777  zero_jlo = Real(0.0);
778  }
779  if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
780  && j == domhi.y)
781  {
782  zero_jhi = Real(0.0);
783  }
784 
785  rhs(i,j,k) = facx*(-vel(i-1,j-1,k,0)*zero_jlo + vel(i,j-1,k,0)*zero_jlo
786  -vel(i-1,j ,k,0)*zero_jhi + vel(i,j ,k,0)*zero_jhi)
787  + facy*(-vel(i-1,j-1,k,1)*zero_ilo - vel(i,j-1,k,1)*zero_ihi
788  +vel(i-1,j ,k,1)*zero_ilo + vel(i,j ,k,1)*zero_ihi);
789  if (is_rz) {
790  // Here we assume we can't have inflow in the radial direction
791  Real fm = facy / static_cast<Real>(6*i-3);
792  Real fp = facy / static_cast<Real>(6*i+3);
793  rhs(i,j,k) += fm*(vel(i-1,j,k,1)-vel(i-1,j-1,k,1))
794  - fp*(vel(i ,j,k,1)-vel(i ,j-1,k,1));
795  }
796  }
797 }
798 
800 Real mlndlap_rhcc (int i, int j, int k, Array4<Real const> const& rhcc,
801  Array4<int const> const& msk) noexcept
802 {
803  Real r;
804  if (msk(i,j,k)) {
805  r = Real(0.0);
806  } else {
807  r = Real(0.25) * (rhcc(i-1,j-1,k)+rhcc(i,j-1,k)+rhcc(i-1,j,k)+rhcc(i,j,k));
808  }
809  return r;
810 }
811 
813 void mlndlap_mknewu (int i, int j, int k, Array4<Real> const& u, Array4<Real const> const& p,
814  Array4<Real const> const& sig, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
815  bool is_rz) noexcept
816 {
817  Real facx = Real(0.5)*dxinv[0];
818  Real facy = Real(0.5)*dxinv[1];
819  u(i,j,k,0) -= sig(i,j,k)*facx*(-p(i,j,k)+p(i+1,j,k)-p(i,j+1,k)+p(i+1,j+1,k));
820  u(i,j,k,1) -= sig(i,j,k)*facy*(-p(i,j,k)-p(i+1,j,k)+p(i,j+1,k)+p(i+1,j+1,k));
821  if (is_rz) {
822  Real frz = sig(i,j,k)*facy / static_cast<Real>(6*i+3);
823  u(i,j,k,1) += frz*(p(i,j,k)-p(i+1,j,k)-p(i,j+1,k)+p(i+1,j+1,k));
824  }
825 }
826 
828 void mlndlap_mknewu_c (int i, int j, int k, Array4<Real> const& u, Array4<Real const> const& p,
829  Real sig, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
830  bool is_rz) noexcept
831 {
832  Real facx = Real(0.5)*dxinv[0];
833  Real facy = Real(0.5)*dxinv[1];
834  u(i,j,k,0) -= sig*facx*(-p(i,j,k)+p(i+1,j,k)-p(i,j+1,k)+p(i+1,j+1,k));
835  u(i,j,k,1) -= sig*facy*(-p(i,j,k)-p(i+1,j,k)+p(i,j+1,k)+p(i+1,j+1,k));
836  if (is_rz) {
837  Real frz = sig*facy / static_cast<Real>(6*i+3);
838  u(i,j,k,1) += frz*(p(i,j,k)-p(i+1,j,k)-p(i,j+1,k)+p(i+1,j+1,k));
839  }
840 }
841 
843  Real mlndlap_sum_Df (int ii, int jj, Real facx, Real facy,
844  Array4<Real const> const& vel, Box const& velbx, bool is_rz) noexcept
845  {
846  Real fm = is_rz ? facy / static_cast<Real>(6*ii-3) : Real(0.0);
847  Real fp = is_rz ? facy / static_cast<Real>(6*ii+3) : Real(0.0);
848 
849  Real Df = Real(0.0);
850  if (velbx.contains(ii-1,jj-1,0)) {
851  Df += -facx*vel(ii-1,jj-1,0,0) - (facy+fm)*vel(ii-1,jj-1,0,1);
852  }
853  if (velbx.contains(ii,jj-1,0)) {
854  Df += facx*vel(ii,jj-1,0,0) - (facy-fp)*vel(ii,jj-1,0,1);
855  }
856  if (velbx.contains(ii-1,jj,0)) {
857  Df += -facx*vel(ii-1,jj,0,0) + (facy+fm)*vel(ii-1,jj,0,1);
858  }
859  if (velbx.contains(ii,jj,0)) {
860  Df += facx*vel(ii,jj,0,0) + (facy-fp)*vel(ii,jj,0,1);
861  }
862  return Df;
863  }
864 
865 template <int rr>
867 void mlndlap_divu_fine_contrib (int i, int j, int /*k*/, Box const& fvbx, Box const& velbx,
868  Array4<Real> const& rhs, Array4<Real const> const& vel,
869  Array4<Real const> const& frhs, Array4<int const> const& msk,
870  GpuArray<Real,AMREX_SPACEDIM> const& dxinv, bool is_rz) noexcept
871 {
872  const int ii = rr*i;
873  const int jj = rr*j;
874  if (msk(ii,jj,0)) {
875  const Real facx = Real(0.5)*dxinv[0];
876  const Real facy = Real(0.5)*dxinv[1];
877 
878  Real Df = Real(0.0);
879 
880  const int ilo = amrex::max(ii-rr+1, fvbx.smallEnd(0));
881  const int ihi = amrex::min(ii+rr-1, fvbx.bigEnd (0));
882  const int jlo = amrex::max(jj-rr+1, fvbx.smallEnd(1));
883  const int jhi = amrex::min(jj+rr-1, fvbx.bigEnd (1));
884 
885  for (int joff = jlo; joff <= jhi; ++joff) {
886  for (int ioff = ilo; ioff <= ihi; ++ioff) {
887  Real scale = static_cast<Real>((rr-std::abs(ii-ioff)) *
888  (rr-std::abs(jj-joff)));
889  if (fvbx.strictly_contains(ioff,joff,0)) {
890  Df += scale * frhs(ioff,joff,0);
891  } else {
892  Df += scale * mlndlap_sum_Df(ioff, joff, facx, facy, vel, velbx, is_rz);
893  }
894  }}
895 
896  rhs(i,j,0) = Df * (Real(1.0)/static_cast<Real>(rr*rr*rr*rr));
897  } else {
898  rhs(i,j,0) = Real(0.0);
899  }
900 }
901 
902 template <int rr>
904 void mlndlap_rhcc_fine_contrib (int i, int j, int, Box const& ccbx,
905  Array4<Real> const& rhs, Array4<Real const> const& cc,
906  Array4<int const> const& msk) noexcept
907 {
908  const int ii = rr*i;
909  const int jj = rr*j;
910  if (msk(ii,jj,0)) {
911  Real tmp = Real(0.0);
912 
913  const int ilo = amrex::max(ii-rr , ccbx.smallEnd(0));
914  const int ihi = amrex::min(ii+rr-1, ccbx.bigEnd (0));
915  const int jlo = amrex::max(jj-rr , ccbx.smallEnd(1));
916  const int jhi = amrex::min(jj+rr-1, ccbx.bigEnd (1));
917 
918  for (int joff = jlo; joff <= jhi; ++joff) {
919  for (int ioff = ilo; ioff <= ihi; ++ioff) {
920  Real scale = (static_cast<Real>(rr)-std::abs(static_cast<Real>(ioff-ii)+Real(0.5)))
921  * (static_cast<Real>(rr)-std::abs(static_cast<Real>(joff-jj)+Real(0.5)));
922  tmp += cc(ioff,joff,0) * scale;
923  }}
924 
925  rhs(i,j,0) += tmp * (Real(1.0)/Real(rr*rr*rr*rr));
926  }
927 }
928 
930  Real neumann_scale (int i, int j, Box const& nddom,
932  GpuArray<LinOpBCType,AMREX_SPACEDIM> const& bchi) noexcept
933  {
934  Real val = Real(1.0);
935 
936  const auto ndlo = amrex::lbound(nddom);
937  const auto ndhi = amrex::ubound(nddom);
938 
939  if ((i == ndlo.x && ( bclo[0] == LinOpBCType::Neumann ||
940  bclo[0] == LinOpBCType::inflow)) ||
941  (i == ndhi.x && ( bchi[0] == LinOpBCType::Neumann ||
942  bchi[0] == LinOpBCType::inflow))) {
943  val *= Real(2.0);
944  }
945 
946  if ((j == ndlo.y && ( bclo[1] == LinOpBCType::Neumann ||
947  bclo[1] == LinOpBCType::inflow)) ||
948  (j == ndhi.y && ( bchi[1] == LinOpBCType::Neumann ||
949  bchi[1] == LinOpBCType::inflow))) {
950  val *= Real(2.0);
951  }
952 
953  return val;
954  }
955 
957 void mlndlap_divu_cf_contrib (int i, int j, int, Array4<Real> const& rhs,
958  Array4<Real const> const& vel, Array4<Real const> const& fc,
959  Array4<Real const> const& rhcc, Array4<int const> const& dmsk,
960  Array4<int const> const& ndmsk, Array4<int const> const& ccmsk,
961  bool is_rz, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
962  Box const& ccdom_p, Box const& veldom, Box const& nddom,
964  GpuArray<LinOpBCType,AMREX_SPACEDIM> const& bchi) noexcept
965 {
966  using namespace nodelap_detail;
967 
968  if (!dmsk(i,j,0) && ndmsk(i,j,0) == crse_fine_node) {
969  Real facx = Real(0.5) * dxinv[0];
970  Real facy = Real(0.5) * dxinv[1];
971  Real tmp = fc(i,j,0);
972 
973  Real fm = is_rz ? facy / static_cast<Real>(6*i-3) : Real(0.0);
974  Real fp = is_rz ? facy / static_cast<Real>(6*i+3) : Real(0.0);
975 
976  // Where there is inflow, veldom there is bigger than ccdom_p by one cell.
977  // ccdom_p is cc domain grown at periodic boundaries.
978 
979  if (ccmsk(i-1,j-1,0) == crse_cell && veldom.contains(i-1,j-1,0)) {
980  tmp += -facx*vel(i-1,j-1,0,0) - (facy+fm)*vel(i-1,j-1,0,1);
981  if (rhcc && ccdom_p.contains(i-1,j-1,0)) {
982  tmp += Real(0.25) * rhcc(i-1,j-1,0);
983  }
984  }
985 
986  if (ccmsk(i,j-1,0) == crse_cell && veldom.contains(i,j-1,0)) {
987  tmp += facx*vel(i,j-1,0,0) - (facy-fp)*vel(i,j-1,0,1);
988  if (rhcc && ccdom_p.contains(i,j-1,0)) {
989  tmp += Real(0.25) * rhcc(i,j-1,0);
990  }
991  }
992 
993  if (ccmsk(i-1,j,0) == crse_cell && veldom.contains(i-1,j,0)) {
994  tmp += -facx*vel(i-1,j,0,0) + (facy+fm)*vel(i-1,j,0,1);
995  if (rhcc && ccdom_p.contains(i-1,j,0)) {
996  tmp += Real(0.25) * rhcc(i-1,j,0);
997  }
998  }
999 
1000  if (ccmsk(i,j,0) == crse_cell && veldom.contains(i,j,0)) {
1001  tmp += facx*vel(i,j,0,0) + (facy-fp)*vel(i,j,0,1);
1002  if (rhcc && ccdom_p.contains(i,j,0)) {
1003  tmp += Real(0.25) * rhcc(i,j,0);
1004  }
1005  }
1006 
1007  rhs(i,j,0) = tmp * neumann_scale(i, j, nddom, bclo, bchi);
1008  }
1009 }
1010 
1011 //
1012 // residual
1013 //
1014 
1016 void mlndlap_crse_resid (int i, int j, int k, Array4<Real> const& resid,
1017  Array4<Real const> const& rhs, Array4<int const> const& msk,
1018  Box const& nddom, GpuArray<LinOpBCType,AMREX_SPACEDIM> const& bclo,
1019  GpuArray<LinOpBCType,AMREX_SPACEDIM> const& bchi,
1020  bool neumann_doubling) noexcept
1021 {
1022  if ( msk(i-1,j-1,k ) == 0 ||
1023  msk(i ,j-1,k ) == 0 ||
1024  msk(i-1,j ,k ) == 0 ||
1025  msk(i ,j ,k ) == 0 )
1026  {
1027  Real fac = Real(1.0);
1028  if (neumann_doubling) {
1029  const auto ndlo = amrex::lbound(nddom);
1030  const auto ndhi = amrex::ubound(nddom);
1031  if ((i == ndlo.x && ( bclo[0] == LinOpBCType::Neumann ||
1032  bclo[0] == LinOpBCType::inflow)) ||
1033  (i == ndhi.x && ( bchi[0] == LinOpBCType::Neumann ||
1034  bchi[0] == LinOpBCType::inflow))) {
1035  fac *= Real(2.);
1036  }
1037  if ((j == ndlo.y && ( bclo[1] == LinOpBCType::Neumann ||
1038  bclo[1] == LinOpBCType::inflow)) ||
1039  (j == ndhi.y && ( bchi[1] == LinOpBCType::Neumann ||
1040  bchi[1] == LinOpBCType::inflow))) {
1041  fac *= Real(2.);
1042  }
1043  }
1044 
1045  resid(i,j,k) = (rhs(i,j,k) - resid(i,j,k)) * fac;
1046  } else {
1047  resid(i,j,k) = Real(0.);
1048  }
1049 }
1050 
1051 //
1052 // sync residual
1053 //
1054 
1055  template <typename P, typename S>
1057  Real mlndlap_sum_Ax (P const& pred, S const& sig, int i, int j, Real facx, Real facy,
1058  Array4<Real const> const& phi, bool is_rz) noexcept
1059  {
1060  Real Ax = Real(0.0);
1061  Real fp = Real(0.0), fm = Real(0.0);
1062  if (is_rz) {
1063  fp = facy / static_cast<Real>(2*i+1);
1064  fm = facy / static_cast<Real>(2*i-1);
1065  }
1066  if (pred(i-1,j-1)) {
1067  Ax += sig(i-1,j-1,0)*(facx*(Real(2.)*(phi(i-1,j ,0)-phi(i ,j ,0))
1068  + (phi(i-1,j-1,0)-phi(i ,j-1,0)))
1069  + facy*(Real(2.)*(phi(i ,j-1,0)-phi(i ,j ,0))
1070  + (phi(i-1,j-1,0)-phi(i-1,j ,0)))
1071  + fm * (phi(i ,j-1,0)-phi(i ,j ,0)));
1072  }
1073  if (pred(i,j-1)) {
1074  Ax += sig(i,j-1,0)*(facx*(Real(2.)*(phi(i+1,j ,0)-phi(i ,j ,0))
1075  + (phi(i+1,j-1,0)-phi(i ,j-1,0)))
1076  + facy*(Real(2.)*(phi(i ,j-1,0)-phi(i ,j ,0))
1077  + (phi(i+1,j-1,0)-phi(i+1,j ,0)))
1078  - fp * (phi(i ,j-1,0)-phi(i ,j ,0)));
1079  }
1080  if (pred(i-1,j)) {
1081  Ax += sig(i-1,j,0)*(facx*(Real(2.)*(phi(i-1,j ,0)-phi(i ,j ,0))
1082  + (phi(i-1,j+1,0)-phi(i ,j+1,0)))
1083  + facy*(Real(2.)*(phi(i ,j+1,0)-phi(i ,j ,0))
1084  + (phi(i-1,j+1,0)-phi(i-1,j ,0)))
1085  + fm * (phi(i ,j+1,0)-phi(i ,j ,0)));
1086  }
1087  if (pred(i,j)) {
1088  Ax += sig(i,j,0)*(facx*(Real(2.)*(phi(i+1,j ,0)-phi(i ,j ,0))
1089  + (phi(i+1,j+1,0)-phi(i ,j+1,0)))
1090  + facy*(Real(2.)*(phi(i ,j+1,0)-phi(i ,j ,0))
1091  + (phi(i+1,j+1,0)-phi(i+1,j ,0)))
1092  - fp * (phi(i ,j+1,0)-phi(i ,j ,0)));
1093  }
1094  return Ax;
1095  }
1096 
1097  template <int rr, typename S>
1099  void mlndlap_Ax_fine_contrib_doit (S const& sig, int i, int j, Box const& ndbx, Box const& ccbx,
1100  Array4<Real> const& f, Array4<Real const> const& res,
1101  Array4<Real const> const& rhs,
1102  Array4<Real const> const& phi,
1103  Array4<int const> const& msk, bool is_rz,
1104  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1105  {
1106  const int ii = rr*i;
1107  const int jj = rr*j;
1108  if (msk(ii,jj,0)) {
1109  Real facx = Real(1./6.)*dxinv[0]*dxinv[0];
1110  Real facy = Real(1./6.)*dxinv[1]*dxinv[1];
1111 
1112  auto is_fine = [&ccbx] (int ix, int iy) -> bool {
1113  return ccbx.contains(ix,iy,0);
1114  };
1115 
1116  Real Df = Real(0.0);
1117 
1118  const int ilo = amrex::max(ii-rr+1, ndbx.smallEnd(0));
1119  const int ihi = amrex::min(ii+rr-1, ndbx.bigEnd (0));
1120  const int jlo = amrex::max(jj-rr+1, ndbx.smallEnd(1));
1121  const int jhi = amrex::min(jj+rr-1, ndbx.bigEnd (1));
1122 
1123  for (int joff = jlo; joff <= jhi; ++joff) {
1124  for (int ioff = ilo; ioff <= ihi; ++ioff) {
1125  Real scale = static_cast<Real>((rr-std::abs(ii-ioff)) *
1126  (rr-std::abs(jj-joff)));
1127  if (ndbx.strictly_contains(ioff,joff,0)) {
1128  Df += scale * (rhs(ioff,joff,0)-res(ioff,joff,0));
1129  } else {
1130  Df += scale * mlndlap_sum_Ax
1131  (is_fine, sig, ioff, joff, facx, facy, phi, is_rz);
1132  }
1133  }}
1134 
1135  f(i,j,0) = Df * (Real(1.0)/static_cast<Real>(rr*rr*rr*rr));
1136  } else {
1137  f(i,j,0) = Real(0.0);
1138  }
1139  }
1140 
1141 template <int rr>
1143 void mlndlap_Ax_fine_contrib (int i, int j, int /*k*/, Box const& ndbx, Box const& ccbx,
1144  Array4<Real> const& f, Array4<Real const> const& res,
1145  Array4<Real const> const& rhs, Array4<Real const> const& phi,
1146  Array4<Real const> const& sig, Array4<int const> const& msk,
1147  bool is_rz,
1148  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1149 {
1150  mlndlap_Ax_fine_contrib_doit<rr>
1151  ([&sig] (int ix, int iy, int) -> Real const& { return sig(ix,iy,0); },
1152  i,j,ndbx,ccbx,f,res,rhs,phi,msk,is_rz,dxinv);
1153 }
1154 
1155 template <int rr>
1157 void mlndlap_Ax_fine_contrib_cs (int i, int j, int /*k*/, Box const& ndbx, Box const& ccbx,
1158  Array4<Real> const& f, Array4<Real const> const& res,
1159  Array4<Real const> const& rhs, Array4<Real const> const& phi,
1160  Real const sig, Array4<int const> const& msk,
1161  bool is_rz,
1162  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1163 {
1164  mlndlap_Ax_fine_contrib_doit<rr>
1165  ([=] (int, int, int) -> Real { return sig; },
1166  i,j,ndbx,ccbx,f,res,rhs,phi,msk,is_rz,dxinv);
1167 }
1168 
1170 void mlndlap_res_cf_contrib (int i, int j, int, Array4<Real> const& res,
1171  Array4<Real const> const& phi, Array4<Real const> const& rhs,
1172  Array4<Real const> const& sig, Array4<int const> const& dmsk,
1173  Array4<int const> const& ndmsk, Array4<int const> const& ccmsk,
1174  Array4<Real const> const& fc,
1175  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
1176  Box const& ccdom_p, Box const& nddom,
1177  bool is_rz,
1180  bool neumann_doubling) noexcept
1181 {
1182  using namespace nodelap_detail;
1183 
1184  if (!dmsk(i,j,0) && ndmsk(i,j,0) == crse_fine_node) {
1185  Real facx = Real(1./6.)*dxinv[0]*dxinv[0];
1186  Real facy = Real(1./6.)*dxinv[1]*dxinv[1];
1187 
1188  Real Ax = mlndlap_sum_Ax([&ccmsk, &ccdom_p] (int ix, int iy) -> bool
1189  {
1190  return ccdom_p.contains(ix,iy,0)
1191  && (ccmsk(ix,iy,0) == crse_cell);
1192  },
1193  [&sig] (int ix, int iy, int) -> Real const&
1194  {
1195  return sig(ix,iy,0);
1196  },
1197  i, j, facx, facy, phi, is_rz);
1198  Ax += fc(i,j,0);
1199  Real const ns = (neumann_doubling) ? neumann_scale(i,j,nddom,bclo,bchi) : Real(1.0);
1200  res(i,j,0) = rhs(i,j,0) - Ax*ns;
1201  }
1202 }
1203 
1205 void mlndlap_res_cf_contrib_cs (int i, int j, int, Array4<Real> const& res,
1206  Array4<Real const> const& phi, Array4<Real const> const& rhs,
1207  Real const sig, Array4<int const> const& dmsk,
1208  Array4<int const> const& ndmsk, Array4<int const> const& ccmsk,
1209  Array4<Real const> const& fc,
1210  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
1211  Box const& ccdom_p, Box const& nddom,
1212  bool is_rz,
1215  bool neumann_doubling) noexcept
1216 {
1217  using namespace nodelap_detail;
1218 
1219  if (!dmsk(i,j,0) && ndmsk(i,j,0) == crse_fine_node) {
1220  Real facx = Real(1./6.)*dxinv[0]*dxinv[0];
1221  Real facy = Real(1./6.)*dxinv[1]*dxinv[1];
1222 
1223  Real Ax = mlndlap_sum_Ax([&ccmsk, &ccdom_p] (int ix, int iy) -> bool
1224  {
1225  return ccdom_p.contains(ix,iy,0)
1226  && (ccmsk(ix,iy,0) == crse_cell);
1227  },
1228  [=] (int, int, int) -> Real
1229  {
1230  return sig;
1231  },
1232  i, j, facx, facy, phi, is_rz);
1233  Ax += fc(i,j,0);
1234  Real const ns = (neumann_doubling) ? neumann_scale(i,j,nddom,bclo,bchi) : Real(1.0);
1235  res(i,j,0) = rhs(i,j,0) - Ax*ns;
1236  }
1237 }
1238 
1239 //
1240 // RAP
1241 //
1242 
1244 void mlndlap_set_stencil (Box const& bx, Array4<Real> const& sten,
1245  Array4<Real const> const& sigma,
1246  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1247 {
1248  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
1249  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
1250  Real fxy = facx + facy;
1251  Real f2xmy = Real(2.0)*facx - facy;
1252  Real fmx2y = Real(2.0)*facy - facx;
1253 
1254  amrex::LoopConcurrent(bx, [=] (int i, int j, int k) noexcept
1255  {
1256  sten(i,j,k,1) = f2xmy*(sigma(i,j-1,k)+sigma(i,j,k));
1257  sten(i,j,k,2) = fmx2y*(sigma(i-1,j,k)+sigma(i,j,k));
1258  sten(i,j,k,3) = fxy*sigma(i,j,k);
1259  });
1260 }
1261 
1263 void mlndlap_set_stencil_s0 (int i, int j, int k, Array4<Real> const& sten) noexcept
1264 {
1265  using namespace nodelap_detail;
1266 
1267  sten(i,j,k,0) = -(sten(i-1,j ,k,1) + sten(i ,j ,k,1)
1268  + sten(i ,j-1,k,2) + sten(i ,j ,k,2)
1269  + sten(i-1,j-1,k,3) + sten(i ,j-1,k,3)
1270  + sten(i-1,j ,k,3) + sten(i ,j ,k,3));
1271  sten(i,j,k,4) = Real(1.0) / (std::abs(sten(i-1,j ,k,1)) + std::abs(sten(i,j ,k,1))
1272  + std::abs(sten(i ,j-1,k,2)) + std::abs(sten(i,j ,k,2))
1273  + std::abs(sten(i-1,j-1,k,3)) + std::abs(sten(i,j-1,k,3))
1274  + std::abs(sten(i-1,j ,k,3)) + std::abs(sten(i,j ,k,3)) + eps);
1275 }
1276 
1278 void mlndlap_stencil_rap (int i, int j, int, Array4<Real> const& csten,
1279  Array4<Real const> const& fsten) noexcept
1280 {
1281  using namespace nodelap_detail;
1282 
1283  constexpr int k = 0;
1284 
1285 #if 0
1286  auto interp_from_mm_to = [&fsten] (int i_, int j_) -> Real {
1287  Real wxm = std::abs(fsten(i_-1,j_ ,0,1))/(std::abs(fsten(i_-1,j_-1,0,3))+std::abs(fsten(i_-1,j_ ,0,3))+eps);
1288  Real wym = std::abs(fsten(i_ ,j_-1,0,2))/(std::abs(fsten(i_-1,j_-1,0,3))+std::abs(fsten(i_ ,j_-1,0,3))+eps);
1289  Real wmm = std::abs(fsten(i_-1,j_-1,0,3)) * (Real(1.) + wxm + wym);
1290  return wmm * fsten(i_,j_,0,4);
1291  };
1292 #endif
1293 
1294  auto interp_from_mp_to = [&fsten] (int i_, int j_) -> Real {
1295  Real wxm = std::abs(fsten(i_-1,j_ ,0,1))/(std::abs(fsten(i_-1,j_-1,0,3))+std::abs(fsten(i_-1,j_ ,0,3))+eps);
1296  Real wyp = std::abs(fsten(i_ ,j_ ,0,2))/(std::abs(fsten(i_-1,j_ ,0,3))+std::abs(fsten(i_ ,j_ ,0,3))+eps);
1297  Real wmp = std::abs(fsten(i_-1,j_ ,0,3)) *(Real(1.) + wxm + wyp);
1298  return wmp * fsten(i_,j_,0,4);
1299  };
1300 
1301  auto interp_from_pm_to = [&fsten] (int i_, int j_) -> Real {
1302  Real wxp = std::abs(fsten(i_ ,j_ ,0,1))/(std::abs(fsten(i_ ,j_-1,0,3))+std::abs(fsten(i_ ,j_ ,0,3))+eps);
1303  Real wym = std::abs(fsten(i_ ,j_-1,0,2))/(std::abs(fsten(i_-1,j_-1,0,3))+std::abs(fsten(i_ ,j_-1,0,3))+eps);
1304  Real wpm = std::abs(fsten(i_ ,j_-1,0,3)) * (Real(1.) + wxp + wym);
1305  return wpm * fsten(i_,j_,0,4);
1306  };
1307 
1308  auto interp_from_pp_to = [&fsten] (int i_, int j_) -> Real {
1309  Real wxp = std::abs(fsten(i_ ,j_ ,0,1))/(std::abs(fsten(i_ ,j_-1,0,3))+std::abs(fsten(i_ ,j_ ,0,3))+eps);
1310  Real wyp = std::abs(fsten(i_ ,j_ ,0,2))/(std::abs(fsten(i_-1,j_ ,0,3))+std::abs(fsten(i_ ,j_ ,0,3))+eps);
1311  Real wpp = std::abs(fsten(i_ ,j_ ,0,3)) * (Real(1.) + wxp + wyp);
1312  return wpp * fsten(i_,j_,0,4);
1313  };
1314 
1315  auto interp_from_m0_to = [&fsten] (int i_, int j_) -> Real {
1316  return std::abs(fsten(i_-1,j_,0,1))/(std::abs(fsten(i_-1,j_,0,1))+std::abs(fsten(i_,j_,0,1))+eps);
1317  };
1318 
1319  auto interp_from_p0_to = [&fsten] (int i_, int j_) -> Real {
1320  return std::abs(fsten(i_,j_,0,1))/(std::abs(fsten(i_-1,j_,0,1))+std::abs(fsten(i_,j_,0,1))+eps);
1321  };
1322 
1323  auto interp_from_0m_to = [&fsten] (int i_, int j_) -> Real {
1324  return std::abs(fsten(i_,j_-1,0,2))/(std::abs(fsten(i_,j_-1,0,2))+std::abs(fsten(i_,j_,0,2))+eps);
1325  };
1326 
1327  auto interp_from_0p_to = [&fsten] (int i_, int j_) -> Real {
1328  return std::abs(fsten(i_,j_,0,2))/(std::abs(fsten(i_,j_-1,0,2))+std::abs(fsten(i_,j_,0,2))+eps);
1329  };
1330 
1331 #if 0
1332  auto Amm = [&fsten] (int i_, int j_) -> Real {
1333  return fsten(i_-1,j_-1,0,3);
1334  };
1335 #endif
1336 
1337  auto A0m = [&fsten] (int i_, int j_) -> Real {
1338  return fsten(i_,j_-1,0,2);
1339  };
1340 
1341  auto Apm = [&fsten] (int i_, int j_) -> Real {
1342  return fsten(i_,j_-1,0,3);
1343  };
1344 
1345  auto Am0 = [&fsten] (int i_, int j_) -> Real {
1346  return fsten(i_-1,j_,0,1);
1347  };
1348 
1349  auto A00 = [&fsten] (int i_, int j_) -> Real {
1350  return fsten(i_,j_,0,0);
1351  };
1352 
1353  auto Ap0 = [&fsten] (int i_, int j_) -> Real {
1354  return fsten(i_,j_,0,1);
1355  };
1356 
1357  auto Amp = [&fsten] (int i_, int j_) -> Real {
1358  return fsten(i_-1,j_,0,3);
1359  };
1360 
1361  auto A0p = [&fsten] (int i_, int j_) -> Real {
1362  return fsten(i_,j_,0,2);
1363  };
1364 
1365  auto App = [&fsten] (int i_, int j_) -> Real {
1366  return fsten(i_,j_,0,3);
1367  };
1368 
1369 #if 0
1370  auto restrict_from_mm_to = [&fsten] (int ii_, int jj_) -> Real {
1371  Real wxp = std::abs(fsten(ii_-1,jj_-1,0,1))/(std::abs(fsten(ii_-1,jj_-2,0,3))+std::abs(fsten(ii_-1,jj_-1,0,3))+eps);
1372  Real wyp = std::abs(fsten(ii_-1,jj_-1,0,2))/(std::abs(fsten(ii_-2,jj_-1,0,3))+std::abs(fsten(ii_-1,jj_-1,0,3))+eps);
1373  Real wpp = std::abs(fsten(ii_-1,jj_-1,0,3))*(Real(1.)+wxp+wyp);
1374  return wpp * fsten(ii_-1,jj_-1,0,4);
1375  };
1376 #endif
1377 
1378  auto restrict_from_0m_to = [&fsten] (int ii_, int jj_) -> Real {
1379  return std::abs(fsten(ii_,jj_-1,0,2))/(std::abs(fsten(ii_,jj_-2,0,2))+std::abs(fsten(ii_,jj_-1,0,2))+eps);
1380  };
1381 
1382  auto restrict_from_pm_to = [&fsten] (int ii_, int jj_) -> Real {
1383  Real wxm = std::abs(fsten(ii_ ,jj_-1,0,1))/(std::abs(fsten(ii_,jj_-2,0,3))+std::abs(fsten(ii_ ,jj_-1,0,3))+eps);
1384  Real wyp = std::abs(fsten(ii_+1,jj_-1,0,2))/(std::abs(fsten(ii_,jj_-1,0,3))+std::abs(fsten(ii_+1,jj_-1,0,3))+eps);
1385  Real wmp = std::abs(fsten(ii_ ,jj_-1,0,3)) *(Real(1.) + wxm + wyp);
1386  return wmp * fsten(ii_+1,jj_-1,0,4);
1387  };
1388 
1389  auto restrict_from_m0_to = [&fsten] (int ii_, int jj_) -> Real {
1390  return std::abs(fsten(ii_-1,jj_,0,1))/(std::abs(fsten(ii_-2,jj_,0,1))+std::abs(fsten(ii_-1,jj_,0,1))+eps);
1391  };
1392 
1393  auto restrict_from_p0_to = [&fsten] (int ii_, int jj_) -> Real {
1394  return std::abs(fsten(ii_,jj_,0,1))/(std::abs(fsten(ii_,jj_,0,1))+std::abs(fsten(ii_+1,jj_,0,1))+eps);
1395  };
1396 
1397  auto restrict_from_mp_to = [&fsten] (int ii_, int jj_) -> Real {
1398  Real wxp = std::abs(fsten(ii_-1,jj_+1,0,1))/(std::abs(fsten(ii_-1,jj_,0,3))+std::abs(fsten(ii_-1,jj_+1,0,3))+eps);
1399  Real wym = std::abs(fsten(ii_-1,jj_ ,0,2))/(std::abs(fsten(ii_-2,jj_,0,3))+std::abs(fsten(ii_-1,jj_ ,0,3))+eps);
1400  Real wpm = std::abs(fsten(ii_-1,jj_ ,0,3)) * (Real(1.) + wxp + wym);
1401  return wpm * fsten(ii_-1,jj_+1,0,4);
1402  };
1403 
1404  auto restrict_from_0p_to = [&fsten] (int ii_, int jj_) -> Real {
1405  return std::abs(fsten(ii_,jj_,0,2))/(std::abs(fsten(ii_,jj_,0,2))+std::abs(fsten(ii_,jj_+1,0,2))+eps);
1406  };
1407 
1408  auto restrict_from_pp_to = [&fsten] (int ii_, int jj_) -> Real {
1409  Real wxm = std::abs(fsten(ii_ ,jj_+1,0,1))/(std::abs(fsten(ii_ ,jj_ ,0,3))+std::abs(fsten(ii_ ,jj_+1,0,3))+eps);
1410  Real wym = std::abs(fsten(ii_+1,jj_ ,0,2))/(std::abs(fsten(ii_ ,jj_ ,0,3))+std::abs(fsten(ii_+1,jj_ ,0,3))+eps);
1411  Real wmm = std::abs(fsten(ii_ ,jj_ ,0,3)) * (Real(1.) + wxm + wym);
1412  return wmm * fsten(ii_+1,jj_+1,0,4);
1413  };
1414 
1415  int ii = 2*i;
1416  int jj = 2*j;
1417  Array2D<Real,-1,1,-1,1> ap, p;
1418 
1419  // csten(i,j,k,1)
1420  p(-1,-1) = interp_from_pp_to(ii+1,jj-1);
1421  p( 0,-1) = interp_from_0p_to(ii+2,jj-1);
1422  p(-1, 0) = interp_from_p0_to(ii+1,jj );
1423  p( 0, 0) = Real(1.);
1424  p(-1, 1) = interp_from_pm_to(ii+1,jj+1);
1425  p( 0, 1) = interp_from_0m_to(ii+2,jj+1);
1426 
1427  ap(0,-1) = Ap0(ii,jj-1)*p(-1,-1) + App(ii,jj-1)*p(-1,0);
1428  ap(1,-1) = A00(ii+1,jj-1)*p(-1,-1) + Ap0(ii+1,jj-1)*p(0,-1)
1429  + A0p(ii+1,jj-1)*p(-1,0) + App(ii+1,jj-1)*p(0,0);
1430  ap(0,0) = Apm(ii,jj)*p(-1,-1) + Ap0(ii,jj)*p(-1,0) + App(ii,jj)*p(-1,1);
1431  ap(1,0) = A0m(ii+1,jj)*p(-1,-1) + Apm(ii+1,jj)*p(0,-1)
1432  + A00(ii+1,jj)*p(-1,0) + Ap0(ii+1,jj)*p(0,0)
1433  + A0p(ii+1,jj)*p(-1,1) + App(ii+1,jj)*p(0,1);
1434  ap(0,1) = Apm(ii,jj+1)*p(-1,0) + Ap0(ii,jj+1)*p(-1,1);
1435  ap(1,1) = A0m(ii+1,jj+1)*p(-1,0) + Apm(ii+1,jj+1)*p(0,0)
1436  + A00(ii+1,jj+1)*p(-1,1) + Ap0(ii+1,jj+1)*p(0,1);
1437 
1438  csten(i,j,k,1) = Real(0.25)*(restrict_from_0m_to(ii,jj)*ap(0,-1)
1439  + restrict_from_pm_to(ii,jj)*ap(1,-1)
1440  + ap(0,0)
1441  + restrict_from_p0_to(ii,jj)*ap(1,0)
1442  + restrict_from_0p_to(ii,jj)*ap(0,1)
1443  + restrict_from_pp_to(ii,jj)*ap(1,1));
1444 
1445  // csten(i,j,k,2)
1446  p(-1,-1) = interp_from_pp_to(ii-1,jj+1);
1447  p( 0,-1) = interp_from_0p_to(ii ,jj+1);
1448  p( 1,-1) = interp_from_mp_to(ii+1,jj+1);
1449  p(-1, 0) = interp_from_p0_to(ii-1,jj+2);
1450  p( 0, 0) = Real(1.);
1451  p( 1, 0) = interp_from_m0_to(ii+1,jj+2);
1452 
1453  ap(-1,0) = A0p(ii-1,jj)*p(-1,-1) + App(ii-1,jj)*p(0,-1);
1454  ap(0,0) = Amp(ii,jj)*p(-1,-1) + A0p(ii,jj)*p(0,-1) + App(ii,jj)*p(1,-1);
1455  ap(1,0) = Amp(ii+1,jj)*p(0,-1) + A0p(ii+1,jj)*p(1,-1);
1456  ap(-1,1) = A00(ii-1,jj+1)*p(-1,-1) + Ap0(ii-1,jj+1)*p(0,-1)
1457  + A0p(ii-1,jj+1)*p(-1,0) + App(ii-1,jj+1)*p(0,0);
1458  ap(0,1) = Am0(ii,jj+1)*p(-1,-1) + A00(ii,jj+1)*p(0,-1) + Ap0(ii,jj+1)*p(1,-1)
1459  + Amp(ii,jj+1)*p(-1,0) + A0p(ii,jj+1)*p(0,0) + App(ii,jj+1)*p(1,0);
1460  ap(1,1) = Am0(ii+1,jj+1)*p(0,-1) + A00(ii+1,jj+1)*p(1,-1)
1461  + Amp(ii+1,jj+1)*p(0,0) + A0p(ii+1,jj+1)*p(1,0);
1462 
1463  csten(i,j,k,2) = Real(0.25)*(restrict_from_m0_to(ii,jj)*ap(-1,0)
1464  + ap(0,0)
1465  + restrict_from_p0_to(ii,jj)*ap(1,0)
1466  + restrict_from_mp_to(ii,jj)*ap(-1,1)
1467  + restrict_from_0p_to(ii,jj)*ap(0,1)
1468  + restrict_from_pp_to(ii,jj)*ap(1,1));
1469 
1470  // csten(i,j,k,3)
1471  p(-1,-1) = interp_from_pp_to(ii+1,jj+1);
1472  p( 0,-1) = interp_from_0p_to(ii+2,jj+1);
1473  p(-1, 0) = interp_from_p0_to(ii+1,jj+2);
1474  p( 0, 0) = Real(1.);
1475 
1476  ap(0,0) = App(ii,jj)*p(-1,-1);
1477  ap(1,0) = A0p(ii+1,jj)*p(-1,-1) + App(ii+1,jj)*p(0,-1);
1478  ap(0,1) = Ap0(ii,jj+1)*p(-1,-1) + App(ii,jj+1)*p(-1,0);
1479  ap(1,1) = A00(ii+1,jj+1)*p(-1,-1) + Ap0(ii+1,jj+1)*p(0,-1)
1480  + A0p(ii+1,jj+1)*p(-1,0) + App(ii+1,jj+1)*p(0,0);
1481 
1482  Real cross1 = Real(0.25)*(ap(0,0)
1483  + restrict_from_p0_to(ii,jj)*ap(1,0)
1484  + restrict_from_0p_to(ii,jj)*ap(0,1)
1485  + restrict_from_pp_to(ii,jj)*ap(1,1));
1486 
1487  p(0,-1) = interp_from_0p_to(ii,jj+1);
1488  p(1,-1) = interp_from_mp_to(ii+1,jj+1);
1489  p(0, 0) = Real(1.);
1490  p(1, 0) = interp_from_m0_to(ii+1,jj+2);
1491 
1492  ap(-1,0) = Amp(ii+1,jj)*p(0,-1) + A0p(ii+1,jj)*p(1,-1);
1493  ap( 0,0) = Amp(ii+2,jj)*p(1,-1);
1494  ap(-1,1) = Am0(ii+1,jj+1)*p(0,-1) + A00(ii+1,jj+1)*p(1,-1) + Amp(ii+1,jj+1)*p(0,0)
1495  + A0p(ii+1,jj+1)*p(1,0);
1496  ap( 0,1) = Am0(ii+2,jj+1)*p(1,-1) + Amp(ii+2,jj+1)*p(1,0);
1497 
1498  Real cross2 = Real(0.25)*(ap(0,0)
1499  + restrict_from_m0_to(ii+2,jj)*ap(-1,0)
1500  + restrict_from_mp_to(ii+2,jj)*ap(-1,1)
1501  + restrict_from_0p_to(ii+2,jj)*ap( 0,1));
1502 
1503  csten(i,j,k,3) = Real(0.5)*(cross1+cross2);
1504 }
1505 
1507 Real mlndlap_adotx_sten_doit (int i, int j, int k, Array4<Real const> const& x,
1508  Array4<Real const> const& sten) noexcept
1509 {
1510  return x(i-1,j-1,k)*sten(i-1,j-1,k,3)
1511  + x(i ,j-1,k)*sten(i ,j-1,k,2)
1512  + x(i+1,j-1,k)*sten(i ,j-1,k,3)
1513  + x(i-1,j ,k)*sten(i-1,j ,k,1)
1514  + x(i ,j ,k)*sten(i ,j ,k,0)
1515  + x(i+1,j ,k)*sten(i ,j ,k,1)
1516  + x(i-1,j+1,k)*sten(i-1,j ,k,3)
1517  + x(i ,j+1,k)*sten(i ,j ,k,2)
1518  + x(i+1,j+1,k)*sten(i ,j ,k,3);
1519 }
1520 
1522 Real mlndlap_adotx_sten (int i, int j, int k, Array4<Real const> const& x,
1523  Array4<Real const> const& sten, Array4<int const> const& msk) noexcept
1524 {
1525  if (msk(i,j,k)) {
1526  return Real(0.0);
1527  } else {
1528  return mlndlap_adotx_sten_doit(i,j,k,x,sten);
1529  }
1530 }
1531 
1533 void mlndlap_gauss_seidel_sten (int i, int j, int k, Array4<Real> const& sol,
1534  Array4<Real const> const& rhs,
1535  Array4<Real const> const& sten,
1536  Array4<int const> const& msk) noexcept
1537 {
1538  if (msk(i,j,k)) {
1539  sol(i,j,k) = Real(0.0);
1540  } else if (sten(i,j,k,0) != Real(0.0)) {
1541  Real Ax = mlndlap_adotx_sten_doit(i,j,k,sol,sten);
1542  sol(i,j,k) += (rhs(i,j,k) - Ax) / sten(i,j,k,0);
1543  }
1544 }
1545 
1546 inline
1547 void mlndlap_gauss_seidel_sten (Box const& bx, Array4<Real> const& sol,
1548  Array4<Real const> const& rhs,
1549  Array4<Real const> const& sten,
1550  Array4<int const> const& msk) noexcept
1551 {
1552  AMREX_LOOP_3D(bx, i, j, k,
1553  {
1554  mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk);
1555  });
1556 }
1557 
1559 void mlndlap_interpadd_rap (int i, int j, int, Array4<Real> const& fine,
1560  Array4<Real const> const& crse, Array4<Real const> const& sten,
1561  Array4<int const> const& msk) noexcept
1562 {
1563  using namespace nodelap_detail;
1564 
1565  if (!msk(i,j,0) && sten(i,j,0,0) != Real(0.0)) {
1566  int ic = amrex::coarsen(i,2);
1567  int jc = amrex::coarsen(j,2);
1568  bool ieven = ic*2 == i;
1569  bool jeven = jc*2 == j;
1570  Real fv;
1571  if (ieven && jeven) {
1572  fv = crse(ic,jc,0);
1573  } else if (ieven) {
1574  Real wym = std::abs(sten(i,j-1,0,2));
1575  Real wyp = std::abs(sten(i,j ,0,2));
1576  fv = (wym*crse(ic,jc,0) + wyp*crse(ic,jc+1,0)) / (wym+wyp+eps);
1577  } else if (jeven) {
1578  Real wxm = std::abs(sten(i-1,j,0,1));
1579  Real wxp = std::abs(sten(i ,j,0,1));
1580  fv = (wxm*crse(ic,jc,0) + wxp*crse(ic+1,jc,0)) / (wxm+wxp+eps);
1581  } else {
1582  Real wxm = std::abs(sten(i-1,j ,0,1)) /
1583  (std::abs(sten(i-1,j-1,0,3))+std::abs(sten(i-1,j ,0,3))+eps);
1584  Real wxp = std::abs(sten(i ,j ,0,1)) /
1585  (std::abs(sten(i ,j-1,0,3))+std::abs(sten(i ,j ,0,3))+eps);
1586  Real wym = std::abs(sten(i ,j-1,0,2)) /
1587  (std::abs(sten(i-1,j-1,0,3))+std::abs(sten(i ,j-1,0,3))+eps);
1588  Real wyp = std::abs(sten(i ,j ,0,2)) /
1589  (std::abs(sten(i-1,j ,0,3))+std::abs(sten(i ,j ,0,3))+eps);
1590  Real wmm = std::abs(sten(i-1,j-1,0,3)) * (Real(1.0) + wxm + wym);
1591  Real wpm = std::abs(sten(i,j-1,0,3)) * (Real(1.0) + wxp + wym);
1592  Real wmp = std::abs(sten(i-1,j,0,3)) *(Real(1.0) + wxm + wyp);
1593  Real wpp = std::abs(sten(i,j,0,3)) * (Real(1.0) + wxp + wyp);
1594  fv = (wmm*crse(ic,jc,0) + wpm*crse(ic+1,jc,0)
1595  + wmp*crse(ic,jc+1,0) + wpp*crse(ic+1,jc+1,0))
1596  / (wmm+wpm+wmp+wpp+eps);
1597  }
1598 
1599  fine(i,j,0) += fv;
1600  }
1601 }
1602 
1604 void mlndlap_restriction_rap (int i, int j, int /*k*/, Array4<Real> const& crse,
1605  Array4<Real const> const& fine, Array4<Real const> const& sten,
1606  Array4<int const> const& msk) noexcept
1607 {
1608  using namespace nodelap_detail;
1609 
1610  int ii = i*2;
1611  int jj = j*2;
1612  if (msk(ii,jj,0)) {
1613  crse(i,j,0) = Real(0.0);
1614  } else {
1615 
1616  Real cv = fine(ii,jj,0)
1617  + fine(ii-1,jj ,0)*std::abs(sten(ii-1,jj ,0,1))
1618  / (std::abs(sten(ii-2,jj ,0,1))
1619  +std::abs(sten(ii-1,jj ,0,1))+eps)
1620  + fine(ii+1,jj ,0)*std::abs(sten(ii ,jj ,0,1))
1621  / (std::abs(sten(ii ,jj ,0,1))
1622  +std::abs(sten(ii+1,jj ,0,1))+eps)
1623  + fine(ii ,jj-1,0)*std::abs(sten(ii ,jj-1,0,2))
1624  / (std::abs(sten(ii ,jj-2,0,2))
1625  +std::abs(sten(ii ,jj-1,0,2))+eps)
1626  + fine(ii ,jj+1,0)*std::abs(sten(ii ,jj ,0,2))
1627  / (std::abs(sten(ii ,jj ,0,2))
1628  +std::abs(sten(ii ,jj+1,0,2))+eps);
1629 
1630  Real wxp = std::abs(sten(ii-1,jj-1,0,1))
1631  / (std::abs(sten(ii-1,jj-2,0,3))
1632  +std::abs(sten(ii-1,jj-1,0,3))+eps);
1633  Real wyp = std::abs(sten(ii-1,jj-1,0,2))
1634  / (std::abs(sten(ii-2,jj-1,0,3))
1635  +std::abs(sten(ii-1,jj-1,0,3))+eps);
1636  Real wpp = std::abs(sten(ii-1,jj-1,0,3))*(Real(1.0) + wxp + wyp);
1637  cv += wpp*sten(ii-1,jj-1,0,4)*fine(ii-1,jj-1,0);
1638 
1639  Real wxm = std::abs(sten(ii ,jj-1,0,1))
1640  / (std::abs(sten(ii ,jj-2,0,3))
1641  +std::abs(sten(ii ,jj-1,0,3))+eps);
1642  wyp = std::abs(sten(ii+1,jj-1,0,2))
1643  / (std::abs(sten(ii ,jj-1,0,3))
1644  +std::abs(sten(ii+1,jj-1,0,3))+eps);
1645  Real wmp = std::abs(sten(ii ,jj-1,0,3))*(Real(1.0) + wxm + wyp);
1646  cv += wmp*sten(ii+1,jj-1,0,4)*fine(ii+1,jj-1,0);
1647 
1648  wxp = std::abs(sten(ii-1,jj+1,0,1))
1649  / (std::abs(sten(ii-1,jj ,0,3))
1650  +std::abs(sten(ii-1,jj+1,0,3))+eps);
1651  Real wym = std::abs(sten(ii-1,jj ,0,2))
1652  / (std::abs(sten(ii-2,jj ,0,3))
1653  +std::abs(sten(ii-1,jj ,0,3))+eps);
1654  Real wpm = std::abs(sten(ii-1,jj ,0,3)) * (Real(1.0) + wxp + wym);
1655  cv += wpm*sten(ii-1,jj+1,0,4)*fine(ii-1,jj+1,0);
1656 
1657  wxm = std::abs(sten(ii ,jj+1,0,1))
1658  / (std::abs(sten(ii ,jj ,0,3))
1659  +std::abs(sten(ii ,jj+1,0,3))+eps);
1660  wym = std::abs(sten(ii+1,jj ,0,2))
1661  / (std::abs(sten(ii ,jj ,0,3))
1662  +std::abs(sten(ii+1,jj ,0,3))+eps);
1663  Real wmm = std::abs(sten(ii ,jj ,0,3)) * (Real(1.0) + wxm + wym);
1664  cv += wmm*sten(ii+1,jj+1,0,4)*fine(ii+1,jj+1,0);
1665 
1666  crse(i,j,0) = cv * Real(0.25);
1667  }
1668 }
1669 
1670 #ifdef AMREX_USE_EB
1671 
1672 namespace nodelap_detail {
1673  constexpr int i_S_x = 0;
1674  constexpr int i_S_y = 1;
1675  constexpr int i_S_x2 = 2;
1676  constexpr int i_S_y2 = 3;
1677  constexpr int i_S_xy = 4;
1678  constexpr int n_Sintg = 5;
1679 
1680  constexpr int i_B_x = 0;
1681  constexpr int i_B_y = 1;
1682  constexpr int i_B_xy = 2;
1683  constexpr int n_Bintg = 3;
1684 }
1685 
1687 void mlndlap_set_connection (int i, int j, int, Array4<Real> const& conn,
1688  Array4<Real const> const& intg, Array4<Real const> const& vol,
1689  Array4<EBCellFlag const> const& flag) noexcept
1690 {
1691  using namespace nodelap_detail;
1692 
1693  if (flag(i,j,0).isCovered()) {
1694  for (int n = 0; n < 6; ++n) { conn(i,j,0,n) = Real(0.); }
1695  } else if (flag(i,j,0).isRegular() || vol(i,j,0) >= almostone) {
1696  for (int n = 0; n < 6; ++n) { conn(i,j,0,n) = Real(1.); }
1697  } else {
1698  // Note that these are normalized so that they equal 1 in the case of a regular cell
1699 
1700  conn(i,j,0,0) = Real(3.)*(Real(0.25)*vol(i,j,0) + intg(i,j,0,i_S_y2) - intg(i,j,0,i_S_y));
1701  conn(i,j,0,1) = Real(6.)*(Real(0.25)*vol(i,j,0) - intg(i,j,0,i_S_y2));
1702  conn(i,j,0,2) = Real(3.)*(Real(0.25)*vol(i,j,0) + intg(i,j,0,i_S_y2) + intg(i,j,0,i_S_y));
1703 
1704  conn(i,j,0,3) = Real(3.)*(Real(0.25)*vol(i,j,0) + intg(i,j,0,i_S_x2) - intg(i,j,0,i_S_x));
1705  conn(i,j,0,4) = Real(6.)*(Real(0.25)*vol(i,j,0) - intg(i,j,0,i_S_x2));
1706  conn(i,j,0,5) = Real(3.)*(Real(0.25)*vol(i,j,0) + intg(i,j,0,i_S_x2) + intg(i,j,0,i_S_x));
1707  }
1708 }
1709 
1711 void mlndlap_set_stencil_eb (int i, int j, int, Array4<Real> const& sten,
1712  Array4<Real const> const& sig, Array4<Real const> const& conn,
1713  GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1714 {
1715  Real facx = Real(1./6.)*dxinv[0]*dxinv[0];
1716  Real facy = Real(1./6.)*dxinv[1]*dxinv[1];
1717 
1718  sten(i,j,0,1) = Real(2.)*facx*(sig(i,j-1,0)*conn(i,j-1,0,2)+sig(i,j,0)*conn(i,j,0,0))
1719  -facy*(sig(i,j-1,0)*conn(i,j-1,0,4)+sig(i,j,0)*conn(i,j,0,4));
1720  sten(i,j,0,2) = Real(2.)*facy*(sig(i-1,j,0)*conn(i-1,j,0,5)+sig(i,j,0)*conn(i,j,0,3))
1721  -facx*(sig(i-1,j,0)*conn(i-1,j,0,1)+sig(i,j,0)*conn(i,j,0,1));
1722  sten(i,j,0,3) = (facx*conn(i,j,0,1)+facy*conn(i,j,0,4))*sig(i,j,0);
1723 }
1724 
1725 
1727 void mlndlap_divu_eb (int i, int j, int, Array4<Real> const& rhs, Array4<Real const> const& vel,
1728  Array4<Real const> const& vfrac, Array4<Real const> const& intg,
1729  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
1730  Box const& nodal_domain,
1731  GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bclo,
1732  GpuArray<LinOpBCType, AMREX_SPACEDIM> const& bchi) noexcept
1733 {
1734  Real facx = Real(0.5)*dxinv[0];
1735  Real facy = Real(0.5)*dxinv[1];
1736 
1737  const auto domlo = amrex::lbound(nodal_domain);
1738  const auto domhi = amrex::ubound(nodal_domain);
1739 
1740  if (!msk(i,j,0)) {
1741 
1742  Real zero_ilo = Real(1.0);
1743  Real zero_ihi = Real(1.0);
1744  Real zero_jlo = Real(1.0);
1745  Real zero_jhi = Real(1.0);
1746 
1747  // The nodal divergence operator should not see the tangential velocity
1748  // at an inflow face
1749  if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow)
1750  && i == domlo.x)
1751  {
1752  zero_ilo = Real(0.0);
1753  }
1754  if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow)
1755  && i == domhi.x)
1756  {
1757  zero_ihi = Real(0.0);
1758  }
1759  if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow)
1760  && j == domlo.y)
1761  {
1762  zero_jlo = Real(0.0);
1763  }
1764  if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow)
1765  && j == domhi.y)
1766  {
1767  zero_jhi = Real(0.0);
1768  }
1769 
1770  rhs(i,j,0) = facx*(-vel(i-1,j-1,0,0)*(vfrac(i-1,j-1,0)+Real(2.)*intg(i-1,j-1,0,1))*zero_jlo
1771  +vel(i ,j-1,0,0)*(vfrac(i ,j-1,0)+Real(2.)*intg(i ,j-1,0,1))*zero_jlo
1772  -vel(i-1,j ,0,0)*(vfrac(i-1,j ,0)-Real(2.)*intg(i-1,j ,0,1))*zero_jhi
1773  +vel(i ,j ,0,0)*(vfrac(i ,j ,0)-Real(2.)*intg(i ,j ,0,1))*zero_jhi)
1774  + facy*(-vel(i-1,j-1,0,1)*(vfrac(i-1,j-1,0)+Real(2.)*intg(i-1,j-1,0,0))*zero_ilo
1775  -vel(i ,j-1,0,1)*(vfrac(i ,j-1,0)-Real(2.)*intg(i ,j-1,0,0))*zero_ihi
1776  +vel(i-1,j ,0,1)*(vfrac(i-1,j ,0)+Real(2.)*intg(i-1,j ,0,0))*zero_ilo
1777  +vel(i ,j ,0,1)*(vfrac(i ,j ,0)-Real(2.)*intg(i ,j ,0,0))*zero_ihi);
1778  } else {
1779  rhs(i,j,0) = Real(0.);
1780  }
1781 }
1782 
1784 void add_eb_flow_contribution (int i, int j, int, Array4<Real> const& rhs,
1785  Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
1786  Array4<Real const> const& bareaarr,
1787  Array4<Real const> const& sintg,
1788  Array4<Real const> const& eb_vel_dot_n) noexcept
1789 {
1790  using namespace nodelap_detail;
1791 
1792  Real fac_eb = 0.25 *dxinv[0];
1793 
1794  if (!msk(i,j,0)) {
1795  rhs(i,j,0) += fac_eb*(
1796  eb_vel_dot_n(i-1,j-1,0)*( bareaarr(i-1,j-1,0)
1797  +Real(2.)*sintg(i-1,j-1,0,i_B_x )
1798  +Real(2.)*sintg(i-1,j-1,0,i_B_y )
1799  +Real(4.)*sintg(i-1,j-1,0,i_B_xy))
1800  +eb_vel_dot_n(i ,j-1,0)*( bareaarr(i ,j-1,0)
1801  -Real(2.)*sintg(i ,j-1,0,i_B_x )
1802  +Real(2.)*sintg(i ,j-1,0,i_B_y )
1803  -Real(4.)*sintg(i ,j-1,0,i_B_xy))
1804  +eb_vel_dot_n(i-1,j ,0)*( bareaarr(i-1,j ,0)
1805  +Real(2.)*sintg(i-1,j ,0,i_B_x )
1806  -Real(2.)*sintg(i-1,j ,0,i_B_y )
1807  -Real(4.)*sintg(i-1,j ,0,i_B_xy))
1808  +eb_vel_dot_n(i ,j ,0)*( bareaarr(i ,j ,0)
1809  -Real(2.)*sintg(i ,j ,0,i_B_x )
1810  -Real(2.)*sintg(i ,j ,0,i_B_y )
1811  +Real(4.)*sintg(i ,j ,0,i_B_xy)));
1812  }
1813 }
1814 
1816 void mlndlap_mknewu_eb (int i, int j, int, Array4<Real> const& u, Array4<Real const> const& p,
1817  Array4<Real const> const& sig, Array4<Real const> const& vfrac,
1818  Array4<Real const> const& intg, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1819 {
1820  Real facx = Real(0.5)*dxinv[0];
1821  Real facy = Real(0.5)*dxinv[1];
1822  if (vfrac(i,j,0) == Real(0.)) {
1823  u(i,j,0,0) = u(i,j,0,1) = Real(0.);
1824  } else {
1825  Real dpdx = facx*(-p(i,j,0)+p(i+1,j,0)-p(i,j+1,0)+p(i+1,j+1,0));
1826  Real dpdy = facy*(-p(i,j,0)-p(i+1,j,0)+p(i,j+1,0)+p(i+1,j+1,0));
1827  Real dpp = (p(i,j,0)+p(i+1,j+1,0)-p(i+1,j,0)-p(i,j+1,0))/vfrac(i,j,0);
1828  u(i,j,0,0) -= sig(i,j,0)*(dpdx + dxinv[0]*intg(i,j,0,1)*dpp);
1829  u(i,j,0,1) -= sig(i,j,0)*(dpdy + dxinv[1]*intg(i,j,0,0)*dpp);
1830  }
1831 }
1832 
1834 void mlndlap_mknewu_eb_c (int i, int j, int, Array4<Real> const& u, Array4<Real const> const& p,
1835  Real sig, Array4<Real const> const& vfrac,
1836  Array4<Real const> const& intg, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
1837 {
1838  Real facx = Real(0.5)*dxinv[0];
1839  Real facy = Real(0.5)*dxinv[1];
1840  if (vfrac(i,j,0) == Real(0.)) {
1841  u(i,j,0,0) = u(i,j,0,1) = Real(0.);
1842  } else {
1843  Real dpdx = facx*(-p(i,j,0)+p(i+1,j,0)-p(i,j+1,0)+p(i+1,j+1,0));
1844  Real dpdy = facy*(-p(i,j,0)-p(i+1,j,0)+p(i,j+1,0)+p(i+1,j+1,0));
1845  Real dpp = (p(i,j,0)+p(i+1,j+1,0)-p(i+1,j,0)-p(i,j+1,0))/vfrac(i,j,0);
1846  u(i,j,0,0) -= sig*(dpdx + dxinv[0]*intg(i,j,0,1)*dpp);
1847  u(i,j,0,1) -= sig*(dpdy + dxinv[1]*intg(i,j,0,0)*dpp);
1848  }
1849 }
1850 
1852 Real mlndlap_rhcc_eb (int i, int j, int, Array4<Real const> const& rhcc,
1853  Array4<Real const> const& vfrac, Array4<Real const> const& intg,
1854  Array4<int const> const& msk) noexcept
1855 {
1856  using namespace nodelap_detail;
1857 
1858  if (!msk(i,j,0)) {
1859  return
1860  rhcc(i ,j ,0)*(Real(0.25)*vfrac(i ,j ,0)-intg(i ,j ,0,i_S_x)-intg(i ,j ,0,i_S_y)+intg(i ,j ,0,i_S_xy)) +
1861  rhcc(i-1,j ,0)*(Real(0.25)*vfrac(i-1,j ,0)+intg(i-1,j ,0,i_S_x)-intg(i-1,j ,0,i_S_y)-intg(i-1,j ,0,i_S_xy)) +
1862  rhcc(i-1,j-1,0)*(Real(0.25)*vfrac(i-1,j-1,0)+intg(i-1,j-1,0,i_S_x)+intg(i-1,j-1,0,i_S_y)+intg(i-1,j-1,0,i_S_xy)) +
1863  rhcc(i ,j-1,0)*(Real(0.25)*vfrac(i ,j-1,0)-intg(i ,j-1,0,i_S_x)+intg(i ,j-1,0,i_S_y)-intg(i ,j-1,0,i_S_xy));
1864  } else {
1865  return Real(0.);
1866  }
1867 }
1868 
1870 void mlndlap_set_integral (int i, int j, int, Array4<Real> const& intg) noexcept
1871 {
1872  using namespace nodelap_detail;
1873 
1874  intg(i,j,0,i_S_x ) = Real(0.);
1875  intg(i,j,0,i_S_y ) = Real(0.);
1876  intg(i,j,0,i_S_x2) = Real(1./12.);
1877  intg(i,j,0,i_S_y2) = Real(1./12.);
1878  intg(i,j,0,i_S_xy) = Real(0.);
1879 }
1880 
1882 void mlndlap_set_surface_integral (int i, int j, int, Array4<Real> const& sintg) noexcept
1883 {
1884  using namespace nodelap_detail;
1885 
1886  sintg(i,j,0,i_B_x ) = Real(0.);
1887  sintg(i,j,0,i_B_y ) = Real(0.);
1888  sintg(i,j,0,i_B_xy) = Real(0.);
1889 }
1890 
1892 void mlndlap_set_integral_eb (int i, int j, int, Array4<Real> const& intg,
1893  Array4<EBCellFlag const> const& flag, Array4<Real const> const& vol,
1894  Array4<Real const> const& ax, Array4<Real const> const& ay,
1895  Array4<Real const> const& bcen) noexcept
1896 {
1897  using namespace nodelap_detail;
1898 
1899  if (flag(i,j,0).isCovered()) {
1900  intg(i,j,0,i_S_x ) = Real(0.);
1901  intg(i,j,0,i_S_y ) = Real(0.);
1902  intg(i,j,0,i_S_x2) = Real(0.);
1903  intg(i,j,0,i_S_y2) = Real(0.);
1904  intg(i,j,0,i_S_xy) = Real(0.);
1905  } else if (flag(i,j,0).isRegular() || vol(i,j,0) >= almostone) {
1906  intg(i,j,0,i_S_x ) = Real(0.);
1907  intg(i,j,0,i_S_y ) = Real(0.);
1908  intg(i,j,0,i_S_x2) = Real(1./12.);
1909  intg(i,j,0,i_S_y2) = Real(1./12.);
1910  intg(i,j,0,i_S_xy) = Real(0.);
1911  } else {
1912  Real axm = ax(i,j,0);
1913  Real axp = ax(i+1,j,0);
1914  Real aym = ay(i,j,0);
1915  Real ayp = ay(i,j+1,0);
1916 
1917  Real apnorm = std::sqrt((axm-axp)*(axm-axp) + (aym-ayp)*(aym-ayp));
1918  if (apnorm == Real(0.)) {
1919  amrex::Abort("amrex_mlndlap_set_integral: we are in trouble");
1920  }
1921 
1922  Real apnorminv = Real(1.)/apnorm;
1923  Real anrmx = (axm-axp) * apnorminv; // pointing to the wall
1924  Real anrmy = (aym-ayp) * apnorminv;
1925 
1926  Real bcx = bcen(i,j,0,0);
1927  Real bcy = bcen(i,j,0,1);
1928 
1929  Real Sx, Sy, Sx2, Sy2, Sxy;
1930  if (std::abs(anrmx) <= almostzero) {
1931  Sx = Real(0.);
1932  Sx2 = Real(1./24.)*(axm+axp);
1933  Sxy = Real(0.);
1934  } else if (std::abs(anrmy) <= almostzero) {
1935  Sx = Real(1./8.) *(axp-axm) + anrmx*Real(0.5)*(bcx*bcx);
1936  Sx2 = Real(1./24.)*(axp+axm) + anrmx*Real(1./3.)*(bcx*bcx*bcx);
1937  Sxy = Real(0.);
1938  } else {
1939  Real xmin, xmax;
1940  if (anrmx > Real(0.)) {
1941  xmin = Real(-0.5) + amrex::min(aym,ayp);
1942  xmax = Real(-0.5) + amrex::max(aym,ayp);
1943  } else {
1944  xmin = Real(0.5) - amrex::max(aym,ayp);
1945  xmax = Real(0.5) - amrex::min(aym,ayp);
1946  }
1947  Real xmin3 = xmin*xmin*xmin;
1948  Real xmin4 = xmin3*xmin;
1949  Real xmax3 = xmax*xmax*xmax;
1950  Real xmax4 = xmax3*xmax;
1951  Sx = Real(1./8.) *(axp-axm) + (anrmx/std::abs(anrmy))*Real(1./6.) *(xmax3-xmin3);
1952  Sx2 = Real(1./24.)*(axp+axm) + (anrmx/std::abs(anrmy))*Real(1./12.)*(xmax4-xmin4);
1953 
1954  Real kk = -anrmx/anrmy;
1955  Real bb = bcy-kk*bcx;
1956  Sxy = Real(1./8.)*kk*kk*(xmax4-xmin4) + Real(1./3.)*kk*bb*(xmax3-xmin3)
1957  + (Real(0.25)*bb*bb-Real(1./16.))*(xmax*xmax-xmin*xmin);
1958  Sxy = std::copysign(Sxy, anrmy);
1959  }
1960 
1961  if (std::abs(anrmy) <= almostzero) {
1962  Sy = Real(0.);
1963  Sy2 = Real(1./24.)*(aym+ayp);
1964  } else if (std::abs(anrmx) <= almostzero) {
1965  Sy = Real(1./8.) *(ayp-aym) + anrmy*Real(0.5)*(bcy*bcy);
1966  Sy2 = Real(1./24.)*(ayp+aym) + anrmy*Real(1./3.)*(bcy*bcy*bcy);
1967  } else {
1968  Real ymin, ymax;
1969  if (anrmy > Real(0.)) {
1970  ymin = Real(-0.5) + amrex::min(axm,axp);
1971  ymax = Real(-0.5) + amrex::max(axm,axp);
1972  } else {
1973  ymin = Real(0.5) - amrex::max(axm,axp);
1974  ymax = Real(0.5) - amrex::min(axm,axp);
1975  }
1976  Real ymin3 = ymin*ymin*ymin;
1977  Real ymin4 = ymin3*ymin;
1978  Real ymax3 = ymax*ymax*ymax;
1979  Real ymax4 = ymax3*ymax;
1980  Sy = Real(1./8.) *(ayp-aym) + (anrmy/std::abs(anrmx))*Real(1./6.) *(ymax3-ymin3);
1981  Sy2 = Real(1./24.)*(ayp+aym) + (anrmy/std::abs(anrmx))*Real(1./12.)*(ymax4-ymin4);
1982  }
1983 
1984  intg(i,j,0,i_S_x ) = Sx;
1985  intg(i,j,0,i_S_y ) = Sy;
1986  intg(i,j,0,i_S_x2) = Sx2;
1987  intg(i,j,0,i_S_y2) = Sy2;
1988  intg(i,j,0,i_S_xy) = Sxy;
1989  }
1990 }
1991 
1993 void mlndlap_set_surface_integral_eb (int i, int j, int, Array4<Real> const& sintg,
1994  Array4<EBCellFlag const> const& flag,
1995  Array4<Real const> const& bcen,
1996  Array4<Real const> const& barea,
1997  Array4<Real const> const& bnorm) noexcept
1998 {
1999  using namespace nodelap_detail;
2000 
2001  if (flag(i,j,0).isCovered() || flag(i,j,0).isRegular()) {
2002  sintg(i,j,0,i_B_x ) = Real(0.);
2003  sintg(i,j,0,i_B_y ) = Real(0.);
2004  sintg(i,j,0,i_B_xy) = Real(0.);
2005  } else {
2006  Real bcx = bcen(i,j,0,0);
2007  Real bcy = bcen(i,j,0,1);
2008 
2009  Real btanx = bnorm(i,j,0,1);
2010  Real btany = -bnorm(i,j,0,0);
2011 
2012  Real x0 = bcx - Real(0.5)*barea(i,j,0)*btanx;
2013  Real x1 = bcx + Real(0.5)*barea(i,j,0)*btanx;
2014 
2015  Real y0 = bcy - Real(0.5)*barea(i,j,0)*btany;
2016  Real y1 = bcy + Real(0.5)*barea(i,j,0)*btany;
2017 
2018  Real Bx = barea(i,j,0)*Real(0.5)*(x1 + x0);
2019  Real By = barea(i,j,0)*Real(0.5)*(y1 + y0);
2020  Real Bxy = barea(i,j,0)*(x0*y0 + (x0*(y1 - y0) + y0*(x1 - x0))/Real(2.) + (x1 - x0)*(y1 - y0)/Real(3.));
2021 
2022  sintg(i,j,0,i_B_x ) = Bx;
2023  sintg(i,j,0,i_B_y ) = By;
2024  sintg(i,j,0,i_B_xy) = Bxy;
2025  }
2026 }
2027 
2028 #endif
2029 
2030 #if defined(AMREX_USE_HYPRE)
2031 
2032 template <typename HypreInt, typename AtomicInt>
2033 void mlndlap_fillijmat_sten_cpu (Box const& ndbx,
2034  Array4<AtomicInt const> const& gid,
2035  Array4<int const> const& lid,
2036  HypreInt* ncols, HypreInt* cols,
2037  Real* mat, // NOLINT(readability-non-const-parameter)
2038  Array4<Real const> const& sten) noexcept
2039 {
2040  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2041  HypreInt nelems = 0;
2042  amrex::LoopOnCpu(ndbx, [&] (int i, int j, int k) noexcept
2043  {
2044  if (lid(i,j,k) >= 0)
2045  {
2046  cols[nelems] = gid(i,j,k);
2047  mat[nelems] = sten(i,j,k,0);
2048  HypreNodeLap::Int nelems_old = nelems;
2049  ++nelems;
2050 
2051  if (gid(i-1,j-1,k) < gidmax) {
2052  cols[nelems] = gid(i-1,j-1,k);
2053  mat[nelems] = sten(i-1,j-1,k,3);
2054  ++nelems;
2055  }
2056 
2057  if (gid(i,j-1,k) < gidmax) {
2058  cols[nelems] = gid(i,j-1,k);
2059  mat[nelems] = sten(i,j-1,k,2);
2060  ++nelems;
2061  }
2062 
2063  if (gid(i+1,j-1,k) < gidmax) {
2064  cols[nelems] = gid(i+1,j-1,k);
2065  mat[nelems] = sten(i ,j-1,k,3);
2066  ++nelems;
2067  }
2068 
2069  if (gid(i-1,j,k) < gidmax) {
2070  cols[nelems] = gid(i-1,j,k);
2071  mat[nelems] = sten(i-1,j,k,1);
2072  ++nelems;
2073  }
2074 
2075  if (gid(i+1,j,k) < gidmax) {
2076  cols[nelems] = gid(i+1,j,k);
2077  mat[nelems] = sten(i ,j,k,1);
2078  ++nelems;
2079  }
2080 
2081  if (gid(i-1,j+1,k) < gidmax) {
2082  cols[nelems] = gid(i-1,j+1,k);
2083  mat[nelems] = sten(i-1,j ,k,3);
2084  ++nelems;
2085  }
2086 
2087  if (gid(i,j+1,k) < gidmax) {
2088  cols[nelems] = gid(i,j+1,k);
2089  mat[nelems] = sten(i,j ,k,2);
2090  ++nelems;
2091  }
2092 
2093  if (gid(i+1,j+1,k) < gidmax) {
2094  cols[nelems] = gid(i+1,j+1,k);
2095  mat[nelems] = sten(i ,j ,k,3);
2096  ++nelems;
2097  }
2098 
2099  ncols[lid(i,j,k)] = nelems - nelems_old;
2100  }
2101  });
2102 }
2103 
2104 template <typename HypreInt, typename AtomicInt>
2105 void mlndlap_fillijmat_aa_cpu (Box const& ndbx,
2106  Array4<AtomicInt const> const& gid,
2107  Array4<int const> const& lid,
2108  HypreInt* ncols, HypreInt* cols,
2109  Real* mat, // NOLINT(readability-non-const-parameter)
2110  Array4<Real const> const& sig,
2111  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
2112  Box const& ccdom, bool is_rz) noexcept
2113 {
2114  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
2115  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
2116  Real fxy = facx + facy;
2117  Real f2xmy = Real(2.0)*facx - facy;
2118  Real fmx2y = Real(2.0)*facy - facx;
2119 
2120  // Note that ccdom has been grown at periodic boundaries.
2121  const Box& nddom = amrex::surroundingNodes(ccdom);
2122 
2123  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2124  HypreInt nelems = 0;
2125  amrex::LoopOnCpu(ndbx, [&] (int i, int j, int k) noexcept
2126  {
2127  if (lid(i,j,k) >= 0)
2128  {
2129  Real fp, fm;
2130  if (is_rz) {
2131  fp = facy / static_cast<Real>(2*i+1);
2132  fm = facy / static_cast<Real>(2*i-1);
2133  } else {
2134  fp = fm = Real(0.0);
2135  }
2136 
2137  HypreInt nelems_old = nelems;
2138  cols[nelems_old] = gid(i,j,k);
2139  Real m0 = Real(0.);
2140  ++nelems;
2141 
2142  if (nddom.contains(i-1,j-1,k)) {
2143  Real tmp = fxy*sig(i-1,j-1,k);
2144  m0 -= tmp;
2145  if ( gid(i-1,j-1,k) < gidmax) {
2146  cols[nelems] = gid(i-1,j-1,k);
2147  mat[nelems] = tmp;
2148  ++nelems;
2149  }
2150  }
2151 
2152  if (nddom.contains(i,j-1,k)) {
2153  Real tmp = Real(0.0);
2154  if ( ccdom.contains(i-1,j-1,k)) {
2155  tmp += sig(i-1,j-1,k) * (fmx2y + fm);
2156  }
2157  if ( ccdom.contains(i,j-1,k)) {
2158  tmp += sig(i,j-1,k) * (fmx2y - fp);
2159  }
2160  m0 -= tmp;
2161  if (gid(i,j-1,k) < gidmax) {
2162  cols[nelems] = gid(i,j-1,k);
2163  mat[nelems] = tmp;
2164  ++nelems;
2165  }
2166  }
2167 
2168  if (nddom.contains(i+1,j-1,k)) {
2169  Real tmp = fxy*sig(i ,j-1,k);
2170  m0 -= tmp;
2171  if (gid(i+1,j-1,k) < gidmax) {
2172  cols[nelems] = gid(i+1,j-1,k);
2173  mat[nelems] = tmp;
2174  ++nelems;
2175  }
2176  }
2177 
2178  if (nddom.contains(i-1,j,k)) {
2179  Real tmp = Real(0.0);
2180  if ( ccdom.contains(i-1,j-1,k)) {
2181  tmp += f2xmy*sig(i-1,j-1,k);
2182  }
2183  if ( ccdom.contains(i-1,j,k)) {
2184  tmp += f2xmy*sig(i-1,j,k);
2185  }
2186  m0 -= tmp;
2187  if (gid(i-1,j,k) < gidmax) {
2188  cols[nelems] = gid(i-1,j,k);
2189  mat[nelems] = tmp;
2190  ++nelems;
2191  }
2192  }
2193 
2194  if (nddom.contains(i+1,j,k)) {
2195  Real tmp = Real(0.0);
2196  if ( ccdom.contains(i ,j-1,k)) {
2197  tmp += f2xmy*sig(i ,j-1,k);
2198  }
2199  if ( ccdom.contains(i ,j,k)) {
2200  tmp += f2xmy*sig(i ,j,k);
2201  }
2202  m0 -= tmp;
2203  if (gid(i+1,j,k) < gidmax) {
2204  cols[nelems] = gid(i+1,j,k);
2205  mat[nelems] = tmp;
2206  ++nelems;
2207  }
2208  }
2209 
2210  if (nddom.contains(i-1,j+1,k)) {
2211  Real tmp = fxy*sig(i-1,j ,k);
2212  m0 -= tmp;
2213  if (gid(i-1,j+1,k) < gidmax) {
2214  cols[nelems] = gid(i-1,j+1,k);
2215  mat[nelems] = tmp;
2216  ++nelems;
2217  }
2218  }
2219 
2220  if (nddom.contains(i,j+1,k)) {
2221  Real tmp = Real(0.0);
2222  if ( ccdom.contains(i-1,j ,k)) {
2223  tmp += sig(i-1,j ,k) * (fmx2y + fm);
2224  }
2225  if ( ccdom.contains(i,j ,k)) {
2226  tmp += sig(i,j ,k) * (fmx2y - fp);
2227  }
2228  m0 -= tmp;
2229  if (gid(i,j+1,k) < gidmax) {
2230  cols[nelems] = gid(i,j+1,k);
2231  mat[nelems] = tmp;
2232  ++nelems;
2233  }
2234  }
2235 
2236  if (nddom.contains(i+1,j+1,k)) {
2237  Real tmp = fxy*sig(i ,j ,k);
2238  m0 -= tmp;
2239  if (gid(i+1,j+1,k) < gidmax) {
2240  cols[nelems] = gid(i+1,j+1,k);
2241  mat[nelems] = tmp;
2242  ++nelems;
2243  }
2244  }
2245 
2246  mat[nelems_old] = m0;
2247  ncols[lid(i,j,k)] = nelems - nelems_old;
2248  }
2249  });
2250 }
2251 
2252 template <typename HypreInt, typename AtomicInt>
2253 void mlndlap_fillijmat_ha_cpu (Box const& ndbx,
2254  Array4<AtomicInt const> const& gid,
2255  Array4<int const> const& lid,
2256  HypreInt* ncols, HypreInt* cols,
2257  Real* mat, // NOLINT(readability-non-const-parameter)
2258  Array4<Real const> const& sx,
2259  Array4<Real const> const& sy,
2260  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
2261  Box const& ccdom, bool is_rz) noexcept
2262 {
2263  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
2264  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
2265 
2266  // Note that ccdom has been grown at periodic boundaries.
2267  const Box& nddom = amrex::surroundingNodes(ccdom);
2268 
2269  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2270  HypreInt nelems = 0;
2271  amrex::LoopOnCpu(ndbx, [&] (int i, int j, int k) noexcept
2272  {
2273  if (lid(i,j,k) >= 0)
2274  {
2275  Real fp, fm;
2276  if (is_rz) {
2277  fp = facy / static_cast<Real>(2*i+1);
2278  fm = facy / static_cast<Real>(2*i-1);
2279  } else {
2280  fp = fm = Real(0.0);
2281  }
2282 
2283  HypreInt nelems_old = nelems;
2284  cols[nelems_old] = gid(i,j,k);
2285  Real m0 = Real(0.);
2286  ++nelems;
2287 
2288  if (nddom.contains(i-1,j-1,k)) {
2289  Real tmp = facx*sx(i-1,j-1,k) + facy*sy(i-1,j-1,k);
2290  m0 -= tmp;
2291  if ( gid(i-1,j-1,k) < gidmax) {
2292  cols[nelems] = gid(i-1,j-1,k);
2293  mat[nelems] = tmp;
2294  ++nelems;
2295  }
2296  }
2297 
2298  if (nddom.contains(i,j-1,k)) {
2299  Real tmp = Real(0.0);
2300  if ( ccdom.contains(i-1,j-1,k)) {
2301  tmp += sy(i-1,j-1,k) * (facy * Real(2.0) + fm)
2302  - sx(i-1,j-1,k) * facx;
2303  }
2304  if ( ccdom.contains(i,j-1,k)) {
2305  tmp += sy(i,j-1,k) * (facy * Real(2.0) - fp)
2306  - sx(i,j-1,k) * facx;
2307  }
2308  m0 -= tmp;
2309  if (gid(i,j-1,k) < gidmax) {
2310  cols[nelems] = gid(i,j-1,k);
2311  mat[nelems] = tmp;
2312  ++nelems;
2313  }
2314  }
2315 
2316  if (nddom.contains(i+1,j-1,k)) {
2317  Real tmp = facx*sx(i ,j-1,k) + facy*sy(i ,j-1,k);
2318  m0 -= tmp;
2319  if (gid(i+1,j-1,k) < gidmax) {
2320  cols[nelems] = gid(i+1,j-1,k);
2321  mat[nelems] = tmp;
2322  ++nelems;
2323  }
2324  }
2325 
2326  if (nddom.contains(i-1,j,k)) {
2327  Real tmp = Real(0.0);
2328  if ( ccdom.contains(i-1,j-1,k)) {
2329  tmp += sx(i-1,j-1,k) * facx*Real(2.0)
2330  - sy(i-1,j-1,k) * facy;
2331  }
2332  if ( ccdom.contains(i-1,j,k)) {
2333  tmp += sx(i-1,j,k) * facx*Real(2.0)
2334  - sy(i-1,j,k) * facy;
2335  }
2336  m0 -= tmp;
2337  if (gid(i-1,j,k) < gidmax) {
2338  cols[nelems] = gid(i-1,j,k);
2339  mat[nelems] = tmp;
2340  ++nelems;
2341  }
2342  }
2343 
2344  if (nddom.contains(i+1,j,k)) {
2345  Real tmp = Real(0.0);
2346  if ( ccdom.contains(i ,j-1,k)) {
2347  tmp += sx(i ,j-1,k) * facx*Real(2.0)
2348  - sy(i ,j-1,k) * facy;
2349  }
2350  if ( ccdom.contains(i ,j,k)) {
2351  tmp += sx(i ,j,k) * facx*Real(2.0)
2352  - sy(i ,j,k) * facy;
2353  }
2354  m0 -= tmp;
2355  if (gid(i+1,j,k) < gidmax) {
2356  cols[nelems] = gid(i+1,j,k);
2357  mat[nelems] = tmp;
2358  ++nelems;
2359  }
2360  }
2361 
2362  if (nddom.contains(i-1,j+1,k)) {
2363  Real tmp = facx*sx(i-1,j ,k) + facy*sy(i-1,j ,k);
2364  m0 -= tmp;
2365  if (gid(i-1,j+1,k) < gidmax) {
2366  cols[nelems] = gid(i-1,j+1,k);
2367  mat[nelems] = tmp;
2368  ++nelems;
2369  }
2370  }
2371 
2372  if (nddom.contains(i,j+1,k)) {
2373  Real tmp = Real(0.0);
2374  if ( ccdom.contains(i-1,j ,k)) {
2375  tmp += sy(i-1,j ,k) * (facy*Real(2.0) + fm)
2376  - sx(i-1,j ,k) * facx;
2377  }
2378  if ( ccdom.contains(i,j ,k)) {
2379  tmp += sy(i,j ,k) * (facy*Real(2.0) - fp)
2380  - sx(i,j ,k) * facx;
2381  }
2382  m0 -= tmp;
2383  if (gid(i,j+1,k) < gidmax) {
2384  cols[nelems] = gid(i,j+1,k);
2385  mat[nelems] = tmp;
2386  ++nelems;
2387  }
2388  }
2389 
2390  if (nddom.contains(i+1,j+1,k)) {
2391  Real tmp = facx*sx(i ,j ,k) + facy*sy(i ,j ,k);
2392  m0 -= tmp;
2393  if (gid(i+1,j+1,k) < gidmax) {
2394  cols[nelems] = gid(i+1,j+1,k);
2395  mat[nelems] = tmp;
2396  ++nelems;
2397  }
2398  }
2399 
2400  mat[nelems_old] = m0;
2401  ncols[lid(i,j,k)] = nelems - nelems_old;
2402  }
2403  });
2404 }
2405 
2406 template <typename HypreInt, typename AtomicInt>
2407 void mlndlap_fillijmat_cs_cpu (Box const& ndbx,
2408  Array4<AtomicInt const> const& gid,
2409  Array4<int const> const& lid,
2410  HypreInt* ncols, HypreInt* cols,
2411  Real* mat, // NOLINT(readability-non-const-parameter)
2412  Real sig,
2413  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
2414  Box const& ccdom, bool is_rz) noexcept
2415 {
2416  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0] * sig;
2417  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1] * sig;
2418  Real fxy = facx + facy;
2419  Real f2xmy = Real(2.0)*facx - facy;
2420  Real fmx2y = Real(2.0)*facy - facx;
2421 
2422  // Note that ccdom has been grown at periodic boundaries.
2423  const Box& nddom = amrex::surroundingNodes(ccdom);
2424 
2425  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2426  HypreInt nelems = 0;
2427  amrex::LoopOnCpu(ndbx, [&] (int i, int j, int k) noexcept
2428  {
2429  if (lid(i,j,k) >= 0)
2430  {
2431  Real fp, fm;
2432  if (is_rz) {
2433  fp = facy / static_cast<Real>(2*i+1);
2434  fm = facy / static_cast<Real>(2*i-1);
2435  } else {
2436  fp = fm = Real(0.0);
2437  }
2438 
2439  HypreInt nelems_old = nelems;
2440  cols[nelems_old] = gid(i,j,k);
2441  Real m0 = Real(0.);
2442  ++nelems;
2443 
2444  if (nddom.contains(i-1,j-1,k)) {
2445  Real tmp = fxy;
2446  m0 -= tmp;
2447  if ( gid(i-1,j-1,k) < gidmax) {
2448  cols[nelems] = gid(i-1,j-1,k);
2449  mat[nelems] = tmp;
2450  ++nelems;
2451  }
2452  }
2453 
2454  if (nddom.contains(i,j-1,k)) {
2455  Real tmp = Real(0.0);
2456  if ( ccdom.contains(i-1,j-1,k)) {
2457  tmp += fmx2y + fm;
2458  }
2459  if ( ccdom.contains(i,j-1,k)) {
2460  tmp += fmx2y - fp;
2461  }
2462  m0 -= tmp;
2463  if (gid(i,j-1,k) < gidmax) {
2464  cols[nelems] = gid(i,j-1,k);
2465  mat[nelems] = tmp;
2466  ++nelems;
2467  }
2468  }
2469 
2470  if (nddom.contains(i+1,j-1,k)) {
2471  Real tmp = fxy;
2472  m0 -= tmp;
2473  if (gid(i+1,j-1,k) < gidmax) {
2474  cols[nelems] = gid(i+1,j-1,k);
2475  mat[nelems] = tmp;
2476  ++nelems;
2477  }
2478  }
2479 
2480  if (nddom.contains(i-1,j,k)) {
2481  Real tmp = Real(0.0);
2482  if ( ccdom.contains(i-1,j-1,k)) {
2483  tmp += f2xmy;
2484  }
2485  if ( ccdom.contains(i-1,j,k)) {
2486  tmp += f2xmy;
2487  }
2488  m0 -= tmp;
2489  if (gid(i-1,j,k) < gidmax) {
2490  cols[nelems] = gid(i-1,j,k);
2491  mat[nelems] = tmp;
2492  ++nelems;
2493  }
2494  }
2495 
2496  if (nddom.contains(i+1,j,k)) {
2497  Real tmp = Real(0.0);
2498  if ( ccdom.contains(i ,j-1,k)) {
2499  tmp += f2xmy;
2500  }
2501  if ( ccdom.contains(i ,j,k)) {
2502  tmp += f2xmy;
2503  }
2504  m0 -= tmp;
2505  if (gid(i+1,j,k) < gidmax) {
2506  cols[nelems] = gid(i+1,j,k);
2507  mat[nelems] = tmp;
2508  ++nelems;
2509  }
2510  }
2511 
2512  if (nddom.contains(i-1,j+1,k)) {
2513  Real tmp = fxy;
2514  m0 -= tmp;
2515  if (gid(i-1,j+1,k) < gidmax) {
2516  cols[nelems] = gid(i-1,j+1,k);
2517  mat[nelems] = tmp;
2518  ++nelems;
2519  }
2520  }
2521 
2522  if (nddom.contains(i,j+1,k)) {
2523  Real tmp = Real(0.0);
2524  if ( ccdom.contains(i-1,j ,k)) {
2525  tmp += fmx2y + fm;
2526  }
2527  if ( ccdom.contains(i,j ,k)) {
2528  tmp += fmx2y - fp;
2529  }
2530  m0 -= tmp;
2531  if (gid(i,j+1,k) < gidmax) {
2532  cols[nelems] = gid(i,j+1,k);
2533  mat[nelems] = tmp;
2534  ++nelems;
2535  }
2536  }
2537 
2538  if (nddom.contains(i+1,j+1,k)) {
2539  Real tmp = fxy;
2540  m0 -= tmp;
2541  if (gid(i+1,j+1,k) < gidmax) {
2542  cols[nelems] = gid(i+1,j+1,k);
2543  mat[nelems] = tmp;
2544  ++nelems;
2545  }
2546  }
2547 
2548  mat[nelems_old] = m0;
2549  ncols[lid(i,j,k)] = nelems - nelems_old;
2550  }
2551  });
2552 }
2553 
2554 #ifdef AMREX_USE_GPU
2555 
2556 template <typename HypreInt, typename AtomicInt>
2558 void mlndlap_fillijmat_sten_gpu (const int ps, const int i, const int j, const int k,
2559  const int offset,
2560  Array4<AtomicInt const> const& gid,
2561  Array4<int const> const& lid,
2562  HypreInt* ncols, HypreInt* cols,
2563  Real* mat, // NOLINT(readability-non-const-parameter)
2564  Array4<Real const> const& sten) noexcept
2565 {
2566  if (lid(i,j,k) >= 0)
2567  {
2568  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2569  int nelems = 0;
2570 
2571  if (offset == 1 || offset == 0) {
2572  if (gid(i-1,j-1,k) < gidmax) {
2573  if (offset != 0) {
2574  cols[ps] = gid(i-1,j-1,k);
2575  mat[ps] = sten(i-1,j-1,k,3);
2576  }
2577  ++nelems;
2578  }
2579  if (offset != 0) { return; }
2580  }
2581 
2582  if (offset == 2 || offset == 0) {
2583  if (gid(i,j-1,k) < gidmax) {
2584  if (offset != 0) {
2585  cols[ps] = gid(i,j-1,k);
2586  mat[ps] = sten(i,j-1,k,2);
2587  }
2588  ++nelems;
2589  }
2590  if (offset != 0) { return; }
2591  }
2592 
2593  if (offset == 3 || offset == 0) {
2594  if (gid(i+1,j-1,k) < gidmax) {
2595  if (offset != 0) {
2596  cols[ps] = gid(i+1,j-1,k);
2597  mat[ps] = sten(i ,j-1,k,3);
2598  }
2599  ++nelems;
2600  }
2601  if (offset != 0) { return; }
2602  }
2603 
2604  if (offset == 4 || offset == 0) {
2605  if (gid(i-1,j,k) < gidmax) {
2606  if (offset != 0) {
2607  cols[ps] = gid(i-1,j,k);
2608  mat[ps] = sten(i-1,j,k,1);
2609  }
2610  ++nelems;
2611  }
2612  if (offset != 0) { return; }
2613  }
2614 
2615  if (offset == 5 || offset == 0) {
2616  if (gid(i+1,j,k) < gidmax) {
2617  if (offset != 0) {
2618  cols[ps] = gid(i+1,j,k);
2619  mat[ps] = sten(i ,j,k,1);
2620  }
2621  ++nelems;
2622  }
2623  if (offset != 0) { return; }
2624  }
2625 
2626  if (offset == 6 || offset == 0) {
2627  if (gid(i-1,j+1,k) < gidmax) {
2628  if (offset != 0) {
2629  cols[ps] = gid(i-1,j+1,k);
2630  mat[ps] = sten(i-1,j ,k,3);
2631  }
2632  ++nelems;
2633  }
2634  if (offset != 0) { return; }
2635  }
2636 
2637  if (offset == 7 || offset == 0) {
2638  if (gid(i,j+1,k) < gidmax) {
2639  if (offset != 0) {
2640  cols[ps] = gid(i,j+1,k);
2641  mat[ps] = sten(i,j ,k,2);
2642  }
2643  ++nelems;
2644  }
2645  if (offset != 0) { return; }
2646  }
2647 
2648  if (offset == 8 || offset == 0) {
2649  if (gid(i+1,j+1,k) < gidmax) {
2650  if (offset != 0) {
2651  cols[ps] = gid(i+1,j+1,k);
2652  mat[ps] = sten(i ,j ,k,3);
2653  }
2654  ++nelems;
2655  }
2656  if (offset != 0) { return; }
2657  }
2658 
2659  // Only offset == 0 could get this far.
2660  cols[ps] = gid(i,j,k);
2661  mat[ps] = sten(i,j,k,0);
2662  ncols[lid(i,j,k)] = nelems+1;
2663  }
2664 }
2665 
2666 template <typename HypreInt, typename AtomicInt>
2668 void mlndlap_fillijmat_aa_gpu (const int ps, const int i, const int j, const int k,
2669  const int offset,
2670  Box const& ndbx, Array4<AtomicInt const> const& gid,
2671  Array4<int const> const& lid,
2672  HypreInt* ncols, HypreInt* cols,
2673  Real* mat, // NOLINT(readability-non-const-parameter)
2674  Array4<Real const> const& sig,
2675  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
2676  Box const& ccdom, bool is_rz) noexcept
2677 {
2678  if (lid(i,j,k) >= 0)
2679  {
2680  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
2681  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
2682  Real fxy = facx + facy;
2683  Real f2xmy = Real(2.0)*facx - facy;
2684  Real fmx2y = Real(2.0)*facy - facx;
2685 
2686  Real fp, fm;
2687  if (is_rz) {
2688  fp = facy / static_cast<Real>(2*i+1);
2689  fm = facy / static_cast<Real>(2*i-1);
2690  } else {
2691  fp = fm = Real(0.0);
2692  }
2693 
2694  const Box& nddom = amrex::surroundingNodes(ccdom);
2695 
2696  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2697  int nelems = 0;
2698  Real m0 = Real(0.);
2699 
2700  if (offset == 1 || offset == 0) {
2701  if (nddom.contains(i-1,j-1,k)) {
2702  Real tmp = fxy*sig(i-1,j-1,k);
2703  m0 -= tmp;
2704  if (gid(i-1,j-1,k) < gidmax) {
2705  if (offset != 0) {
2706  cols[ps] = gid(i-1,j-1,k);
2707  mat[ps] = tmp;
2708  }
2709  ++nelems;
2710  }
2711  }
2712  if (offset != 0) { return; }
2713  }
2714 
2715  if (offset == 2 || offset == 0) {
2716  if (nddom.contains(i,j-1,k)) {
2717  Real tmp = Real(0.0);
2718  if ( ccdom.contains(i-1,j-1,k)) {
2719  tmp += sig(i-1,j-1,k) * (fmx2y + fm);
2720  }
2721  if ( ccdom.contains(i,j-1,k)) {
2722  tmp += sig(i,j-1,k) * (fmx2y - fp);
2723  }
2724  m0 -= tmp;
2725  if (gid(i,j-1,k) < gidmax) {
2726  if (offset != 0) {
2727  cols[ps] = gid(i,j-1,k);
2728  mat[ps] = tmp;
2729  }
2730  ++nelems;
2731  }
2732  }
2733  if (offset != 0) { return; }
2734  }
2735 
2736  if (offset == 3 || offset == 0) {
2737  if (nddom.contains(i+1,j-1,k)) {
2738  Real tmp = fxy*sig(i ,j-1,k);
2739  m0 -= tmp;
2740  if (gid(i+1,j-1,k) < gidmax) {
2741  if (offset != 0) {
2742  cols[ps] = gid(i+1,j-1,k);
2743  mat[ps] = tmp;
2744  }
2745  ++nelems;
2746  }
2747  }
2748  if (offset != 0) { return; }
2749  }
2750 
2751  if (offset == 4 || offset == 0) {
2752  if (nddom.contains(i-1,j,k)) {
2753  Real tmp = Real(0.0);
2754  if ( ccdom.contains(i-1,j-1,k)) {
2755  tmp += f2xmy*sig(i-1,j-1,k);
2756  }
2757  if ( ccdom.contains(i-1,j,k)) {
2758  tmp += f2xmy*sig(i-1,j,k);
2759  }
2760  m0 -= tmp;
2761  if (gid(i-1,j,k) < gidmax) {
2762  if (offset != 0) {
2763  cols[ps] = gid(i-1,j,k);
2764  mat[ps] = tmp;
2765  }
2766  ++nelems;
2767  }
2768  }
2769  if (offset != 0) { return; }
2770  }
2771 
2772  if (offset == 5 || offset == 0) {
2773  if (nddom.contains(i+1,j,k)) {
2774  Real tmp = Real(0.0);
2775  if ( ccdom.contains(i ,j-1,k)) {
2776  tmp += f2xmy*sig(i ,j-1,k);
2777  }
2778  if ( ccdom.contains(i ,j,k)) {
2779  tmp += f2xmy*sig(i ,j,k);
2780  }
2781  m0 -= tmp;
2782  if (gid(i+1,j,k) < gidmax) {
2783  if (offset != 0) {
2784  cols[ps] = gid(i+1,j,k);
2785  mat[ps] = tmp;
2786  }
2787  ++nelems;
2788  }
2789  }
2790  if (offset != 0) { return; }
2791  }
2792 
2793  if (offset == 6 || offset == 0) {
2794  if (nddom.contains(i-1,j+1,k)) {
2795  Real tmp = fxy*sig(i-1,j ,k);
2796  m0 -= tmp;
2797  if (gid(i-1,j+1,k) < gidmax) {
2798  if (offset != 0) {
2799  cols[ps] = gid(i-1,j+1,k);
2800  mat[ps] = tmp;
2801  }
2802  ++nelems;
2803  }
2804  }
2805  if (offset != 0) { return; }
2806  }
2807 
2808  if (offset == 7 || offset == 0) {
2809  if (nddom.contains(i,j+1,k)) {
2810  Real tmp = Real(0.0);
2811  if ( ccdom.contains(i-1,j ,k)) {
2812  tmp += sig(i-1,j ,k) * (fmx2y + fm);
2813  }
2814  if ( ccdom.contains(i,j ,k)) {
2815  tmp += sig(i,j ,k) * (fmx2y - fp);
2816  }
2817  m0 -= tmp;
2818  if (gid(i,j+1,k) < gidmax) {
2819  if (offset != 0) {
2820  cols[ps] = gid(i,j+1,k);
2821  mat[ps] = tmp;
2822  }
2823  ++nelems;
2824  }
2825  }
2826  if (offset != 0) { return; }
2827  }
2828 
2829  if (offset == 8 || offset == 0) {
2830  if (nddom.contains(i+1,j+1,k)) {
2831  Real tmp = fxy*sig(i ,j ,k);
2832  m0 -= tmp;
2833  if (gid(i+1,j+1,k) < gidmax) {
2834  if (offset != 0) {
2835  cols[ps] = gid(i+1,j+1,k);
2836  mat[ps] = tmp;
2837  }
2838  ++nelems;
2839  }
2840  }
2841  if (offset != 0) { return; }
2842  }
2843 
2844  // Only offset == 0 could get this far.
2845  cols[ps] = gid(i,j,k);
2846  mat[ps] = m0;
2847  ncols[lid(i,j,k)] = nelems+1;
2848  }
2849 }
2850 
2851 template <typename HypreInt, typename AtomicInt>
2853 void mlndlap_fillijmat_ha_gpu (const int ps, const int i, const int j, const int k,
2854  const int offset,
2855  Box const& ndbx, Array4<AtomicInt const> const& gid,
2856  Array4<int const> const& lid,
2857  HypreInt* ncols, HypreInt* cols,
2858  Real* mat, // NOLINT(readability-non-const-parameter)
2859  Array4<Real const> const& sx,
2860  Array4<Real const> const& sy,
2861  GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
2862  Box const& ccdom, bool is_rz) noexcept
2863 {
2864  if (lid(i,j,k) >= 0)
2865  {
2866  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
2867  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
2868 
2869  Real fp, fm;
2870  if (is_rz) {
2871  fp = facy / static_cast<Real>(2*i+1);
2872  fm = facy / static_cast<Real>(2*i-1);
2873  } else {
2874  fp = fm = Real(0.0);
2875  }
2876 
2877  const Box& nddom = amrex::surroundingNodes(ccdom);
2878 
2879  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
2880  int nelems = 0;
2881  Real m0 = Real(0.);
2882 
2883  if (offset == 1 || offset == 0) {
2884  if (nddom.contains(i-1,j-1,k)) {
2885  Real tmp = facx*sx(i-1,j-1,k) + facy*sy(i-1,j-1,k);
2886  m0 -= tmp;
2887  if ( gid(i-1,j-1,k) < gidmax) {
2888  if (offset != 0) {
2889  cols[ps] = gid(i-1,j-1,k);
2890  mat[ps] = tmp;
2891  }
2892  ++nelems;
2893  }
2894  }
2895  if (offset != 0) { return; }
2896  }
2897 
2898  if (offset == 2 || offset == 0) {
2899  if (nddom.contains(i,j-1,k)) {
2900  Real tmp = Real(0.0);
2901  if ( ccdom.contains(i-1,j-1,k)) {
2902  tmp += sy(i-1,j-1,k) * (facy * Real(2.0) + fm)
2903  - sx(i-1,j-1,k) * facx;
2904  }
2905  if ( ccdom.contains(i,j-1,k)) {
2906  tmp += sy(i,j-1,k) * (facy * Real(2.0) - fp)
2907  - sx(i,j-1,k) * facx;
2908  }
2909  m0 -= tmp;
2910  if (gid(i,j-1,k) < gidmax) {
2911  if (offset != 0) {
2912  cols[ps] = gid(i,j-1,k);
2913  mat[ps] = tmp;
2914  }
2915  ++nelems;
2916  }
2917  }
2918  if (offset != 0) { return; }
2919  }
2920 
2921  if (offset == 3 || offset == 0) {
2922  if (nddom.contains(i+1,j-1,k)) {
2923  Real tmp = facx*sx(i ,j-1,k) + facy*sy(i ,j-1,k);
2924  m0 -= tmp;
2925  if (gid(i+1,j-1,k) < gidmax) {
2926  if (offset != 0) {
2927  cols[ps] = gid(i+1,j-1,k);
2928  mat[ps] = tmp;
2929  }
2930  ++nelems;
2931  }
2932  }
2933  if (offset != 0) { return; }
2934  }
2935 
2936  if (offset == 4 || offset == 0) {
2937  if (nddom.contains(i-1,j,k)) {
2938  Real tmp = Real(0.0);
2939  if ( ccdom.contains(i-1,j-1,k)) {
2940  tmp += sx(i-1,j-1,k) * facx*Real(2.0)
2941  - sy(i-1,j-1,k) * facy;
2942  }
2943  if ( ccdom.contains(i-1,j,k)) {
2944  tmp += sx(i-1,j,k) * facx*Real(2.0)
2945  - sy(i-1,j,k) * facy;
2946  }
2947  m0 -= tmp;
2948  if (gid(i-1,j,k) < gidmax) {
2949  if (offset != 0) {
2950  cols[ps] = gid(i-1,j,k);
2951  mat[ps] = tmp;
2952  }
2953  ++nelems;
2954  }
2955  }
2956  if (offset != 0) { return; }
2957  }
2958 
2959  if (offset == 5 || offset == 0) {
2960  if (nddom.contains(i+1,j,k)) {
2961  Real tmp = Real(0.0);
2962  if ( ccdom.contains(i ,j-1,k)) {
2963  tmp += sx(i ,j-1,k) * facx*Real(2.0)
2964  - sy(i ,j-1,k) * facy;
2965  }
2966  if ( ccdom.contains(i ,j,k)) {
2967  tmp += sx(i ,j,k) * facx*Real(2.0)
2968  - sy(i ,j,k) * facy;
2969  }
2970  m0 -= tmp;
2971  if (gid(i+1,j,k) < gidmax) {
2972  if (offset != 0) {
2973  cols[ps] = gid(i+1,j,k);
2974  mat[ps] = tmp;
2975  }
2976  ++nelems;
2977  }
2978  }
2979  if (offset != 0) { return; }
2980  }
2981 
2982  if (offset == 6 || offset == 0) {
2983  if (nddom.contains(i-1,j+1,k)) {
2984  Real tmp = facx*sx(i-1,j ,k) + facy*sy(i-1,j ,k);
2985  m0 -= tmp;
2986  if (gid(i-1,j+1,k) < gidmax) {
2987  if (offset != 0) {
2988  cols[ps] = gid(i-1,j+1,k);
2989  mat[ps] = tmp;
2990  }
2991  ++nelems;
2992  }
2993  }
2994  if (offset != 0) { return; }
2995  }
2996 
2997  if (offset == 7 || offset == 0) {
2998  if (nddom.contains(i,j+1,k)) {
2999  Real tmp = Real(0.0);
3000  if ( ccdom.contains(i-1,j ,k)) {
3001  tmp += sy(i-1,j ,k) * (facy*Real(2.0) + fm)
3002  - sx(i-1,j ,k) * facx;
3003  }
3004  if ( ccdom.contains(i,j ,k)) {
3005  tmp += sy(i,j ,k) * (facy*Real(2.0) - fp)
3006  - sx(i,j ,k) * facx;
3007  }
3008  m0 -= tmp;
3009  if (gid(i,j+1,k) < gidmax) {
3010  if (offset != 0) {
3011  cols[ps] = gid(i,j+1,k);
3012  mat[ps] = tmp;
3013  }
3014  ++nelems;
3015  }
3016  }
3017  if (offset != 0) { return; }
3018  }
3019 
3020  if (offset == 8 || offset == 0) {
3021  if (nddom.contains(i+1,j+1,k)) {
3022  Real tmp = facx*sx(i ,j ,k) + facy*sy(i ,j ,k);
3023  m0 -= tmp;
3024  if (gid(i+1,j+1,k) < gidmax) {
3025  if (offset != 0) {
3026  cols[ps] = gid(i+1,j+1,k);
3027  mat[ps] = tmp;
3028  }
3029  ++nelems;
3030  }
3031  }
3032  if (offset != 0) { return; }
3033  }
3034 
3035  // Only offset == 0 could get this far.
3036  cols[ps] = gid(i,j,k);
3037  mat[ps] = m0;
3038  ncols[lid(i,j,k)] = nelems+1;
3039  }
3040 }
3041 
3042 template <typename HypreInt, typename AtomicInt>
3044 void mlndlap_fillijmat_cs_gpu (const int ps, const int i, const int j, const int k,
3045  const int offset,
3046  Box const& ndbx, Array4<AtomicInt const> const& gid,
3047  Array4<int const> const& lid,
3048  HypreInt* ncols, HypreInt* cols,
3049  Real* mat, // NOLINT(readability-non-const-parameter)
3050  Real sig, GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
3051  Box const& ccdom, bool is_rz) noexcept
3052 {
3053  if (lid(i,j,k) >= 0)
3054  {
3055  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0] * sig;
3056  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1] * sig;
3057  Real fxy = facx + facy;
3058  Real f2xmy = Real(2.0)*facx - facy;
3059  Real fmx2y = Real(2.0)*facy - facx;
3060 
3061  Real fp, fm;
3062  if (is_rz) {
3063  fp = facy / static_cast<Real>(2*i+1);
3064  fm = facy / static_cast<Real>(2*i-1);
3065  } else {
3066  fp = fm = Real(0.0);
3067  }
3068 
3069  // Note that nddom has been grown at periodic boundaries.
3070  const Box& nddom = amrex::surroundingNodes(ccdom);
3071 
3072  constexpr auto gidmax = std::numeric_limits<AtomicInt>::max();
3073  int nelems = 0;
3074  Real m0 = Real(0.);
3075 
3076  if (offset == 1 || offset == 0) {
3077  if (nddom.contains(i-1,j-1,k)) {
3078  Real tmp = fxy;
3079  m0 -= tmp;
3080  if ( gid(i-1,j-1,k) < gidmax) {
3081  if (offset != 0) {
3082  cols[ps] = gid(i-1,j-1,k);
3083  mat[ps] = tmp;
3084  }
3085  ++nelems;
3086  }
3087  }
3088  if (offset != 0) { return; }
3089  }
3090 
3091  if (offset == 2 || offset == 0) {
3092  if (nddom.contains(i,j-1,k)) {
3093  Real tmp = Real(0.0);
3094  if ( ccdom.contains(i-1,j-1,k)) {
3095  tmp += fmx2y + fm;
3096  }
3097  if ( ccdom.contains(i,j-1,k)) {
3098  tmp += fmx2y - fp;
3099  }
3100  m0 -= tmp;
3101  if (gid(i,j-1,k) < gidmax) {
3102  if (offset != 0) {
3103  cols[ps] = gid(i,j-1,k);
3104  mat[ps] = tmp;
3105  }
3106  ++nelems;
3107  }
3108  }
3109  if (offset != 0) { return; }
3110  }
3111 
3112  if (offset == 3 || offset == 0) {
3113  if (nddom.contains(i+1,j-1,k)) {
3114  Real tmp = fxy;
3115  m0 -= tmp;
3116  if (gid(i+1,j-1,k) < gidmax) {
3117  if (offset != 0) {
3118  cols[ps] = gid(i+1,j-1,k);
3119  mat[ps] = tmp;
3120  }
3121  ++nelems;
3122  }
3123  }
3124  if (offset != 0) { return; }
3125  }
3126 
3127  if (offset == 4 || offset == 0) {
3128  if (nddom.contains(i-1,j,k)) {
3129  Real tmp = Real(0.0);
3130  if ( ccdom.contains(i-1,j-1,k)) {
3131  tmp += f2xmy;
3132  }
3133  if ( ccdom.contains(i-1,j,k)) {
3134  tmp += f2xmy;
3135  }
3136  m0 -= tmp;
3137  if (gid(i-1,j,k) < gidmax) {
3138  if (offset != 0) {
3139  cols[ps] = gid(i-1,j,k);
3140  mat[ps] = tmp;
3141  }
3142  ++nelems;
3143  }
3144  }
3145  if (offset != 0) { return; }
3146  }
3147 
3148  if (offset == 5 || offset == 0) {
3149  if (nddom.contains(i+1,j,k)) {
3150  Real tmp = Real(0.0);
3151  if ( ccdom.contains(i ,j-1,k)) {
3152  tmp += f2xmy;
3153  }
3154  if ( ccdom.contains(i ,j,k)) {
3155  tmp += f2xmy;
3156  }
3157  m0 -= tmp;
3158  if (gid(i+1,j,k) < gidmax) {
3159  if (offset != 0) {
3160  cols[ps] = gid(i+1,j,k);
3161  mat[ps] = tmp;
3162  }
3163  ++nelems;
3164  }
3165  }
3166  if (offset != 0) { return; }
3167  }
3168 
3169  if (offset == 6 || offset == 0) {
3170  if (nddom.contains(i-1,j+1,k)) {
3171  Real tmp = fxy;
3172  m0 -= tmp;
3173  if (gid(i-1,j+1,k) < gidmax) {
3174  if (offset != 0) {
3175  cols[ps] = gid(i-1,j+1,k);
3176  mat[ps] = tmp;
3177  }
3178  ++nelems;
3179  }
3180  }
3181  if (offset != 0) { return; }
3182  }
3183 
3184  if (offset == 7 || offset == 0) {
3185  if (nddom.contains(i,j+1,k)) {
3186  Real tmp = Real(0.0);
3187  if ( ccdom.contains(i-1,j ,k)) {
3188  tmp += fmx2y + fm;
3189  }
3190  if ( ccdom.contains(i,j ,k)) {
3191  tmp += fmx2y - fp;
3192  }
3193  m0 -= tmp;
3194  if (gid(i,j+1,k) < gidmax) {
3195  if (offset != 0) {
3196  cols[ps] = gid(i,j+1,k);
3197  mat[ps] = tmp;
3198  }
3199  ++nelems;
3200  }
3201  }
3202  if (offset != 0) { return; }
3203  }
3204 
3205  if (offset == 8 || offset == 0) {
3206  if (nddom.contains(i+1,j+1,k)) {
3207  Real tmp = fxy;
3208  m0 -= tmp;
3209  if (gid(i+1,j+1,k) < gidmax) {
3210  if (offset != 0) {
3211  cols[ps] = gid(i+1,j+1,k);
3212  mat[ps] = tmp;
3213  }
3214  ++nelems;
3215  }
3216  }
3217  if (offset != 0) { return; }
3218  }
3219 
3220  // Only offset == 0 could get this far.
3221  cols[ps] = gid(i,j,k);
3222  mat[ps] = m0;
3223  ncols[lid(i,j,k)] = nelems+1;
3224  }
3225 }
3226 
3227 #endif
3228 
3229 #endif
3230 
3232 int mlndlap_color (int i, int j, int)
3233 {
3234  return (i%2) + (j%2)*2;
3235 }
3236 
3238 void mlndlap_gscolor_ha (int i, int j, int k, Array4<Real> const& sol,
3239  Array4<Real const> const& rhs, Array4<Real const> const& sx,
3240  Array4<Real const> const& sy, Array4<int const> const& msk,
3241  GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color,
3242  bool is_rz) noexcept
3243 {
3244  if (mlndlap_color(i,j,k) == color) {
3245  if (msk(i,j,k)) {
3246  sol(i,j,k) = Real(0.0);
3247  } else {
3248  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
3249  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
3250 
3251  Real s0 = Real(-2.0)*(facx*(sx(i-1,j-1,k)+sx(i,j-1,k)+sx(i-1,j,k)+sx(i,j,k))
3252  +facy*(sy(i-1,j-1,k)+sy(i,j-1,k)+sy(i-1,j,k)+sy(i,j,k)));
3253 
3254  Real Ax = sol(i-1,j-1,k)*(facx*sx(i-1,j-1,k)+facy*sy(i-1,j-1,k))
3255  + sol(i+1,j-1,k)*(facx*sx(i ,j-1,k)+facy*sy(i ,j-1,k))
3256  + sol(i-1,j+1,k)*(facx*sx(i-1,j ,k)+facy*sy(i-1,j ,k))
3257  + sol(i+1,j+1,k)*(facx*sx(i ,j ,k)+facy*sy(i ,j ,k))
3258  + sol(i-1,j,k)*(Real(2.0)*facx*(sx(i-1,j-1,k)+sx(i-1,j,k))
3259  - facy*(sy(i-1,j-1,k)+sy(i-1,j,k)))
3260  + sol(i+1,j,k)*(Real(2.0)*facx*(sx(i ,j-1,k)+sx(i ,j,k))
3261  - facy*(sy(i ,j-1,k)+sy(i ,j,k)))
3262  + sol(i,j-1,k)*( -facx*(sx(i-1,j-1,k)+sx(i,j-1,k))
3263  +Real(2.0)*facy*(sy(i-1,j-1,k)+sy(i,j-1,k)))
3264  + sol(i,j+1,k)*( -facx*(sx(i-1,j ,k)+sx(i,j ,k))
3265  +Real(2.0)*facy*(sy(i-1,j ,k)+sy(i,j ,k)))
3266  + sol(i,j,k)*s0;
3267 
3268  if (is_rz) {
3269  Real fp = facy / static_cast<Real>(2*i+1);
3270  Real fm = facy / static_cast<Real>(2*i-1);
3271  Real frzlo = fm*sy(i-1,j-1,k)-fp*sy(i,j-1,k);
3272  Real frzhi = fm*sy(i-1,j ,k)-fp*sy(i,j ,k);
3273  s0 += - frzhi - frzlo;
3274  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
3275  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
3276  }
3277 
3278  sol(i,j,k) += (rhs(i,j,k) - Ax) / s0;
3279  }
3280  }
3281 }
3282 
3284 void mlndlap_gscolor_aa (int i, int j, int k, Array4<Real> const& sol,
3285  Array4<Real const> const& rhs, Array4<Real const> const& sig,
3286  Array4<int const> const& msk,
3287  GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color,
3288  bool is_rz) noexcept
3289 {
3290  if (mlndlap_color(i,j,k) == color) {
3291  if (msk(i,j,k)) {
3292  sol(i,j,k) = Real(0.0);
3293  } else {
3294  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
3295  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
3296  Real fxy = facx + facy;
3297  Real f2xmy = Real(2.0)*facx - facy;
3298  Real fmx2y = Real(2.0)*facy - facx;
3299 
3300  Real s0 = (-Real(2.0))*fxy*(sig(i-1,j-1,k)+sig(i,j-1,k)+sig(i-1,j,k)+sig(i,j,k));
3301  Real Ax = sol(i-1,j-1,k)*fxy*sig(i-1,j-1,k)
3302  + sol(i+1,j-1,k)*fxy*sig(i ,j-1,k)
3303  + sol(i-1,j+1,k)*fxy*sig(i-1,j ,k)
3304  + sol(i+1,j+1,k)*fxy*sig(i ,j ,k)
3305  + sol(i-1,j,k)*f2xmy*(sig(i-1,j-1,k)+sig(i-1,j,k))
3306  + sol(i+1,j,k)*f2xmy*(sig(i ,j-1,k)+sig(i ,j,k))
3307  + sol(i,j-1,k)*fmx2y*(sig(i-1,j-1,k)+sig(i,j-1,k))
3308  + sol(i,j+1,k)*fmx2y*(sig(i-1,j ,k)+sig(i,j ,k))
3309  + sol(i,j,k)*s0;
3310 
3311  if (is_rz) {
3312  Real fp = facy / static_cast<Real>(2*i+1);
3313  Real fm = facy / static_cast<Real>(2*i-1);
3314  Real frzlo = fm*sig(i-1,j-1,k)-fp*sig(i,j-1,k);
3315  Real frzhi = fm*sig(i-1,j ,k)-fp*sig(i,j ,k);
3316  s0 += - frzhi - frzlo;
3317  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
3318  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
3319  }
3320 
3321  sol(i,j,k) += (rhs(i,j,k) - Ax) / s0;
3322  }
3323  }
3324 }
3325 
3327 void mlndlap_gscolor_c (int i, int j, int k, Array4<Real> const& sol,
3328  Array4<Real const> const& rhs, Real sig,
3329  Array4<int const> const& msk,
3330  GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color,
3331  bool is_rz) noexcept
3332 {
3333  if (mlndlap_color(i,j,k) == color) {
3334  if (msk(i,j,k)) {
3335  sol(i,j,k) = Real(0.0);
3336  } else {
3337  Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0];
3338  Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1];
3339  Real fxy = facx + facy;
3340  Real f2xmy = Real(2.0)*facx - facy;
3341  Real fmx2y = Real(2.0)*facy - facx;
3342 
3343  Real s0 = (-Real(2.0))*fxy*Real(4.);
3344  Real Ax = sol(i-1,j-1,k)*fxy
3345  + sol(i+1,j-1,k)*fxy
3346  + sol(i-1,j+1,k)*fxy
3347  + sol(i+1,j+1,k)*fxy
3348  + sol(i-1,j,k)*f2xmy*Real(2.)
3349  + sol(i+1,j,k)*f2xmy*Real(2.)
3350  + sol(i,j-1,k)*fmx2y*Real(2.)
3351  + sol(i,j+1,k)*fmx2y*Real(2.)
3352  + sol(i,j,k)*s0;
3353 
3354  if (is_rz) {
3355  Real fp = facy / static_cast<Real>(2*i+1);
3356  Real fm = facy / static_cast<Real>(2*i-1);
3357  Real frzlo = fm-fp;
3358  Real frzhi = fm-fp;
3359  s0 += - frzhi - frzlo;
3360  Ax += frzhi*(sol(i,j+1,k)-sol(i,j,k))
3361  + frzlo*(sol(i,j-1,k)-sol(i,j,k));
3362  }
3363 
3364  sol(i,j,k) += (rhs(i,j,k) - Ax*sig) / (s0*sig);
3365  }
3366  }
3367 }
3368 
3370 void mlndlap_gscolor_sten (int i, int j, int k, Array4<Real> const& sol,
3371  Array4<Real const> const& rhs,
3372  Array4<Real const> const& sten,
3373  Array4<int const> const& msk, int color) noexcept
3374 {
3375  if (mlndlap_color(i,j,k) == color) {
3376  mlndlap_gauss_seidel_sten(i,j,k,sol,rhs,sten,msk);
3377  }
3378 }
3379 
3380 }
3381 #endif
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
int idir
Definition: AMReX_HypreMLABecLap.cpp:1093
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
Array4< Real > fine
Definition: AMReX_InterpFaceRegister.cpp:90
Array4< Real const > crse
Definition: AMReX_InterpFaceRegister.cpp:92
#define AMREX_LOOP_3D(bx, i, j, k, block)
Definition: AMReX_Loop.nolint.H:4
HYPRE_Int Int
Definition: AMReX_HypreNodeLap.H:36
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
static constexpr int i_S_x2
Definition: AMReX_algoim.H:15
static constexpr int i_S_y
Definition: AMReX_algoim.H:13
static constexpr int i_S_x
Definition: AMReX_algoim.H:12
static constexpr int i_B_x
Definition: AMReX_algoim.H:36
static constexpr int i_B_y
Definition: AMReX_algoim.H:37
static constexpr int i_S_y2
Definition: AMReX_algoim.H:16
@ max
Definition: AMReX_ParallelReduce.H:17
constexpr int iy
Definition: AMReX_Interp_2D_C.H:33
constexpr int ix
Definition: AMReX_Interp_2D_C.H:32
constexpr Real almostzero
Definition: AMReX_MLNodeLap_K.H:22
constexpr int crse_cell
Definition: AMReX_MLNodeLinOp_K.H:54
constexpr int crse_fine_node
Definition: AMReX_MLNodeLinOp_K.H:57
constexpr double eps
Definition: AMReX_MLNodeLap_K.H:19
constexpr Real almostone
Definition: AMReX_MLNodeLap_K.H:21
static constexpr int P
Definition: AMReX_OpenBC.H:14
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_ha(int i, int, int, Array4< Real const > const &x, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:52
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_gscolor_ha(int i, int j, int k, Array4< Real > const &sol, Array4< Real const > const &rhs, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv, int color) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:426
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:355
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_Ax_fine_contrib_cs(int i, int j, int, Box const &ndbx, Box const &ccbx, Array4< Real > const &f, Array4< Real const > const &res, Array4< Real const > const &rhs, Array4< Real const > const &phi, Real const sig, Array4< int const > const &msk, bool is_rz, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:1157
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_divu_fine_contrib(int i, int j, int, Box const &fvbx, Box const &velbx, Array4< Real > const &rhs, Array4< Real const > const &vel, Array4< Real const > const &frhs, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv, bool is_rz) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:867
BoxND< AMREX_SPACEDIM > Box
Definition: AMReX_BaseFwd.H:27
AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrentOnCpu(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:378
void mlndlap_gauss_seidel_with_line_solve_aa(Box const &, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &, GpuArray< Real, AMREX_SPACEDIM > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:225
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_ha(int i, int, int, Array4< Real > const &sol, Real Ax, Array4< Real const > const &rhs, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:92
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_semi_avgdown_coeff(int i, int j, int k, Array4< Real > const &crse, Array4< Real const > const &fine, int) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:30
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_interpadd_rap(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:408
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_avgdown_coeff_y(int i, int j, int k, Array4< Real > const &crse, Array4< Real const > const &fine) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:36
void mlndlap_gauss_seidel_aa(Box const &bx, Array4< Real > const &sol, Array4< Real const > const &rhs, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:193
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_aa(int i, int j, int k, Array4< Real const > const &x, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:66
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_c(int i, int, int, Array4< Real > const &sol, Real Ax, Array4< Real const > const &rhs, Real sig, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:141
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_jacobi_aa(int i, int j, int k, Array4< Real > const &sol, Real Ax, Array4< Real const > const &rhs, Array4< Real const > const &sig, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:125
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_sten(int, int, int, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:396
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_interpadd_c(int i, int, int, Array4< Real > const &fine, Array4< Real const > const &crse, Array4< int const > const &msk) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:233
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_set_stencil_s0(int, int, int, Array4< Real > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:387
void mlndlap_gauss_seidel_ha(Box const &bx, Array4< Real > const &sol, Array4< Real const > const &rhs, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:170
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T abs(const GpuComplex< T > &a_z) noexcept
Return the absolute value of a complex number.
Definition: AMReX_GpuComplex.H:356
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_res_cf_contrib(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &, Array4< int const > const &, Array4< int const > const &, Array4< Real const > const &, GpuArray< Real, AMREX_SPACEDIM > const &, Box const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, bool) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:357
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_normalize_aa(int i, int j, int k, Array4< Real > const &x, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:84
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_sum_Ax(P const &pred, S const &sig, int i, int j, Real facx, Real facy, Array4< Real const > const &phi, bool is_rz) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:1057
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_rhcc_fine_contrib(int, int, int, Box const &, Array4< Real > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:332
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_semi_interpadd_aa(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &, int) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:270
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_set_stencil(Box const &, Array4< Real > const &, Array4< Real const > const &, GpuArray< Real, AMREX_SPACEDIM > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:381
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real aa_interp_line_y(Array4< Real const > const &crse, Array4< Real const > const &sig, int i, int j, int ic, int jc) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:582
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_zero_fine(int i, int, int, Array4< Real > const &phi, Array4< int const > const &msk, int fine_flag) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:8
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_c(int i, int, int, Array4< Real const > const &x, Real sigma, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:37
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real ha_interp_face_xy(Array4< Real const > const &crse, Array4< Real const > const &sigx, Array4< Real const > const &sigy, int i, int j, int ic, int jc) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:694
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_interpadd_aa(int i, int, int, Array4< Real > const &fine, Array4< Real const > const &crse, Array4< Real const > const &sig, Array4< int const > const &msk) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:251
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_res_cf_contrib_cs(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Real, Array4< int const > const &, Array4< int const > const &, Array4< int const > const &, Array4< Real const > const &, GpuArray< Real, AMREX_SPACEDIM > const &, Box const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, bool) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:369
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_divu(int i, int, int, Array4< Real > const &rhs, Array4< Real const > const &vel, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv, Box const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:284
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_divu_cf_contrib(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &, Array4< int const > const &, Array4< int const > const &, GpuArray< Real, AMREX_SPACEDIM > const &, Box const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, bool) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:338
void mlndlap_gauss_seidel_sten(Box const &, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:401
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_gscolor_sten(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &, int) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:479
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > surroundingNodes(const BoxND< dim > &b, int dir) noexcept
Returns a BoxND with NODE based coordinates in direction dir that encloses BoxND b....
Definition: AMReX_Box.H:1399
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVectND< dim > scale(const IntVectND< dim > &p, int s) noexcept
Returns a IntVectND obtained by multiplying each of the components of this IntVectND by s.
Definition: AMReX_IntVect.H:1006
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_gscolor_aa(int i, int j, int k, Array4< Real > const &sol, Array4< Real const > const &rhs, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv, int color) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:448
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_mknewu(int i, int, int, Array4< Real > const &u, Array4< Real const > const &p, Array4< Real const > const &sig, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:312
AMREX_GPU_HOST_DEVICE AMREX_ATTRIBUTE_FLATTEN_FOR void LoopConcurrent(Dim3 lo, Dim3 hi, F const &f) noexcept
Definition: AMReX_Loop.H:150
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_restriction_rap(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< Real const > const &, Array4< int const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:414
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlndlap_gscolor_c(int i, int j, int k, Array4< Real > const &sol, Array4< Real const > const &rhs, Real sig, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv, int color) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:458
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_sum_Df(int ii, int jj, Real facx, Real facy, Array4< Real const > const &vel, Box const &velbx, bool is_rz) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:843
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_Ax_fine_contrib_doit(S const &sig, int i, int j, Box const &ndbx, Box const &ccbx, Array4< Real > const &f, Array4< Real const > const &res, Array4< Real const > const &rhs, Array4< Real const > const &phi, Array4< int const > const &msk, bool is_rz, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:1099
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real aa_interp_face_xy(Array4< Real const > const &crse, Array4< Real const > const &sig, int i, int j, int ic, int jc) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:591
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > coarsen(const BoxND< dim > &b, int ref_ratio) noexcept
Coarsen BoxND by given (positive) refinement ratio. NOTE: if type(dir) = CELL centered: lo <- lo/rati...
Definition: AMReX_Box.H:1304
AMREX_GPU_DEVICE AMREX_FORCE_INLINE int mlndlap_color(int i, int, int)
Definition: AMReX_MLNodeLap_1D_K.H:420
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition: AMReX.cpp:225
const int[]
Definition: AMReX_BLProfiler.cpp:1664
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_crse_resid(int, int, int, Array4< Real > const &, Array4< Real const > const &, Array4< int const > const &, Box const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &, bool) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:349
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_normalize_ha(int i, int, int, Array4< Real > const &x, Array4< Real const > const &sx, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:74
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_avgdown_coeff_x(int i, int, int, Array4< Real > const &crse, Array4< Real const > const &fine) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:21
void mlndlap_gauss_seidel_c(Box const &bx, Array4< Real > const &sol, Array4< Real const > const &rhs, Real sig, Array4< int const > const &msk, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:203
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_interpadd_ha(int i, int j, int k, Array4< Real > const &fine, Array4< Real const > const &crse, Array4< Real const > const &sig, Array4< int const > const &msk) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:276
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_Ax_fine_contrib(int i, int j, int, Box const &ndbx, Box const &ccbx, Array4< Real > const &f, Array4< Real const > const &res, Array4< Real const > const &rhs, Array4< Real const > const &phi, Array4< Real const > const &sig, Array4< int const > const &msk, bool is_rz, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:1143
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real aa_interp_line_x(Array4< Real const > const &crse, Array4< Real const > const &sig, int i, int j, int ic, int jc) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:573
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_stencil_rap(int, int, int, Array4< Real > const &, Array4< Real const > const &) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:391
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_adotx_sten_doit(int i, int j, int k, Array4< Real const > const &x, Array4< Real const > const &sten) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:1507
AMREX_FORCE_INLINE void tridiagonal_solve(Array1D< T, 0, 31 > &a_ls, Array1D< T, 0, 31 > &b_ls, Array1D< T, 0, 31 > &c_ls, Array1D< T, 0, 31 > &r_ls, Array1D< T, 0, 31 > &u_ls, Array1D< T, 0, 31 > &gam, int ilen) noexcept
Definition: AMReX_MLABecLap_3D_K.H:432
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real mlndlap_rhcc(int i, int, int, Array4< Real const > const &rhcc, Array4< int const > const &msk) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:299
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlndlap_mknewu_c(int i, int, int, Array4< Real > const &u, Array4< Real const > const &p, Real sig, GpuArray< Real, AMREX_SPACEDIM > const &dxinv) noexcept
Definition: AMReX_MLNodeLap_1D_K.H:322
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuComplex< T > sqrt(const GpuComplex< T > &a_z) noexcept
Return the square root of a complex number.
Definition: AMReX_GpuComplex.H:373
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real neumann_scale(int i, int j, Box const &nddom, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &bclo, GpuArray< LinOpBCType, AMREX_SPACEDIM > const &bchi) noexcept
Definition: AMReX_MLNodeLap_2D_K.H:930
Definition: AMReX_Array.H:161
Definition: AMReX_Array4.H:61