AMReX_ParticleUtil.H
#ifndef AMREX_PARTICLEUTIL_H_
#define AMREX_PARTICLEUTIL_H_
#include <AMReX_Config.H>

#include <AMReX_IntVect.H>
#include <AMReX_Box.H>
#include <AMReX_Gpu.H>
#include <AMReX_Print.H>
#include <AMReX_Math.H>
#include <AMReX_MFIter.H>
#include <AMReX_ParGDB.H>
#include <AMReX_ParticleTile.H>
#include <AMReX_TypeTraits.H>
#include <AMReX_Scan.H>

#include <limits>

namespace amrex {
template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value, int> foo = 0>
int
numParticlesOutOfRange (Iterator const& pti, int nGrow)
{
    return numParticlesOutOfRange(pti,
                                  IntVect(AMREX_D_DECL(nGrow, nGrow, nGrow)));
}

template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0>
int
numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
{
    using ParticleType = typename Iterator::ContainerType::ParticleType;

    const auto& tile = pti.GetParticleTile();
    const auto np = tile.numParticles();
    const auto& ptd = tile.getConstParticleTileData();
    const auto& geom = pti.Geom(pti.GetLevel());

    const auto& domain = geom.Domain();
    const auto& plo = geom.ProbLoArray();
    const auto& dxi = geom.InvCellSizeArray();

    Box box = pti.tilebox();
    box.grow(nGrow);

    ReduceOps<ReduceOpSum> reduce_op;
    ReduceData<int> reduce_data(reduce_op);
    using ReduceTuple = typename decltype(reduce_data)::Type;

    reduce_op.eval(np, reduce_data,
    [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    {
        auto p = make_particle<ParticleType>{}(ptd,i);
        if (p.id() < 0) { return false; }
        using AssignorType = typename Iterator::CellAssignor;
        AssignorType assignor;
        IntVect iv = assignor(p, plo, dxi, domain);
        return !box.contains(iv);
    });
    int hv = amrex::get<0>(reduce_data.value(reduce_op));
    return hv;
}

template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value && Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0>
int
numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
{
    using ParticleType = typename Iterator::ContainerType::ConstParticleType;

    const auto& tile = pti.GetParticleTile();
    const auto tile_data = tile.getConstParticleTileData();
    const auto np = tile.numParticles();
    const auto& geom = pti.Geom(pti.GetLevel());

    const auto domain = geom.Domain();
    const auto plo = geom.ProbLoArray();
    const auto dxi = geom.InvCellSizeArray();

    Box box = pti.tilebox();
    box.grow(nGrow);

    ReduceOps<ReduceOpSum> reduce_op;
    ReduceData<int> reduce_data(reduce_op);
    using ReduceTuple = typename decltype(reduce_data)::Type;

    reduce_op.eval(np, reduce_data,
    [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    {
        ParticleType p(tile_data,i);
        if (p.id() < 0) { return false; }
        using AssignorType = typename Iterator::CellAssignor;
        AssignorType assignor;
        IntVect iv = assignor(p, plo, dxi, domain);
        return !box.contains(iv);
    });
    int hv = amrex::get<0>(reduce_data.value(reduce_op));
    return hv;
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int nGrow)
{
    return numParticlesOutOfRange(pc, 0, pc.finestLevel(), nGrow);
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, IntVect nGrow)
{
    return numParticlesOutOfRange(pc, 0, pc.finestLevel(), nGrow);
}
template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int lev_min, int lev_max, int nGrow)
{
    BL_PROFILE("numParticlesOutOfRange()");

    return numParticlesOutOfRange(pc, lev_min, lev_max,
                                  IntVect(AMREX_D_DECL(nGrow, nGrow, nGrow)));
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int lev_min, int lev_max, IntVect nGrow)
{
    BL_PROFILE("numParticlesOutOfRange()");

    using ParIter = typename PC::ParConstIterType;
    int num_wrong = 0;
    for (int lev = lev_min; lev <= lev_max; ++lev)
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion() && !system::regtest_reduction) reduction(+:num_wrong)
#endif
        for(ParIter pti(pc, lev); pti.isValid(); ++pti)
        {
            num_wrong += numParticlesOutOfRange(pti, nGrow);
        }
    }

    return num_wrong;
}
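
// Example (illustrative sketch, not part of this header; `pc` is an assumed
// ParticleContainer): after moving particles, verify that none sits more than
// one ghost cell outside the tile it is assigned to:
//
//     int n_out = numParticlesOutOfRange(pc, 1);
//     AMREX_ALWAYS_ASSERT(n_out == 0);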

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int getTileIndex (const IntVect& iv, const Box& box, const bool a_do_tiling,
                  const IntVect& a_tile_size, Box& tbx)
{
    if (a_do_tiling == false) {
        tbx = box;
        return 0;
    } else {
        //
        // This function must be consistent with FabArrayBase::buildTileArray!
        //
        auto tiling_1d = [](int i, int lo, int hi, int tilesize,
                            int& ntile, int& tileidx, int& tlo, int& thi) {
            int ncells = hi-lo+1;
            ntile = amrex::max(ncells/tilesize, 1);
            int ts_right = ncells/ntile;
            int ts_left = ts_right+1;
            int nleft = ncells - ntile*ts_right;
            int ii = i - lo;
            int nbndry = nleft*ts_left;
            if (ii < nbndry) {
                tileidx = ii / ts_left; // tiles to the left of nbndry have size ts_left
                tlo = lo + tileidx * ts_left;
                thi = tlo + ts_left - 1;
            } else {
                tileidx = nleft + (ii-nbndry) / ts_right; // tiles to the right have size ts_right
                tlo = lo + tileidx * ts_right + nleft;
                thi = tlo + ts_right - 1;
            }
        };
        const IntVect& sml = box.smallEnd();
        const IntVect& big = box.bigEnd();
        IntVect ntiles, ivIndex, tilelo, tilehi;

        AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], sml[0]), big[0]);,
                     int iv1 = amrex::min(amrex::max(iv[1], sml[1]), big[1]);,
                     int iv2 = amrex::min(amrex::max(iv[2], sml[2]), big[2]););

        AMREX_D_TERM(tiling_1d(iv0, sml[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);,
                     tiling_1d(iv1, sml[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);,
                     tiling_1d(iv2, sml[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]););

        tbx = Box(tilelo, tilehi);

        return AMREX_D_TERM(ivIndex[0], + ntiles[0]*ivIndex[1], + ntiles[0]*ntiles[1]*ivIndex[2]);
    }
}
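
// Worked example (illustrative, not from the source): for a 1d range of 10
// cells [0,9] with tilesize 4, tiling_1d gives ntile = max(10/4,1) = 2,
// ts_right = 10/2 = 5, ts_left = 6, and nleft = 10 - 2*5 = 0, so both tiles
// hold 5 cells: [0,4] and [5,9]. A cell at i = 7 therefore gets tileidx = 1
// and tbx covers [5,9] in that direction.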

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int numTilesInBox (const Box& box, const bool a_do_tiling, const IntVect& a_tile_size)
{
    if (a_do_tiling == false) {
        return 1;
    } else {
        //
        // This function must be consistent with FabArrayBase::buildTileArray!
        //
        auto tiling_1d = [](int lo, int hi, int tilesize, int& ntile) {
            int ncells = hi-lo+1;
            ntile = amrex::max(ncells/tilesize, 1);
        };

        const IntVect& sml = box.smallEnd();
        const IntVect& big = box.bigEnd();
        IntVect ntiles;

        AMREX_D_TERM(tiling_1d(sml[0], big[0], a_tile_size[0], ntiles[0]);,
                     tiling_1d(sml[1], big[1], a_tile_size[1], ntiles[1]);,
                     tiling_1d(sml[2], big[2], a_tile_size[2], ntiles[2]););

        return AMREX_D_TERM(ntiles[0], *=ntiles[1], *=ntiles[2]);
    }
}

struct BinMapper
{
    BinMapper(const int* off_bins_p,
              const GpuArray<Real,AMREX_SPACEDIM>* dxi_p,
              const GpuArray<Real,AMREX_SPACEDIM>* plo_p,
              const Dim3* lo_p,
              const Dim3* hi_p,
              int* bin_type_array=nullptr)
        : m_off_bins_p(off_bins_p), m_dxi_p(dxi_p), m_plo_p(plo_p),
          m_lo_p(lo_p), m_hi_p(hi_p), m_bin_type_array(bin_type_array) {}

    template <typename T>
    AMREX_GPU_HOST_DEVICE
    unsigned int operator() (const T& ptd, int i) const
    {
        auto p = ptd[i];
        int type = (m_bin_type_array) ? m_bin_type_array[i] : 0;
        int offset = m_off_bins_p[type];

        AMREX_D_TERM(AMREX_ASSERT((p.pos(0)-m_plo_p[type][0])*m_dxi_p[type][0] - m_lo_p[type].x >= 0.0);,
                     AMREX_ASSERT((p.pos(1)-m_plo_p[type][1])*m_dxi_p[type][1] - m_lo_p[type].y >= 0.0);,
                     AMREX_ASSERT((p.pos(2)-m_plo_p[type][2])*m_dxi_p[type][2] - m_lo_p[type].z >= 0.0));

        auto iv = IntVect(AMREX_D_DECL(static_cast<int>(amrex::Math::floor((p.pos(0)-m_plo_p[type][0])*m_dxi_p[type][0])) - m_lo_p[type].x,
                                       static_cast<int>(amrex::Math::floor((p.pos(1)-m_plo_p[type][1])*m_dxi_p[type][1])) - m_lo_p[type].y,
                                       static_cast<int>(amrex::Math::floor((p.pos(2)-m_plo_p[type][2])*m_dxi_p[type][2])) - m_lo_p[type].z));
        auto iv3 = iv.dim3();
        int nx = m_hi_p[type].x-m_lo_p[type].x+1;
        int ny = m_hi_p[type].y-m_lo_p[type].y+1;
        int nz = m_hi_p[type].z-m_lo_p[type].z+1;
        int uix = amrex::min(nx-1,amrex::max(0,iv3.x));
        int uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
        int uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
        return static_cast<unsigned int>( (uix * ny + uiy) * nz + uiz + offset );
    }

private:
    const int* m_off_bins_p;
    const GpuArray<Real,AMREX_SPACEDIM>* m_dxi_p;
    const GpuArray<Real,AMREX_SPACEDIM>* m_plo_p;
    const Dim3* m_lo_p;
    const Dim3* m_hi_p;
    int* m_bin_type_array;
};

struct GetParticleBin
{
    GpuArray<Real,AMREX_SPACEDIM> plo;
    GpuArray<Real,AMREX_SPACEDIM> dxi;
    Box domain;
    IntVect bin_size;
    Box box;

    template <typename ParticleType>
    AMREX_GPU_HOST_DEVICE
    unsigned int operator() (const ParticleType& p) const noexcept
    {
        Box tbx;
        auto iv = getParticleCell(p, plo, dxi, domain);
        auto tid = getTileIndex(iv, box, true, bin_size, tbx);
        return static_cast<unsigned int>(tid);
    }
};

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (P const& p,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi) noexcept
{
    IntVect iv(
        AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])),
                     int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])),
                     int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2]))));
    return iv;
}

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (P const& p,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                         const Box& domain) noexcept
{
    IntVect iv = getParticleCell(p, plo, dxi);
    iv += domain.smallEnd();
    return iv;
}
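
// Example (illustrative sketch; `geom` and `p` are assumed names): locating
// the domain cell that contains a particle, using arrays a Geometry object
// already provides:
//
//     auto plo = geom.ProbLoArray();
//     auto dxi = geom.InvCellSizeArray();
//     IntVect iv = getParticleCell(p, plo, dxi, geom.Domain());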

template <typename PTD>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (PTD const& ptd, int i,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                         const Box& domain) noexcept
{
    if constexpr (PTD::ParticleType::is_soa_particle)
    {
        IntVect iv(
            AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])),
                         int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])),
                         int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2]))));
        iv += domain.smallEnd();
        return iv;
    } else {
        return getParticleCell(ptd.m_aos[i], plo, dxi, domain);
    }
}

struct DefaultAssignor
{
    template <typename P>
    AMREX_GPU_HOST_DEVICE
    IntVect operator() (P const& p,
                        amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                        amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                        const Box& domain) const noexcept
    {
        return getParticleCell(p, plo, dxi, domain);
    }
};

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int getParticleGrid (P const& p, amrex::Array4<int> const& mask,
                     amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                     amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                     const Box& domain) noexcept
{
    if (p.id() < 0) { return -1; }
    IntVect iv = getParticleCell(p, plo, dxi, domain);
    return mask(iv);
}

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
bool enforcePeriodic (P& p,
                      amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                      amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& phi,
                      amrex::GpuArray<amrex::ParticleReal,AMREX_SPACEDIM> const& rlo,
                      amrex::GpuArray<amrex::ParticleReal,AMREX_SPACEDIM> const& rhi,
                      amrex::GpuArray<int,AMREX_SPACEDIM> const& is_per) noexcept
{
    bool shifted = false;
    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
    {
        if (! is_per[idim]) { continue; }
        if (p.pos(idim) > rhi[idim]) {
            while (p.pos(idim) > rhi[idim]) {
                p.pos(idim) -= static_cast<ParticleReal>(phi[idim] - plo[idim]);
            }
            // clamp to avoid precision issues
            if (p.pos(idim) < rlo[idim]) {
                p.pos(idim) = rlo[idim];
            }
            shifted = true;
        }
        else if (p.pos(idim) < rlo[idim]) {
            while (p.pos(idim) < rlo[idim]) {
                p.pos(idim) += static_cast<ParticleReal>(phi[idim] - plo[idim]);
            }
            // clamp to avoid precision issues
            if (p.pos(idim) > rhi[idim]) {
                p.pos(idim) = rhi[idim];
            }
            shifted = true;
        }
        AMREX_ASSERT( (p.pos(idim) >= rlo[idim]) && (p.pos(idim) <= rhi[idim]) );
    }

    return shifted;
}
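
// Example (illustrative sketch; `p`, `plo`, `phi`, `rlo`, and `rhi` are
// assumed names, with rlo/rhi the domain bounds in ParticleReal precision):
// wrapping a particle back into a fully periodic domain after a push:
//
//     GpuArray<int,AMREX_SPACEDIM> is_per{AMREX_D_DECL(1,1,1)};
//     bool shifted = enforcePeriodic(p, plo, phi, rlo, rhi, is_per);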

template <typename PTile, typename ParFunc>
int
partitionParticles (PTile& ptile, ParFunc const& is_left)
{
    const int np = ptile.numParticles();
    if (np == 0) { return 0; }

    auto ptd = ptile.getParticleTileData();

    const int num_left = Reduce::Sum<int>(np,
        [=] AMREX_GPU_DEVICE (int i) -> int
        {
            return int(is_left(ptd, i));
        });

    // The ptile will be partitioned into left [0, num_left-1] and right [num_left, np-1].
    //
    // Note that the number of particles in [0, num_left-1] that belong in the right
    // partition is equal to the number of particles in [num_left, np-1] that belong
    // in the left partition. We define num_swaps to be this number. This is the minimum
    // number of swaps any algorithm needs to partition the ptile in place.
    //
    // From this it is easy to see that
    //     max_num_swaps = min(size([0, num_left-1]), size([num_left, np-1]))
    // is an upper bound for num_swaps.

    const int max_num_swaps = std::min(num_left, np - num_left);
    if (max_num_swaps == 0) { return num_left; }

    Gpu::DeviceVector<int> index_left(max_num_swaps);
    Gpu::DeviceVector<int> index_right(max_num_swaps);
    int * const p_index_left = index_left.dataPtr();
    int * const p_index_right = index_right.dataPtr();

    // The num_swaps particles that are in [0, num_left-1] but belong in the right
    // partition are at the same time the first num_swaps particles in the whole ptile
    // for which is_left is false.
    // Similarly, the num_swaps particles in [num_left, np-1] that belong in the left
    // partition are the last num_swaps particles of the ptile for which is_left is true.
    //
    // The PrefixSum is used to find exactly these particles and store their indexes in
    // index_left and index_right. Since num_swaps is not known, the first max_num_swaps
    // particles are stored instead. Here, dst = num_left-1-(i-s) is used to effectively
    // reverse the PrefixSum so that the last particles for which is_left is true are stored.
    //
    // This way, all indexes in index_right are in ascending order, and all indexes in
    // index_left are in descending order.

    Scan::PrefixSum<int>(np,
        [=] AMREX_GPU_DEVICE (int i) -> int
        {
            return int(!is_left(ptd, i));
        },
        [=] AMREX_GPU_DEVICE (int i, int const& s)
        {
            if (!is_left(ptd, i)) {
                int dst = s;
                if (dst < max_num_swaps) {
                    p_index_right[dst] = i;
                }
            } else {
                int dst = num_left-1-(i-s); // avoid integer overflow
                if (dst < max_num_swaps) {
                    p_index_left[dst] = i;
                }
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);

    // Finally, the particles are swapped. Since max_num_swaps is only an upper bound for
    // num_swaps, some swaps should not be performed and need to be skipped. This is the
    // case if the index in index_left[i] is already in the left partition or the index in
    // index_right[i] is already in the right partition. These two cases coincide for the
    // same i because index_right is in ascending order and index_left in descending order.
    // This means that for both index_left and index_right the first num_swaps particles
    // need to be swapped, and the particles after that should be skipped.
    //
    // The check right_i < left_i makes sure that the particle going to the right partition
    // is actually coming from the left partition, which has a lower index than the other
    // particle, and vice versa.
    //
    // Since exactly num_swaps swap operations are performed in the end, which is the
    // smallest number possible, this algorithm is optimal in the number of swap operations.
    // This results in good performance in practice if the size of a particle is large
    // enough to compensate for the extra kernel launches and evaluations of is_left that
    // this algorithm needs.

    ParallelFor(max_num_swaps,
        [=] AMREX_GPU_DEVICE (int i)
        {
            int left_i = p_index_left[i];
            int right_i = p_index_right[i];
            if (right_i < left_i) {
                swapParticle(ptd, ptd, left_i, right_i);
            }
        });

    Gpu::streamSynchronize(); // for index_left and index_right deallocation

    return num_left;
}
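
// Example (illustrative sketch; assumes `ptile` is a ParticleTile whose first
// SoA int component marks particles to keep): moving all marked particles to
// the front of the tile:
//
//     int n_left = partitionParticles(ptile,
//         [=] AMREX_GPU_DEVICE (auto& ptd, int i) {
//             return ptd.m_idata[0][i] == 1; // marked particles go left
//         });
//     // [0, n_left-1] are marked; [n_left, np-1] are not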

template <typename PTile>
void
removeInvalidParticles (PTile& ptile)
{
    const int new_size = partitionParticles(ptile,
        [=] AMREX_GPU_DEVICE (auto& ptd, int i) {
            return ptd.id(i).is_valid();
        });
    ptile.resize(new_size);
}
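
// Example (illustrative sketch; `ptile` is an assumed ParticleTile): dropping
// every particle whose id has been marked invalid:
//
//     removeInvalidParticles(ptile); // keeps only particles with valid ids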

#if defined(AMREX_USE_GPU)

template <typename PTile, typename PLocator, typename CellAssignor>
int
partitionParticlesByDest (PTile& ptile, const PLocator& ploc, CellAssignor const& assignor,
                          const ParticleBufferMap& pmap,
                          const GpuArray<Real,AMREX_SPACEDIM>& plo,
                          const GpuArray<Real,AMREX_SPACEDIM>& phi,
                          const GpuArray<ParticleReal,AMREX_SPACEDIM>& rlo,
                          const GpuArray<ParticleReal,AMREX_SPACEDIM>& rhi,
                          const GpuArray<int,AMREX_SPACEDIM>& is_per,
                          int lev, int gid, int /*tid*/,
                          int lev_min, int lev_max, int nGrow, bool remove_negative)
{
    auto getPID = pmap.getPIDFunctor();
    int pid = ParallelContext::MyProcSub();

    Gpu::DeviceVector<uint8_t> particle_stays(ptile.numParticles());
    uint8_t * const p_particle_stays = particle_stays.dataPtr();
    auto ptd = ptile.getParticleTileData();

    // The function for determining if a particle stays on this grid is very slow,
    // so we cache the result in particle_stays to avoid evaluating it multiple times.
    ParallelFor(ptile.numParticles(),
        [=] AMREX_GPU_DEVICE (int i)
        {
            int assigned_grid;
            int assigned_lev;

            if (!ptd.id(i).is_valid())
            {
                assigned_grid = -1;
                assigned_lev = -1;
            }
            else
            {
                auto p_prime = ptd.getSuperParticle(i);
                enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per);
                auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
                assigned_grid = amrex::get<0>(tup_prime);
                assigned_lev = amrex::get<1>(tup_prime);
                if (assigned_grid >= 0)
                {
                    AMREX_D_TERM(ptd.pos(0, i) = p_prime.pos(0);,
                                 ptd.pos(1, i) = p_prime.pos(1);,
                                 ptd.pos(2, i) = p_prime.pos(2););
                }
                else if (lev_min > 0)
                {
                    AMREX_D_TERM(p_prime.pos(0) = ptd.pos(0, i);,
                                 p_prime.pos(1) = ptd.pos(1, i);,
                                 p_prime.pos(2) = ptd.pos(2, i););
                    auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
                    assigned_grid = amrex::get<0>(tup);
                    assigned_lev = amrex::get<1>(tup);
                }
            }

            p_particle_stays[i] = uint8_t(
                ((remove_negative == false) && (!ptd.id(i).is_valid())) ||
                ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid)));
        });

    return partitionParticles(ptile,
        [=] AMREX_GPU_DEVICE (auto& /*ptd*/, int i) -> bool {
            return p_particle_stays[i];
        });
}

#endif

template <class PC1, class PC2>
bool SameIteratorsOK (const PC1& pc1, const PC2& pc2) {
    if (pc1.numLevels() != pc2.numLevels()) { return false; }
    if (pc1.do_tiling != pc2.do_tiling) { return false; }
    if (pc1.tile_size != pc2.tile_size) { return false; }
    for (int lev = 0; lev < pc1.numLevels(); ++lev) {
        if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) { return false; }
        if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) { return false; }
    }
    return true;
}

template <class PC>
void EnsureThreadSafeTiles (PC& pc) {
    using Iter = typename PC::ParIterType;
    for (int lev = 0; lev < pc.numLevels(); ++lev) {
        for (Iter pti(pc, lev); pti.isValid(); ++pti) {
            pc.DefineAndReturnParticleTile(lev, pti);
        }
    }
}

IntVect computeRefFac (const ParGDBBase* a_gdb, int src_lev, int lev);

Vector<int> computeNeighborProcs (const ParGDBBase* a_gdb, int ngrow);

namespace particle_detail
{
template <typename C>
void clearEmptyEntries (C& c)
{
    for (auto c_it = c.begin(); c_it != c.end(); /* no ++ */)
    {
        if (c_it->second.empty()) { c.erase(c_it++); }
        else { ++c_it; }
    }
}
}

template <class index_type, typename F>
void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
                               index_type nbins, F const& f)
{
    BL_PROFILE("PermutationForDeposition()");

#if defined(AMREX_USE_HIP)
    // MI250X has a small L2 cache and is more tolerant of atomic add contention,
    // so we use a small block size of 64 and the compressed layout.
    static constexpr index_type gpu_block_size = 64;
    static constexpr bool compressed_layout = true;
#else
    // A100 has a larger L2 cache and is very sensitive to atomic add contention,
    // so we use a large block size of 1024 and not the compressed layout.
    static constexpr index_type gpu_block_size = 1024;
    static constexpr bool compressed_layout = false;
#endif

    static constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
    static constexpr index_type llist_guard = std::numeric_limits<index_type>::max();

    // round up to gpu_block_size
    nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;

    Gpu::DeviceVector<index_type> llist_start(nbins, llist_guard);
    Gpu::DeviceVector<index_type> llist_next(nitems);
    perm.resize(nitems);
    Gpu::DeviceScalar<index_type> global_idx(0);

    index_type* pllist_start = llist_start.dataPtr();
    index_type* pllist_next = llist_next.dataPtr();
    index_type* pperm = perm.dataPtr();
    index_type* pglobal_idx = global_idx.dataPtr();

    amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (index_type i) noexcept
    {
        i = nitems - i - 1;
        pllist_next[i] = Gpu::Atomic::Exch(pllist_start + f(i), i);
    });

#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
    amrex::launch<gpu_block_size>(nbins / gpu_block_size, Gpu::gpuStream(),
        [pllist_start,pllist_next,pperm,pglobal_idx] AMREX_GPU_DEVICE () {
            __shared__ index_type sdata[gpu_block_size];
            __shared__ index_type global_idx_start;
            __shared__ index_type idx_start;

            index_type current_idx = 0;

            if constexpr (compressed_layout) {
                // Compressed layout: subsequent sweeps of up to gpu_block_size contiguous
                // particles are put right next to each other, while without the compressed
                // layout there can be other particle sweeps from different locations
                // between them.
                current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];

                index_type num_particles_thread = 0;
                while (current_idx != llist_guard) {
                    ++num_particles_thread;
                    current_idx = pllist_next[current_idx];
                }

                index_type num_particles_block =
                    Gpu::blockReduceSum<gpu_block_size>(num_particles_thread);

                if (threadIdx.x == 0) {
                    global_idx_start = Gpu::Atomic::Add(pglobal_idx, num_particles_block);
                }
            }

            current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];

            while (true) {
                sdata[threadIdx.x] = index_type(current_idx != llist_guard);
                index_type x = 0;

                // simple block-wide prefix sum
                for (index_type i = 1; i < gpu_block_size; i *= 2) {
                    __syncthreads();
                    if (threadIdx.x >= i) {
                        x = sdata[threadIdx.x - i];
                    }
                    __syncthreads();
                    if (threadIdx.x >= i) {
                        sdata[threadIdx.x] += x;
                    }
                }
                __syncthreads();
                if (sdata[gpu_block_size_m1] == 0) {
                    break;
                }
                if (threadIdx.x == gpu_block_size_m1) {
                    if constexpr (compressed_layout) {
                        idx_start = global_idx_start;
                        global_idx_start += sdata[gpu_block_size_m1];
                    } else {
                        idx_start = Gpu::Atomic::Add(pglobal_idx, sdata[gpu_block_size_m1]);
                    }
                }
                __syncthreads();
                sdata[threadIdx.x] += idx_start;
                if (current_idx != llist_guard) {
                    pperm[sdata[threadIdx.x] - 1] = current_idx;
                    current_idx = pllist_next[current_idx];
                }
            }
        });
#else
    amrex::ignore_unused(pperm, pglobal_idx, compressed_layout);
    Abort("PermutationForDeposition only implemented for CUDA and HIP");
#endif

    Gpu::Device::streamSynchronize();
}

template <class index_type, class PTile>
void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
                               const PTile& ptile, Box bx, Geometry geom, const IntVect idx_type)
{
    AMREX_ALWAYS_ASSERT(idx_type.allGE(0) && idx_type.allLE(2));

    const IntVect refine_vect = max(idx_type, IntVect(1)).min(IntVect(2));
    const IntVect type_vect = idx_type - idx_type / 2 * 2;

    geom.refine(refine_vect);

    Box domain = geom.Domain();

    bx.convert(type_vect);
    domain.convert(type_vect);

    const RealVect dxi(geom.InvCellSize());
    const RealVect pos_offset = Real(0.5) * (RealVect(geom.ProbLo()) + RealVect(geom.ProbHi())
        - RealVect(geom.CellSize()) * RealVect(domain.smallEnd() + domain.bigEnd()));

    const int ref_product = AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
    const IntVect ref_offset(AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));

    auto ptd = ptile.getConstParticleTileData();
    using ParticleType = typename PTile::ParticleType::ConstType;
    PermutationForDeposition<index_type>(perm, nitems, bx.numPts() * ref_product,
        [=] AMREX_GPU_DEVICE (index_type idx) noexcept
        {
            const auto p = make_particle<ParticleType>{}(ptd,idx);

            IntVect iv = ((p.pos() - pos_offset) * dxi).round();

            IntVect iv_coarse = iv / refine_vect;
            IntVect iv_remainder = iv - iv_coarse * refine_vect;

            iv_coarse = iv_coarse.max(bx.smallEnd());
            iv_coarse = iv_coarse.min(bx.bigEnd());
            return bx.index(iv_coarse) + bx.numPts() * (iv_remainder * ref_offset).sum();
        });
}
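
// Example (illustrative sketch; `ptile`, `bx`, and `geom` are assumed names,
// with `bx` the tile's valid box): computing a deposition-ordered permutation
// with cell-centered index type in every direction:
//
//     Gpu::DeviceVector<unsigned int> perm;
//     PermutationForDeposition<unsigned int>(perm, ptile.numParticles(),
//                                            ptile, bx, geom,
//                                            IntVect(AMREX_D_DECL(0,0,0)));
//     // perm[k] is the particle index that should be deposited k-th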

template <typename P>
std::string getDefaultCompNameReal (const int i) {
    int first_r_name = 0;
    if constexpr (P::is_soa_particle) {
        if (i < AMREX_SPACEDIM) {
            constexpr int x_in_ascii = 120;
            std::string const name{char(x_in_ascii+i)};
            return name;
        }
        first_r_name = AMREX_SPACEDIM;
    }
    std::string const name{("real_comp" + std::to_string(i-first_r_name))};
    return name;
}

template <typename P>
std::string getDefaultCompNameInt (const int i) {
    std::string const name{("int_comp" + std::to_string(i))};
    return name;
}
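
// Example (illustrative): for an SoA particle type P in 3D,
// getDefaultCompNameReal<P>(0), (1), (2) return "x", "y", "z"
// (ASCII 120 is 'x'), getDefaultCompNameReal<P>(3) returns "real_comp0",
// and getDefaultCompNameInt<P>(0) returns "int_comp0".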

#ifdef AMREX_USE_HDF5_ASYNC
void async_vol_es_wait_particle();
void async_vol_es_wait_close_particle();
#endif
} // namespace amrex

#endif // include guard