Block-Structured AMR Software Framework

AMReX_ParticleUtil.H
#ifndef AMREX_PARTICLEUTIL_H_
#define AMREX_PARTICLEUTIL_H_
#include <AMReX_Config.H>

#include <AMReX_IntVect.H>
#include <AMReX_Box.H>
#include <AMReX_Gpu.H>
#include <AMReX_Print.H>
#include <AMReX_Math.H>
#include <AMReX_MFIter.H>
#include <AMReX_ParGDB.H>
#include <AMReX_ParticleTile.H>
#include <AMReX_ParticleBufferMap.H>
#include <AMReX_TypeTraits.H>
#include <AMReX_Scan.H>

#include <limits>

namespace amrex {

template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value, int> foo = 0>
int
numParticlesOutOfRange (Iterator const& pti, int nGrow)
{
    return numParticlesOutOfRange(pti,
                                  IntVect(AMREX_D_DECL(nGrow, nGrow, nGrow)));
}

template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value, int> foo = 0>
int
numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
{
    const auto& tile = pti.GetParticleTile();
    const auto np = tile.numParticles();
    const auto& ptd = tile.getConstParticleTileData();
    const auto& geom = pti.Geom(pti.GetLevel());

    const auto& domain = geom.Domain();
    const auto& plo = geom.ProbLoArray();
    const auto& dxi = geom.InvCellSizeArray();

    Box box = pti.tilebox();
    box.grow(nGrow);

    ReduceOps<ReduceOpSum> reduce_op;
    ReduceData<int> reduce_data(reduce_op);
    using ReduceTuple = typename decltype(reduce_data)::Type;

    reduce_op.eval(np, reduce_data,
    [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    {
        auto p = ptd[i];
        if (!p.id().is_valid()) { return false; }
        using AssignorType = typename Iterator::CellAssignor;
        AssignorType assignor;
        IntVect iv = assignor(p, plo, dxi, domain);
        return !box.contains(iv);
    });
    int hv = amrex::get<0>(reduce_data.value(reduce_op));
    return hv;
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int nGrow)
{
    return numParticlesOutOfRange(pc, 0, pc.finestLevel(), nGrow);
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, IntVect nGrow)
{
    return numParticlesOutOfRange(pc, 0, pc.finestLevel(), nGrow);
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int lev_min, int lev_max, int nGrow)
{
    BL_PROFILE("numParticlesOutOfRange()");

    return numParticlesOutOfRange(pc, lev_min, lev_max,
                                  IntVect(AMREX_D_DECL(nGrow, nGrow, nGrow)));
}

template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
int
numParticlesOutOfRange (PC const& pc, int lev_min, int lev_max, IntVect nGrow)
{
    BL_PROFILE("numParticlesOutOfRange()");

    using ParIter = typename PC::ParConstIterType;
    int num_wrong = 0;
    for (int lev = lev_min; lev <= lev_max; ++lev)
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion() && !system::regtest_reduction) reduction(+:num_wrong)
#endif
        for (ParIter pti(pc, lev); pti.isValid(); ++pti)
        {
            num_wrong += numParticlesOutOfRange(pti, nGrow);
        }
    }

    return num_wrong;
}
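
The container-level overloads are convenient as a sanity check between a particle push and the next Redistribute. A minimal sketch (not part of this header), assuming a user-defined container type MyParticleContainer:

void checkParticlesAreLocal (MyParticleContainer const& pc)
{
    // count particles that are more than one ghost cell away from the box
    // they are currently stored on, over all levels of the container
    int n_out = numParticlesOutOfRange(pc, 1);
    AMREX_ALWAYS_ASSERT(n_out == 0);
}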

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int getTileIndex (const IntVect& iv, const Box& box, const bool a_do_tiling,
                  const IntVect& a_tile_size, Box& tbx)
{
    if (a_do_tiling == false) {
        tbx = box;
        return 0;
    } else {
        //
        // This function must be consistent with FabArrayBase::buildTileArray function!!!
        //
        auto tiling_1d = [](int i, int lo, int hi, int tilesize,
                            int& ntile, int& tileidx, int& tlo, int& thi) {
            int ncells = hi-lo+1;
            ntile = amrex::max(ncells/tilesize, 1);
            int ts_right = ncells/ntile;
            int ts_left = ts_right+1;
            int nleft = ncells - ntile*ts_right;
            int ii = i - lo;
            int nbndry = nleft*ts_left;
            if (ii < nbndry) {
                tileidx = ii / ts_left; // tiles on the left of nbndry have size of ts_left
                tlo = lo + tileidx * ts_left;
                thi = tlo + ts_left - 1;
            } else {
                tileidx = nleft + (ii-nbndry) / ts_right; // tiles on the right: ts_right
                tlo = lo + tileidx * ts_right + nleft;
                thi = tlo + ts_right - 1;
            }
        };
        const IntVect& sml = box.smallEnd();
        const IntVect& big = box.bigEnd();
        IntVect ntiles, ivIndex, tilelo, tilehi;

        AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], sml[0]), big[0]);,
                     int iv1 = amrex::min(amrex::max(iv[1], sml[1]), big[1]);,
                     int iv2 = amrex::min(amrex::max(iv[2], sml[2]), big[2]););

        AMREX_D_TERM(tiling_1d(iv0, sml[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);,
                     tiling_1d(iv1, sml[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);,
                     tiling_1d(iv2, sml[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]););

        tbx = Box(tilelo, tilehi);

        return AMREX_D_TERM(ivIndex[0], + ntiles[0]*ivIndex[1], + ntiles[0]*ntiles[1]*ivIndex[2]);
    }
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int numTilesInBox (const Box& box, const bool a_do_tiling, const IntVect& a_tile_size)
{
    if (a_do_tiling == false) {
        return 1;
    } else {
        //
        // This function must be consistent with FabArrayBase::buildTileArray function!!!
        //
        auto tiling_1d = [](int lo, int hi, int tilesize, int& ntile) {
            int ncells = hi-lo+1;
            ntile = amrex::max(ncells/tilesize, 1);
        };

        const IntVect& sml = box.smallEnd();
        const IntVect& big = box.bigEnd();
        IntVect ntiles;

        AMREX_D_TERM(tiling_1d(sml[0], big[0], a_tile_size[0], ntiles[0]);,
                     tiling_1d(sml[1], big[1], a_tile_size[1], ntiles[1]);,
                     tiling_1d(sml[2], big[2], a_tile_size[2], ntiles[2]););

        return AMREX_D_TERM(ntiles[0], *=ntiles[1], *=ntiles[2]);
    }
}
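
A small worked example of the tile decomposition may help. The sketch below (not part of AMReX) assumes a 3D build and splits a 32^3 box into 8^3 tiles:

void tilingExample ()
{
    Box box(IntVect(AMREX_D_DECL( 0,  0,  0)),
            IntVect(AMREX_D_DECL(31, 31, 31)));   // 32 cells per direction
    IntVect tile_size(AMREX_D_DECL(8, 8, 8));

    // 32/8 = 4 tiles per direction, so 64 tiles in 3D
    int ntiles = numTilesInBox(box, true, tile_size);

    Box tbx;                                       // receives the tile box containing iv
    IntVect iv(AMREX_D_DECL(13, 2, 29));
    // x: cell 13 lies in tile 1 ([8,15]), y: tile 0, z: tile 3,
    // so the linear tile index is 1 + 4*0 + 4*4*3 = 49
    int tid = getTileIndex(iv, box, true, tile_size, tbx);

    Print() << ntiles << " tiles; cell " << iv << " is in tile " << tid
            << " with tile box " << tbx << "\n";
}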

struct BinMapper
{
    BinMapper (const int* off_bins_p,
               const GpuArray<Real,AMREX_SPACEDIM>* dxi_p,
               const GpuArray<Real,AMREX_SPACEDIM>* plo_p,
               const Dim3* lo_p,
               const Dim3* hi_p,
               int* bin_type_array=nullptr)
        : m_off_bins_p(off_bins_p), m_dxi_p(dxi_p), m_plo_p(plo_p),
          m_lo_p(lo_p), m_hi_p(hi_p), m_bin_type_array(bin_type_array) {}

    template <typename T>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    unsigned int operator() (const T& ptd, int i) const
    {
        auto p = ptd[i];
        int type = (m_bin_type_array) ? m_bin_type_array[i] : 0;
        int offset = m_off_bins_p[type];

        AMREX_D_TERM(AMREX_ASSERT((p.pos(0)-m_plo_p[type][0])*m_dxi_p[type][0] - m_lo_p[type].x >= 0.0);,
                     AMREX_ASSERT((p.pos(1)-m_plo_p[type][1])*m_dxi_p[type][1] - m_lo_p[type].y >= 0.0);,
                     AMREX_ASSERT((p.pos(2)-m_plo_p[type][2])*m_dxi_p[type][2] - m_lo_p[type].z >= 0.0));

        auto iv = IntVect(AMREX_D_DECL(static_cast<int>(amrex::Math::floor((p.pos(0)-m_plo_p[type][0])*m_dxi_p[type][0])) - m_lo_p[type].x,
                                       static_cast<int>(amrex::Math::floor((p.pos(1)-m_plo_p[type][1])*m_dxi_p[type][1])) - m_lo_p[type].y,
                                       static_cast<int>(amrex::Math::floor((p.pos(2)-m_plo_p[type][2])*m_dxi_p[type][2])) - m_lo_p[type].z));
        auto iv3 = iv.dim3();
        int nx = m_hi_p[type].x-m_lo_p[type].x+1;
        int ny = m_hi_p[type].y-m_lo_p[type].y+1;
        int nz = m_hi_p[type].z-m_lo_p[type].z+1;
        int uix = amrex::min(nx-1,amrex::max(0,iv3.x));
        int uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
        int uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
        return static_cast<unsigned int>( (uix * ny + uiy) * nz + uiz + offset );
    }

private:
    const int* m_off_bins_p;
    const GpuArray<Real,AMREX_SPACEDIM>* m_dxi_p;
    const GpuArray<Real,AMREX_SPACEDIM>* m_plo_p;
    const Dim3* m_lo_p;
    const Dim3* m_hi_p;
    int* m_bin_type_array;
};

struct GetParticleBin
{
    GpuArray<Real,AMREX_SPACEDIM> plo;
    GpuArray<Real,AMREX_SPACEDIM> dxi;
    Box domain;
    IntVect bin_size;
    Box box;

    template <typename ParticleType>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    unsigned int operator() (const ParticleType& p) const noexcept
    {
        Box tbx;
        auto iv = getParticleCell(p, plo, dxi, domain);
        auto tid = getTileIndex(iv, box, true, bin_size, tbx);
        return static_cast<unsigned int>(tid);
    }
};

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (P const& p,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi) noexcept
{
    IntVect iv(
        AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])),
                     int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])),
                     int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2]))));
    return iv;
}

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (P const& p,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                         const Box& domain) noexcept
{
    IntVect iv = getParticleCell(p, plo, dxi);
    iv += domain.smallEnd();
    return iv;
}

template <typename PTD>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
IntVect getParticleCell (PTD const& ptd, int i,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                         const Box& domain) noexcept
{
    if constexpr (PTD::ParticleType::is_soa_particle)
    {
        IntVect iv(
            AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])),
                         int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])),
                         int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2]))));
        iv += domain.smallEnd();
        return iv;
    } else {
        return getParticleCell(ptd.m_aos[i], plo, dxi, domain);
    }
}
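
Illustrative sketch (not part of this header): given a level Geometry, the cell owning a particle is typically computed as follows; any particle type with a pos() accessor works here.

template <typename P>
IntVect whichCell (P const& p, Geometry const& geom)
{
    auto plo = geom.ProbLoArray();       // physical lower corner of the domain
    auto dxi = geom.InvCellSizeArray();  // 1/dx in each direction
    // absolute cell index in the domain's index space; the overload without a
    // Box argument returns the index relative to the problem lo corner instead
    return getParticleCell(p, plo, dxi, geom.Domain());
}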

struct DefaultAssignor
{
    template <typename P>
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    IntVect operator() (P const& p,
                        amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                        amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                        const Box& domain) const noexcept
    {
        return getParticleCell(p, plo, dxi, domain);
    }
};

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
int getParticleGrid (P const& p, amrex::Array4<int> const& mask,
                     amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                     amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
                     const Box& domain) noexcept
{
    if (!p.id().is_valid()) { return -1; }
    IntVect iv = getParticleCell(p, plo, dxi, domain);
    return mask(iv);
}

template <typename P>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
bool enforcePeriodic (P& p,
                      amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
                      amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& phi,
                      amrex::GpuArray<amrex::ParticleReal,AMREX_SPACEDIM> const& rlo,
                      amrex::GpuArray<amrex::ParticleReal,AMREX_SPACEDIM> const& rhi,
                      amrex::GpuArray<int,AMREX_SPACEDIM> const& is_per) noexcept
{
    bool shifted = false;
    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
    {
        if (! is_per[idim]) { continue; }
        if (p.pos(idim) > rhi[idim]) {
            while (p.pos(idim) > rhi[idim]) {
                p.pos(idim) -= static_cast<ParticleReal>(phi[idim] - plo[idim]);
            }
            // clamp to avoid precision issues
            if (p.pos(idim) < rlo[idim]) {
                p.pos(idim) = rlo[idim];
            }
            shifted = true;
        }
        else if (p.pos(idim) < rlo[idim]) {
            while (p.pos(idim) < rlo[idim]) {
                p.pos(idim) += static_cast<ParticleReal>(phi[idim] - plo[idim]);
            }
            // clamp to avoid precision issues
            if (p.pos(idim) > rhi[idim]) {
                p.pos(idim) = rhi[idim];
            }
            shifted = true;
        }
        AMREX_ASSERT( (p.pos(idim) >= rlo[idim] ) && ( p.pos(idim) <= rhi[idim] ));
    }

    return shifted;
}
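
Illustrative sketch (not part of this header) of driving enforcePeriodic from a Geometry; the explicit casts build the ParticleReal bounds that the clamping above relies on:

template <typename P>
bool wrapIntoDomain (P& p, Geometry const& geom)
{
    auto plo = geom.ProbLoArray();
    auto phi = geom.ProbHiArray();
    GpuArray<ParticleReal,AMREX_SPACEDIM> rlo, rhi;
    for (int d = 0; d < AMREX_SPACEDIM; ++d) {
        rlo[d] = static_cast<ParticleReal>(plo[d]);
        rhi[d] = static_cast<ParticleReal>(phi[d]);
    }
    // returns true if the position was shifted across a periodic boundary
    return enforcePeriodic(p, plo, phi, rlo, rhi, geom.isPeriodicArray());
}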

template <typename PTile, typename ParFunc>
int
partitionParticles (PTile& ptile, ParFunc const& is_left)
{
    const int np = ptile.numParticles();
    if (np == 0) { return 0; }

    auto ptd = ptile.getParticleTileData();

    const int num_left = Reduce::Sum<int>(np,
        [=] AMREX_GPU_DEVICE (int i) -> int
        {
            return int(is_left(ptd, i));
        });

    // The ptile will be partitioned into left [0, num_left-1] and right [num_left, np-1].
    //
    // Note that the number of particles in [0, num_left-1] that should belong to the
    // right partition is equal to the number of particles in [num_left, np-1] that should belong
    // to the left partition. We will define num_swaps to be this number. This is the minimum
    // number of swaps that need to be performed to partition the ptile in place for any algorithm.
    //
    // From this it is easy to see that
    // max_num_swaps = min(size([0, num_left-1]), size([num_left, np-1]))
    // is an upper bound for num_swaps.

    const int max_num_swaps = std::min(num_left, np - num_left);
    if (max_num_swaps == 0) { return num_left; }

    Gpu::DeviceVector<int> index_left(max_num_swaps);
    Gpu::DeviceVector<int> index_right(max_num_swaps);
    int * const p_index_left = index_left.dataPtr();
    int * const p_index_right = index_right.dataPtr();

    // The num_swaps particles that are in [0, num_left-1] but should be moved to the right
    // partition are at the same time the first num_swaps particles in the whole ptile for
    // which is_left is false.
    // Similarly, the num_swaps particles in [num_left, np-1] that should be moved to the left
    // partition are the last num_swaps particles of the ptile for which is_left is true.
    //
    // The PrefixSum is used to find exactly these particles and store their indexes in
    // index_left and index_right. Since num_swaps is not known, the first max_num_swaps
    // particles are stored instead. Here, dst = num_left-1-(i-s) is used to effectively reverse
    // the PrefixSum to store the last particles for which is_left is true.
    //
    // This way, all indexes in index_right are in ascending order, and all indexes in
    // index_left are in descending order.

    Scan::PrefixSum<int>(np,
        [=] AMREX_GPU_DEVICE (int i) -> int
        {
            return int(!is_left(ptd, i));
        },
        [=] AMREX_GPU_DEVICE (int i, int const& s)
        {
            if (!is_left(ptd, i)) {
                int dst = s;
                if (dst < max_num_swaps) {
                    p_index_right[dst] = i;
                }
            } else {
                int dst = num_left-1-(i-s); // avoid integer overflow
                if (dst < max_num_swaps) {
                    p_index_left[dst] = i;
                }
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);

    // Finally, the particles are swapped. Since max_num_swaps is only an upper bound for num_swaps,
    // some swaps should not be performed and need to be skipped. This is the case if the index
    // in index_left[i] is already in the left partition or the index in index_right[i] is already
    // in the right partition. These two cases coincide for the same i because index_right is in
    // ascending order and index_left in descending order. This means for both index_left and
    // index_right the first num_swaps particles need to be swapped, and the particles after that
    // should be skipped.
    //
    // The check right_i < left_i makes sure that the particle going to the right partition is
    // actually coming from the left partition, which has a lower index than the other particle,
    // and vice versa.
    //
    // Since exactly num_swaps swap operations are performed in the end, which is the smallest
    // number possible, this algorithm is optimal in the number of swap operations.
    // This results in good performance in practice if the size of a particle is large enough that
    // it compensates for the extra kernel launches and evaluations of is_left which this
    // algorithm needs.

    ParallelFor(max_num_swaps,
        [=] AMREX_GPU_DEVICE (int i)
        {
            int left_i = p_index_left[i];
            int right_i = p_index_right[i];
            if (right_i < left_i) {
                swapParticle(ptd, ptd, left_i, right_i);
            }
        });

    index_left.free_async();
    index_right.free_async();

    return num_left;
}
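
Illustrative sketch (not part of this header): partitioning a tile with a user-defined predicate, here moving every particle whose x position lies below a cutoff to the front of the tile:

template <typename PTile>
int gatherLowXParticles (PTile& ptile, ParticleReal x_cut)
{
    // After the call, particles with pos(0) < x_cut occupy [0, n_low-1] and the
    // rest occupy [n_low, np-1]; ordering within each partition is unspecified.
    int n_low = partitionParticles(ptile,
        [=] AMREX_GPU_DEVICE (auto const& ptd, int i) -> bool {
            return ptd.pos(0, i) < x_cut;
        });
    return n_low;
}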

template <typename PTile, typename ParFunc>
void
partitionParticles (PTile& ptile, int num_left, ParFunc const& is_left)
{
    const int np = ptile.numParticles();
    if (np == 0) { return; }

    auto ptd = ptile.getParticleTileData();

    const int max_num_swaps = std::min(num_left, np - num_left);
    if (max_num_swaps == 0) { return; }

    Gpu::DeviceVector<int> index_left(max_num_swaps);
    Gpu::DeviceVector<int> index_right(max_num_swaps);
    int * const p_index_left = index_left.dataPtr();
    int * const p_index_right = index_right.dataPtr();

    Scan::PrefixSum<int>(np,
        [=] AMREX_GPU_DEVICE (int i) -> int
        {
            return int(!is_left(ptd, i));
        },
        [=] AMREX_GPU_DEVICE (int i, int const& s)
        {
            if (!is_left(ptd, i)) {
                int dst = s;
                if (dst < max_num_swaps) {
                    p_index_right[dst] = i;
                }
            } else {
                int dst = num_left-1-(i-s); // avoid integer overflow
                if (dst < max_num_swaps) {
                    p_index_left[dst] = i;
                }
            }
        },
        Scan::Type::exclusive, Scan::noRetSum);

    ParallelFor(max_num_swaps,
        [=] AMREX_GPU_DEVICE (int i)
        {
            int left_i = p_index_left[i];
            int right_i = p_index_right[i];
            if (right_i < left_i) {
                swapParticle(ptd, ptd, left_i, right_i);
            }
        });

    index_left.free_async();
    index_right.free_async();
}

template <typename PTile>
void
removeInvalidParticles (PTile& ptile)
{
    const int new_size = partitionParticles(ptile,
        [=] AMREX_GPU_DEVICE (auto& ptd, int i) {
            return ptd.id(i).is_valid();
        });
    ptile.resize(new_size);
}

#if defined(AMREX_USE_GPU)

template <typename PTile, typename PLocator, typename CellAssignor>
int
partitionParticlesByDest (PTile& ptile, const PLocator& ploc, CellAssignor const& assignor,
                          const ParticleBufferMap& pmap,
                          const GpuArray<Real,AMREX_SPACEDIM>& plo,
                          const GpuArray<Real,AMREX_SPACEDIM>& phi,
                          const GpuArray<ParticleReal,AMREX_SPACEDIM>& rlo,
                          const GpuArray<ParticleReal,AMREX_SPACEDIM>& rhi,
                          const GpuArray<int,AMREX_SPACEDIM>& is_per,
                          int lev, int gid, int /*tid*/,
                          int lev_min, int lev_max, int nGrow, bool remove_negative)
{
    auto getPID = pmap.getPIDFunctor();
    int pid = ParallelContext::MyProcSub();

    Gpu::DeviceVector<uint8_t> particle_stays(ptile.numParticles());
    uint8_t * const p_particle_stays = particle_stays.dataPtr();
    auto ptd = ptile.getParticleTileData();

    // the function for determining if a particle stays on this grid is very slow,
    // so we cache it in particle_stays to avoid evaluating it multiple times.
    ParallelFor(ptile.numParticles(),
        [=] AMREX_GPU_DEVICE (int i)
        {
            int assigned_grid;
            int assigned_lev;

            if (!ptd.id(i).is_valid())
            {
                assigned_grid = -1;
                assigned_lev  = -1;
            }
            else
            {
                auto p_prime = ptd.getSuperParticle(i);
                enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per);
                auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
                assigned_grid = amrex::get<0>(tup_prime);
                assigned_lev  = amrex::get<1>(tup_prime);
                if (assigned_grid >= 0)
                {
                    AMREX_D_TERM(ptd.pos(0, i) = p_prime.pos(0);,
                                 ptd.pos(1, i) = p_prime.pos(1);,
                                 ptd.pos(2, i) = p_prime.pos(2););
                }
                else if (lev_min > 0)
                {
                    AMREX_D_TERM(p_prime.pos(0) = ptd.pos(0, i);,
                                 p_prime.pos(1) = ptd.pos(1, i);,
                                 p_prime.pos(2) = ptd.pos(2, i););
                    auto tup = ploc(p_prime, lev_min, lev_max, nGrow, assignor);
                    assigned_grid = amrex::get<0>(tup);
                    assigned_lev  = amrex::get<1>(tup);
                }
            }

            p_particle_stays[i] = uint8_t(
                ((remove_negative == false) && (!ptd.id(i).is_valid())) ||
                ((assigned_grid == gid) && (assigned_lev == lev) && (getPID(lev, gid) == pid)));
        });

    return partitionParticles(ptile,
        [=] AMREX_GPU_DEVICE (auto& /* ptd */, int i) -> bool {
            return p_particle_stays[i];
        });
}

#endif

template <class PC1, class PC2>
bool SameIteratorsOK (const PC1& pc1, const PC2& pc2) {
    if (pc1.numLevels() != pc2.numLevels()) {return false;}
    if (pc1.do_tiling != pc2.do_tiling) {return false;}
    if (pc1.tile_size != pc2.tile_size) {return false;}
    for (int lev = 0; lev < pc1.numLevels(); ++lev) {
        if (pc1.ParticleBoxArray(lev) != pc2.ParticleBoxArray(lev)) {return false;}
        if (pc1.ParticleDistributionMap(lev) != pc2.ParticleDistributionMap(lev)) {return false;}
    }
    return true;
}

template <class PC>
void EnsureThreadSafeTiles (PC& pc) {
    using Iter = typename PC::ParIterType;
    for (int lev = 0; lev < pc.numLevels(); ++lev) {
        for (Iter pti(pc, lev); pti.isValid(); ++pti) {
            pc.DefineAndReturnParticleTile(lev, pti);
        }
    }
}

IntVect computeRefFac (const ParGDBBase* a_gdb, int src_lev, int lev);

Vector<int> computeNeighborProcs (const ParGDBBase* a_gdb, int ngrow);

namespace particle_detail
{
template <typename C>
void clearEmptyEntries (C& c)
{
    for (auto c_it = c.begin(); c_it != c.end(); /* no ++ */)
    {
        if (c_it->second.empty()) { c.erase(c_it++); }
        else { ++c_it; }
    }
}
}

template <class index_type, typename F>
void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
                               index_type nbins, F const& f)
{
    BL_PROFILE("PermutationForDeposition()");

#if defined(AMREX_USE_HIP)
    // MI250X has a small L2 cache and is more tolerant of atomic add contention,
    // so we use a small block size of 64 and the compressed layout.
    static constexpr index_type gpu_block_size = 64;
    static constexpr bool compressed_layout = true;
#else
    // A100 has a larger L2 cache and is very sensitive to atomic add contention,
    // so we use a large block size of 1024 and not the compressed layout.
    static constexpr index_type gpu_block_size = 1024;
    static constexpr bool compressed_layout = false;
#endif

    static constexpr index_type gpu_block_size_m1 = gpu_block_size - 1;
    static constexpr index_type llist_guard = std::numeric_limits<index_type>::max();

    // round up to gpu_block_size
    nbins = (nbins + gpu_block_size_m1) / gpu_block_size * gpu_block_size;

    Gpu::DeviceVector<index_type> llist_start(nbins, llist_guard);
    Gpu::DeviceVector<index_type> llist_next(nitems);
    perm.resize(nitems);
    Gpu::DeviceScalar<index_type> global_idx(0);

    index_type* pllist_start = llist_start.dataPtr();
    index_type* pllist_next = llist_next.dataPtr();
    index_type* pperm = perm.dataPtr();
    index_type* pglobal_idx = global_idx.dataPtr();

    amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (index_type i) noexcept
    {
        i = nitems - i - 1;
        pllist_next[i] = Gpu::Atomic::Exch(pllist_start + f(i), i);
    });

#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
    amrex::launch<gpu_block_size>(nbins / gpu_block_size, Gpu::gpuStream(),
        [pllist_start,pllist_next,pperm,pglobal_idx] AMREX_GPU_DEVICE () {
            __shared__ index_type sdata[gpu_block_size];
            __shared__ index_type global_idx_start;
            __shared__ index_type idx_start;

            index_type current_idx = 0;

            if constexpr (compressed_layout) {
                // Compressed layout: subsequent sweeps of up to gpu_block_size contiguous particles
                // are put right next to each other, while without the compressed layout,
                // there can be other particle sweeps from different locations between them.
                current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];

                index_type num_particles_thread = 0;
                while (current_idx != llist_guard) {
                    ++num_particles_thread;
                    current_idx = pllist_next[current_idx];
                }

                index_type num_particles_block =
                    Gpu::blockReduceSum<gpu_block_size>(num_particles_thread);

                if (threadIdx.x == 0) {
                    global_idx_start = Gpu::Atomic::Add(pglobal_idx, num_particles_block);
                }
            }

            current_idx = pllist_start[threadIdx.x + gpu_block_size * blockIdx.x];

            while (true) {
                sdata[threadIdx.x] = index_type(current_idx != llist_guard);
                index_type x = 0;

                // simple block wide prefix sum
                for (index_type i = 1; i<gpu_block_size; i*=2) {
                    __syncthreads();
                    if (threadIdx.x >= i) {
                        x = sdata[threadIdx.x - i];
                    }
                    __syncthreads();
                    if (threadIdx.x >= i) {
                        sdata[threadIdx.x] += x;
                    }
                }
                __syncthreads();
                if (sdata[gpu_block_size_m1] == 0) {
                    break;
                }
                if (threadIdx.x == gpu_block_size_m1) {
                    if constexpr (compressed_layout) {
                        idx_start = global_idx_start;
                        global_idx_start += sdata[gpu_block_size_m1];
                    } else {
                        idx_start = Gpu::Atomic::Add(pglobal_idx, sdata[gpu_block_size_m1]);
                    }
                }
                __syncthreads();
                sdata[threadIdx.x] += idx_start;
                if (current_idx != llist_guard) {
                    pperm[sdata[threadIdx.x] - 1] = current_idx;
                    current_idx = pllist_next[current_idx];
                }
            }
        });
#else
    amrex::ignore_unused(pperm, pglobal_idx, compressed_layout);
    Abort("PermutationForDeposition only implemented for CUDA and HIP");
#endif

    Gpu::Device::streamSynchronize();
}

template <class index_type, class PTile>
void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
                               const PTile& ptile, Box bx, Geometry geom, const IntVect idx_type)
{
    AMREX_ALWAYS_ASSERT(idx_type.allGE(0) && idx_type.allLE(2));

    const IntVect refine_vect = max(idx_type, IntVect(1)).min(IntVect(2));
    const IntVect type_vect = idx_type - idx_type / 2 * 2;

    geom.refine(refine_vect);

    Box domain = geom.Domain();

    bx.convert(type_vect);
    domain.convert(type_vect);

    const RealVect dxi(geom.InvCellSize());
    const RealVect pos_offset = Real(0.5) * (RealVect(geom.ProbLo()) + RealVect(geom.ProbHi())
        - RealVect(geom.CellSize()) * RealVect(domain.smallEnd() + domain.bigEnd()));

    const int ref_product = AMREX_D_TERM(refine_vect[0], * refine_vect[1], * refine_vect[2]);
    const IntVect ref_offset(AMREX_D_DECL(1, refine_vect[0], refine_vect[0] * refine_vect[1]));

    auto ptd = ptile.getConstParticleTileData();
    PermutationForDeposition<index_type>(perm, nitems, bx.numPts() * ref_product,
        [=] AMREX_GPU_DEVICE (index_type idx) noexcept
        {
            const auto p = ptd[idx];

            IntVect iv = ((p.pos() - pos_offset) * dxi).round();

            IntVect iv_coarse = iv / refine_vect;
            IntVect iv_remainder = iv - iv_coarse * refine_vect;

            iv_coarse = iv_coarse.max(bx.smallEnd());
            iv_coarse = iv_coarse.min(bx.bigEnd());
            return bx.index(iv_coarse) + bx.numPts() * (iv_remainder * ref_offset).sum();
        });
}
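
Illustrative sketch (not part of this header) of how the resulting permutation is typically consumed: particles are visited bin by bin, so threads depositing to nearby cells are grouped together. The tile, its box, and the level Geometry are assumed to come from user code; IntVect(0) selects cell-centered binning.

template <class index_type, class PTile>
void visitInDepositionOrder (const PTile& ptile, Box const& bx, Geometry const& geom)
{
    Gpu::DeviceVector<index_type> perm;
    auto nitems = static_cast<index_type>(ptile.numParticles());
    PermutationForDeposition<index_type>(perm, nitems, ptile, bx, geom, IntVect(0));

    auto const* p_perm = perm.dataPtr();
    auto ptd = ptile.getConstParticleTileData();
    ParallelFor(nitems, [=] AMREX_GPU_DEVICE (index_type ip) noexcept
    {
        const auto p = ptd[p_perm[ip]]; // particles come out grouped by deposition bin
        // ... deposit p onto the grid here ...
        ignore_unused(p);
    });
}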

template <typename P>
std::string getDefaultCompNameReal (const int i) {
    int first_r_name = 0;
    if constexpr (P::is_soa_particle) {
        if (i < AMREX_SPACEDIM) {
            constexpr int x_in_ascii = 120;
            std::string const name{char(x_in_ascii+i)};
            return name;
        }
        first_r_name = AMREX_SPACEDIM;
    }
    std::string const name{("real_comp" + std::to_string(i-first_r_name))};
    return name;
}

template <typename P>
std::string getDefaultCompNameInt (const int i) {
    std::string const name{("int_comp" + std::to_string(i))};
    return name;
}

#ifdef AMREX_USE_HDF5_ASYNC
void async_vol_es_wait_particle();
void async_vol_es_wait_close_particle();
#endif
}

#endif // include guard