1#ifndef AMREX_PARTICLECOMMUNICATION_H_
2#define AMREX_PARTICLECOMMUNICATION_H_
3#include <AMReX_Config.H>
23class ParticleContainerBase;
27 template <
class PTile>
28 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
30 for(
int i = 0; i < static_cast<int>(sizes.size()); ++i)
32 int offset = tiles[i]->numTotalParticles();
33 int nn = tiles[i]->getNumNeighbors();
34 tiles[i]->setNumNeighbors(nn + sizes[i]);
42 template <
class PTile>
43 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
45 int N =
static_cast<int>(sizes.size());
47 std::map<PTile*, int> tile_sizes;
48 for(
int i = 0; i < N; ++i) {
49 tile_sizes[tiles[i]] = tiles[i]->numParticles();
52 for(
int i = 0; i < N; ++i)
54 offsets.push_back(tile_sizes[tiles[i]]);
55 tile_sizes[tiles[i]] += sizes[i];
58 for (
auto& kv : tile_sizes) {
59 kv.first->resize(kv.second);
78 void resize (
int gid,
int tid,
int lev,
int size);
82 if (
m_boxes.size() <= lev) {
return 0; }
83 auto mit =
m_boxes[lev].find(index);
84 return mit ==
m_boxes[lev].end() ? 0 :
int(mit->second.size());
98#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
101 const int* boxes =
nullptr;
102 const int* levs =
nullptr;
103 const int* tiles =
nullptr;
104 int* dst_indices =
nullptr;
113#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
114 , omp_copy_offsets(1, 0)
121#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
128 template <
class PC,
class F>
133 auto&& batch_fn = std::forward<F>(f);
135 for (
int lev = 0; lev < num_levels; ++lev)
137 for (
const auto& kv : pc.GetParticles(lev))
139 auto index = kv.first;
140 int num_copies = op.
numCopies(index, lev);
141 if (num_copies == 0) {
continue; }
144 dst_indices.resize(num_copies);
146 batch_fn(lev, index, num_copies, dst_indices);
151 template <
class PC,
class GetBucket>
155 BL_PROFILE(
"ParticleCopyPlan::buildCopiesStableOrdered");
181 for (
int i = 0; i < num_copies; ++i) {
182 int dst_box = h_boxes[i];
184 int dst_tile = h_tiles[i];
185 int dst_lev = h_levs[i];
186 int dst_index =
static_cast<int>(workspace.
h_box_counts[getBucket(dst_lev, dst_box, dst_tile)]++);
187 h_dst_indices[i] = dst_index;
193 h_dst_indices.
begin(), h_dst_indices.
end(),
194 dst_indices.
begin());
199#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
200 template <
class PC,
class GetBucket>
202 TwoPassHostAlgorithm, BuildWorkspace& workspace,
GetBucket const& getBucket)
204 BL_PROFILE(
"ParticleCopyPlan::buildCopiesTwoPassHost");
211 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
212 const auto* p_levs = op.
m_levels[lev].at(index).dataPtr();
213 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
214 auto* p_dst_indices = dst_indices.
dataPtr();
216 workspace.omp_copy_work.push_back({p_boxes, p_levs, p_tiles, p_dst_indices, num_copies});
217 workspace.omp_copy_offsets.push_back(workspace.omp_copy_offsets.back() + num_copies);
220 if (workspace.omp_copy_work.empty()) {
return; }
222 std::fill(workspace.omp_thread_box_counts.begin(), workspace.omp_thread_box_counts.end(), 0U);
223 auto* p_omp_thread_box_counts = workspace.omp_thread_box_counts.data();
224 const auto* p_omp_copy_work = workspace.omp_copy_work.data();
225 const auto* p_omp_copy_offsets = workspace.omp_copy_offsets.data();
226 const Long total_num_copies = workspace.omp_copy_offsets.back();
232 Long ibegin = thread_num*total_num_copies/num_threads;
233 Long iend = (thread_num+1)*total_num_copies/num_threads;
234 auto* p_thread_box_counts = p_omp_thread_box_counts + thread_num*workspace.num_buckets;
238 int iwork =
static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
239 workspace.omp_copy_offsets.end(),
241 - workspace.omp_copy_offsets.begin()) - 1;
242 while (iwork <
static_cast<int>(workspace.omp_copy_work.size()) &&
243 p_omp_copy_offsets[iwork] < iend)
245 auto const& work = p_omp_copy_work[iwork];
246 Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
247 Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
248 int local_begin =
static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
249 int local_end =
static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
250 for (
int i = local_begin; i < local_end; ++i)
252 int dst_box = work.boxes[i];
255 int dst_tile = work.tiles[i];
256 int dst_lev = work.levs[i];
257 ++p_thread_box_counts[getBucket(dst_lev, dst_box, dst_tile)];
266 for (
int ibucket = 0; ibucket < workspace.num_buckets; ++ibucket)
268 unsigned int offset = workspace.h_box_counts[ibucket];
269 for (
int tid = 0; tid < num_threads; ++tid)
271 auto& count = p_omp_thread_box_counts[tid*workspace.num_buckets + ibucket];
272 unsigned int total = count;
276 workspace.h_box_counts[ibucket] =
offset;
281 int iwork =
static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
282 workspace.omp_copy_offsets.end(),
284 - workspace.omp_copy_offsets.begin()) - 1;
285 while (iwork <
static_cast<int>(workspace.omp_copy_work.size()) &&
286 p_omp_copy_offsets[iwork] < iend)
288 auto const& work = p_omp_copy_work[iwork];
289 Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
290 Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
291 int local_begin =
static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
292 int local_end =
static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
293 for (
int i = local_begin; i < local_end; ++i)
295 int dst_box = work.boxes[i];
298 int dst_tile = work.tiles[i];
299 int dst_lev = work.levs[i];
300 int bucket = getBucket(dst_lev, dst_box, dst_tile);
301 work.dst_indices[i] =
static_cast<int>(p_thread_box_counts[bucket]++);
311 template <
class PC,
class GetBucket>
315 BL_PROFILE(
"ParticleCopyPlan::buildCopiesAtomicScatter");
320 [&op, &getBucket, p_dst_box_counts] (
int lev,
TileKey const& index,
323 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
324 const auto* p_levs = op.
m_levels[lev].at(index).dataPtr();
325 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
326 auto* p_dst_indices = dst_indices.
dataPtr();
330 int dst_box = p_boxes[i];
333 int dst_tile = p_tiles[i];
334 int dst_lev = p_levs[i];
336 &p_dst_box_counts[getBucket(dst_lev, dst_box, dst_tile)], 1U));
337 p_dst_indices[i] = dst_index;
345 if (use_host_box_counters) {
406 template <class PC, std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
416 pp.
query(
"do_one_sided_comms", m_do_one_sided_comms);
417 const int num_buckets = pc.BufferMap().numBuckets();
434#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
435 constexpr bool use_host_bucket_counters =
true;
437 constexpr bool use_host_bucket_counters =
false;
440 bool use_host_box_counters = pc.stableRedistribute() || use_host_bucket_counters;
441 if (use_host_box_counters) {
445 if (pc.stableRedistribute())
451#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
480 int NStructReal = PC::ParticleContainerType::NStructReal;
481 int NStructInt = PC::ParticleContainerType::NStructInt;
483 int num_real_comm_comp = 0;
484 int comm_comps_start = 0;
485 if constexpr (!PC::ParticleType::is_soa_particle) {
486 comm_comps_start += AMREX_SPACEDIM + NStructReal;
488 for (
int i = comm_comps_start; i < real_comp_mask.
size(); ++i) {
489 if (real_comp_mask[i]) {++num_real_comm_comp;}
492 int num_int_comm_comp = 0;
493 for (
int i = 2 + NStructInt; i < int_comp_mask.
size(); ++i) {
494 if (int_comp_mask[i]) {++num_int_comm_comp;}
497 if constexpr (PC::ParticleType::is_soa_particle) {
503 + num_int_comm_comp *
sizeof(
int);
548 bool m_local =
false;
549 int m_do_one_sided_comms = 0;
568 Long operator() (
int dst_box,
int dst_tile,
int dst_lev, std::size_t psize,
int i)
const
570 int dst_pid =
m_get_pid(dst_lev, dst_box, dst_tile);
577template <
class PC,
class Buffer,
578 std::enable_if_t<IsParticleContainer<PC>::value &&
579 std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
580 Buffer>,
int> foo = 0>
589 int num_buckets = pc.BufferMap().numBuckets();
591 std::size_t total_buffer_size = 0;
597 total_buffer_size = np*psize;
604 if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
608 snd_buffer.resize(total_buffer_size);
613 const auto plo = pc.Geom(0).ProbLoArray();
614 const auto phi = pc.Geom(0).ProbHiArray();
615 const auto is_per = pc.Geom(0).isPeriodicArray();
616#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
619 typename PC::ParticleTileType
const* src_tile =
nullptr;
620 const int* boxes =
nullptr;
621 const int* levels =
nullptr;
622 const int* tiles =
nullptr;
623 const int* src_indices =
nullptr;
624 const IntVect* periodic_shift =
nullptr;
625 const int* dst_indices =
nullptr;
631 for (
int lev = 0; lev < num_levels; ++lev)
633 auto& plev = pc.GetParticles(lev);
634 for (
auto& kv : plev)
636 auto index = kv.first;
637 auto& src_tile = plev.at(index);
638 int num_copies = op.
numCopies(index, lev);
639 if (num_copies == 0) {
continue; }
641 const auto* p_boxes = op.
m_boxes[lev].at(index).dataPtr();
642 const auto* p_levels = op.
m_levels[lev].at(index).dataPtr();
643 const auto* p_tiles = op.
m_tiles[lev].at(index).dataPtr();
644 const auto* p_src_indices = op.
m_src_indices[lev].at(index).dataPtr();
645 const auto* p_periodic_shift = op.
m_periodic_shift[lev].at(index).dataPtr();
646 const auto* p_dst_indices = plan.
m_dst_indices[lev].at(index).dataPtr();
647#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
648 omp_pack_work.push_back({&src_tile, p_boxes, p_levels, p_tiles,
649 p_src_indices, p_periodic_shift, p_dst_indices, num_copies});
650 omp_pack_offsets.push_back(omp_pack_offsets.back() + num_copies);
652 const auto& ptd = src_tile.getConstParticleTileData();
653 auto* p_snd_buffer = snd_buffer.
dataPtr();
657 int dst_box = p_boxes[i];
660 int dst_tile = p_tiles[i];
661 int dst_lev = p_levels[i];
662 auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize, p_dst_indices[i]);
663 int src_index = p_src_indices[i];
664 ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);
666 const IntVect& pshift = p_periodic_shift[i];
667 bool do_periodic_shift =
669 || (is_per[1] && pshift[1] != 0),
670 || (is_per[2] && pshift[2] != 0) );
672 if (do_periodic_shift)
675 Long pos_offset = dst_offset;
677 if constexpr (PC::ParticleType::is_soa_particle) {
678 pos_offset +=
sizeof(uint64_t);
682 for (
int idim = 0; idim < AMREX_SPACEDIM; ++idim)
684 if (! is_per[idim]) {
continue; }
685 if (pshift[idim] > 0) {
686 pos[idim] += phi[idim] - plo[idim];
687 }
else if (pshift[idim] < 0) {
688 pos[idim] -= phi[idim] - plo[idim];
700#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
701 if (!omp_pack_work.empty())
703 auto* p_snd_buffer = snd_buffer.dataPtr();
705 const Long total_num_copies = omp_pack_offsets.back();
711 Long ibegin = thread_num*total_num_copies/num_threads;
712 Long iend = (thread_num+1)*total_num_copies/num_threads;
716 int iwork =
static_cast<int>(std::upper_bound(omp_pack_offsets.begin(),
717 omp_pack_offsets.end(),
719 - omp_pack_offsets.begin()) - 1;
720 while (iwork <
static_cast<int>(omp_pack_work.
size()) &&
721 omp_pack_offsets[iwork] < iend)
723 auto const& work = omp_pack_work[iwork];
724 auto const& ptd = work.src_tile->getConstParticleTileData();
725 Long global_begin = std::max(ibegin, omp_pack_offsets[iwork]);
726 Long global_end = std::min(iend, omp_pack_offsets[iwork+1]);
727 int local_begin =
static_cast<int>(global_begin - omp_pack_offsets[iwork]);
728 int local_end =
static_cast<int>(global_end - omp_pack_offsets[iwork]);
729 for (
int i = local_begin; i < local_end; ++i)
731 int dst_box = work.boxes[i];
734 int dst_tile = work.tiles[i];
735 int dst_lev = work.levels[i];
736 auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize,
737 work.dst_indices[i]);
738 int src_index = work.src_indices[i];
739 ptd.packParticleData(p_snd_buffer, src_index, dst_offset,
740 p_comm_real, p_comm_int);
742 const IntVect& pshift = work.periodic_shift[i];
743 bool do_periodic_shift =
745 || (is_per[1] && pshift[1] != 0),
746 || (is_per[2] && pshift[2] != 0) );
748 if (do_periodic_shift)
751 Long pos_offset = dst_offset;
752 if constexpr (PC::ParticleType::is_soa_particle) {
753 pos_offset +=
sizeof(uint64_t);
757 for (
int idim = 0; idim < AMREX_SPACEDIM; ++idim)
759 if (! is_per[idim]) {
continue; }
760 if (pshift[idim] > 0) {
761 pos[idim] += phi[idim] - plo[idim];
762 }
else if (pshift[idim] < 0) {
763 pos[idim] -= phi[idim] - plo[idim];
779template <
class PC,
class Buffer,
class UnpackPolicy,
780 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
785 using PTile =
typename PC::ParticleTileType;
787 int num_levels = pc.BufferMap().numLevels();
791 std::vector<int> sizes;
792 std::vector<PTile*> tiles;
793 for (
int lev = 0; lev < num_levels; ++lev)
795 for(
MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
797 int gid = mfi.index();
798 int tid = mfi.LocalTileIndex();
799 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
800 int num_copies = plan.
m_box_counts_h[pc.BufferMap().gridAndTileAndLevToBucket(gid, tid, lev)];
801 sizes.push_back(num_copies);
802 tiles.push_back(&tile);
807 std::vector<int> offsets;
808 policy.resizeTiles(tiles, sizes, offsets);
813#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
816 PTile* tile =
nullptr;
829 for (
int lev = 0; lev < num_levels; ++lev)
831 auto& plev = pc.GetParticles(lev);
832 for(
MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
834 int gid = mfi.index();
835 int tid = mfi.LocalTileIndex();
836 auto index = std::make_pair(gid, tid);
838 auto& tile = plev[index];
842 int offset = offsets[uindex];
843 int size = sizes[uindex];
846#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
847 omp_unpack_work.push_back({&tile, gid, tid, lev,
offset, size});
848 omp_unpack_offsets.push_back(omp_unpack_offsets.back() + size);
850 auto p_snd_buffer = snd_buffer.
dataPtr();
851 auto ptd = tile.getParticleTileData();
854 auto src_offset = get_offset(gid, tid, lev, psize, i);
855 int dst_index =
offset + i;
856 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
862#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
863 if (!omp_unpack_work.empty())
866 auto p_snd_buffer = snd_buffer.dataPtr();
867 const Long total_num_copies = omp_unpack_offsets.back();
873 Long ibegin = thread_num*total_num_copies/num_threads;
874 Long iend = (thread_num+1)*total_num_copies/num_threads;
878 int iwork =
static_cast<int>(std::upper_bound(omp_unpack_offsets.begin(),
879 omp_unpack_offsets.end(),
881 - omp_unpack_offsets.begin()) - 1;
882 while (iwork <
static_cast<int>(omp_unpack_work.
size()) &&
883 omp_unpack_offsets[iwork] < iend)
885 auto const& work = omp_unpack_work[iwork];
886 auto ptd = work.tile->getParticleTileData();
887 Long global_begin = std::max(ibegin, omp_unpack_offsets[iwork]);
888 Long global_end = std::min(iend, omp_unpack_offsets[iwork+1]);
889 int local_begin =
static_cast<int>(global_begin - omp_unpack_offsets[iwork]);
890 int local_end =
static_cast<int>(global_end - omp_unpack_offsets[iwork]);
891 for (
int i = local_begin; i < local_end; ++i)
893 auto src_offset = get_offset(work.gid, work.tid, work.lev, psize, i);
894 int dst_index = work.offset + i;
895 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index,
896 p_comm_real, p_comm_int);
906template <
class PC,
class SndBuffer,
class RcvBuffer,
907 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
910 BL_PROFILE(
"amrex::communicateParticlesStart");
917 if (NProcs == 1) {
return; }
925 Long TotRcvBytes = 0;
926 for (
int i = 0; i < NProcs; ++i) {
928 RcvProc.push_back(i);
930 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
932 rOffset.push_back(TotRcvBytes);
938 for (
int i = 0; i < plan.
m_nrcvs; ++i)
947 rcv_buffer.resize(TotRcvBytes);
963 for (
int i = 0; i < plan.
m_nrcvs; ++i) {
964 const auto Who = RcvProc[i];
965 const auto offset = rOffset[i];
967 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
981 for (
int i = 0; i < NProcs; ++i)
983 if (i == MyProc) {
continue; }
986 if (Cnt == 0) {
continue; }
1006template <
class PC,
class Buffer,
class UnpackPolicy,
1007 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
1014 if (NProcs == 1) {
return; }
1018 using PTile =
typename PC::ParticleTileType;
1024 auto* p_rcv_buffer = rcv_buffer.dataPtr();
1026 std::vector<int> sizes;
1027 std::vector<PTile*> tiles;
1034 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
1035 sizes.push_back(copy_size);
1036 tiles.push_back(&tile);
1040 policy.resizeTiles(tiles, sizes, offsets);
1050 procindex = (rproc == plan.
m_rcv_box_pids[i]) ? procindex : procindex+1;
1053 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
1054 auto ptd = tile.getParticleTileData();
1059 int dst_offset = offsets[uindex];
1060 int size = sizes[uindex];
1068 +
static_cast<Long>(p_pad_adjust[procindex]);
1069 int dst_index = dst_offset + ip;
1070 ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
1071 p_comm_real, p_comm_int);
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
Iterator for looping over tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
iterator begin() noexcept
Definition AMReX_PODVector.H:674
iterator end() noexcept
Definition AMReX_PODVector.H:678
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
Parse Parameters From Command Line and Input Files.
Definition AMReX_ParmParse.H:349
int query(std::string_view name, bool &ref, int ival=FIRST) const
Same as querykth() but searches for the last occurrence of name.
Definition AMReX_ParmParse.cpp:1946
Definition AMReX_ParticleBufferMap.H:59
Definition AMReX_ParticleContainerBase.H:43
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
T * dataPtr() noexcept
get access to the underlying data pointer
Definition AMReX_Vector.H:49
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
amrex_long Long
Definition AMReX_INT.H:30
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1442
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:326
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:128
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:228
static constexpr DeviceToHost deviceToHost
Definition AMReX_GpuContainers.H:106
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
__host__ __device__ void * memcpy(void *dest, const void *src, std::size_t count)
Definition AMReX_GpuUtility.H:220
__host__ __device__ AMREX_FORCE_INLINE T FetchAdd(T *const sum, T const value) noexcept
Definition AMReX_GpuAtomic.H:644
constexpr int get_thread_num()
Definition AMReX_OpenMP.H:37
constexpr int get_num_threads()
Definition AMReX_OpenMP.H:35
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:908
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:1008
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:443
const int[]
Definition AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that ...
Definition AMReX_Arena.H:33
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:781
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:581
Definition AMReX_ParticleBufferMap.H:38
Definition AMReX_ParticleBufferMap.H:14
Definition AMReX_ParticleCommunication.H:553
const unsigned int * m_box_offsets
Definition AMReX_ParticleCommunication.H:554
GetPID m_get_pid
Definition AMReX_ParticleCommunication.H:557
GetBucket m_get_bucket
Definition AMReX_ParticleCommunication.H:558
const std::size_t * m_pad_correction
Definition AMReX_ParticleCommunication.H:555
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.H:560
__device__ Long operator()(int dst_box, int dst_tile, int dst_lev, std::size_t psize, int i) const
Definition AMReX_ParticleCommunication.H:568
Definition AMReX_ParticleCommunication.H:26
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:28
Definition AMReX_ParticleCommunication.H:65
void resize(int gid, int tid, int lev, int size)
Definition AMReX_ParticleCommunication.cpp:25
void setNumLevels(int num_levels)
Definition AMReX_ParticleCommunication.cpp:16
Vector< std::map< TileKey, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition AMReX_ParticleCommunication.H:72
int numLevels() const
Definition AMReX_ParticleCommunication.H:87
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_levels
Definition AMReX_ParticleCommunication.H:69
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_boxes
Definition AMReX_ParticleCommunication.H:68
int numCopies(TileKey const &index, int lev) const
Definition AMReX_ParticleCommunication.H:80
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:66
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_tiles
Definition AMReX_ParticleCommunication.H:70
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_src_indices
Definition AMReX_ParticleCommunication.H:71
void clear()
Definition AMReX_ParticleCommunication.cpp:7
Definition AMReX_ParticleCommunication.H:96
Definition AMReX_ParticleCommunication.H:110
BuildWorkspace(int a_num_buckets)
Definition AMReX_ParticleCommunication.H:111
Gpu::HostVector< unsigned int > h_box_counts
Definition AMReX_ParticleCommunication.H:119
int num_buckets
Definition AMReX_ParticleCommunication.H:118
Definition AMReX_ParticleCommunication.H:94
Definition AMReX_ParticleCommunication.H:95
Definition AMReX_ParticleCommunication.H:91
Vector< int > m_rcv_box_ids
Definition AMReX_ParticleCommunication.H:365
Vector< std::size_t > m_snd_offsets
Definition AMReX_ParticleCommunication.H:392
Vector< int > m_rcv_box_counts
Definition AMReX_ParticleCommunication.H:363
Vector< std::size_t > m_snd_counts
Definition AMReX_ParticleCommunication.H:393
void finalizeBuildBoxCounts(BuildWorkspace const &workspace, bool use_host_box_counters)
Definition AMReX_ParticleCommunication.H:343
Long m_NumSnds
Definition AMReX_ParticleCommunication.H:370
void buildMPIFinish(const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.cpp:221
Vector< int > m_neighbor_procs
Definition AMReX_ParticleCommunication.H:384
void clear()
Definition AMReX_ParticleCommunication.cpp:39
Vector< int > m_rcv_box_pids
Definition AMReX_ParticleCommunication.H:367
void buildCopies(const PC &pc, const ParticleCopyOp &op, AtomicScatterAlgorithm, BuildWorkspace &, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:312
Vector< int > m_rcv_box_levs
Definition AMReX_ParticleCommunication.H:368
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, int local)
Definition AMReX_ParticleCommunication.H:407
Gpu::DeviceVector< int > d_real_comp_mask
Definition AMReX_ParticleCommunication.H:401
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:92
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition AMReX_ParticleCommunication.H:396
void forEachCopyBatch(const PC &pc, const ParticleCopyOp &op, F &&f)
Definition AMReX_ParticleCommunication.H:129
Long m_superparticle_size
Definition AMReX_ParticleCommunication.H:402
Vector< int > m_rcv_box_tids
Definition AMReX_ParticleCommunication.H:366
Vector< Long > m_Snds
Definition AMReX_ParticleCommunication.H:386
Vector< MPI_Request > m_particle_sreqs
Definition AMReX_ParticleCommunication.H:379
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_dst_indices
Definition AMReX_ParticleCommunication.H:357
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition AMReX_ParticleCommunication.H:359
Vector< Long > m_Rcvs
Definition AMReX_ParticleCommunication.H:387
Vector< int > m_rcv_box_offsets
Definition AMReX_ParticleCommunication.H:364
Vector< std::size_t > m_snd_pad_correction_h
Definition AMReX_ParticleCommunication.H:395
Vector< MPI_Status > m_particle_sstats
Definition AMReX_ParticleCommunication.H:378
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition AMReX_ParticleCommunication.H:361
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition AMReX_ParticleCommunication.H:399
Vector< std::size_t > m_rOffset
Definition AMReX_ParticleCommunication.H:389
Vector< MPI_Status > m_particle_rstats
Definition AMReX_ParticleCommunication.H:375
Vector< Long > m_snd_num_particles
Definition AMReX_ParticleCommunication.H:381
Vector< MPI_Request > m_particle_rreqs
Definition AMReX_ParticleCommunication.H:376
Long superParticleSize() const
Definition AMReX_ParticleCommunication.H:404
Gpu::HostVector< int > m_rcv_data
Definition AMReX_ParticleCommunication.H:390
void buildCopies(const PC &pc, const ParticleCopyOp &op, StableOrderedAlgorithm, BuildWorkspace &workspace, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:152
Vector< MPI_Status > m_build_stats
Definition AMReX_ParticleCommunication.H:372
Vector< int > m_RcvProc
Definition AMReX_ParticleCommunication.H:388
Vector< std::size_t > m_rcv_pad_correction_h
Definition AMReX_ParticleCommunication.H:398
int m_nrcvs
Definition AMReX_ParticleCommunication.H:371
Gpu::HostVector< unsigned int > m_box_counts_h
Definition AMReX_ParticleCommunication.H:360
Vector< MPI_Request > m_build_rreqs
Definition AMReX_ParticleCommunication.H:373
Vector< Long > m_rcv_num_particles
Definition AMReX_ParticleCommunication.H:382
Gpu::DeviceVector< int > d_int_comp_mask
Definition AMReX_ParticleCommunication.H:401
Definition AMReX_ParticleCommunication.H:41
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:43