1 #ifndef AMREX_PARTICLECOMMUNICATION_H_
2 #define AMREX_PARTICLECOMMUNICATION_H_
3 #include <AMReX_Config.H>
20 template <
class PTile>
21 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
23 for(
int i = 0; i < static_cast<int>(sizes.size()); ++i)
25 int offset = tiles[i]->numTotalParticles();
26 int nn = tiles[i]->getNumNeighbors();
27 tiles[i]->setNumNeighbors(nn + sizes[i]);
35 template <
class PTile>
36 void resizeTiles (std::vector<PTile*>& tiles,
const std::vector<int>& sizes, std::vector<int>& offsets)
const
38 int N =
static_cast<int>(sizes.size());
40 std::map<PTile*, int> tile_sizes;
41 for(
int i = 0; i < N; ++i) {
42 tile_sizes[tiles[i]] = tiles[i]->numParticles();
45 for(
int i = 0; i < N; ++i)
47 offsets.push_back(tile_sizes[tiles[i]]);
48 tile_sizes[tiles[i]] += sizes[i];
51 for (
auto& kv : tile_sizes) {
52 kv.first->resize(kv.second);
70 [[nodiscard]]
int numCopies (
int gid,
int lev)
const
72 if (
m_boxes.size() <= lev) {
return 0; }
73 auto mit =
m_boxes[lev].find(gid);
74 return mit ==
m_boxes[lev].end() ? 0 :
int(mit->second.size());
127 template <class PC, std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
141 const int num_buckets = pc.BufferMap().numBuckets();
157 auto getBucket = pc.stableRedistribute() ? pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor();
160 if (pc.stableRedistribute() ) {
165 for (
int lev = 0; lev < num_levels; ++lev)
167 for (
const auto& kv : pc.GetParticles(lev))
169 int gid = kv.first.first;
171 if (num_copies == 0) {
continue; }
174 if (pc.stableRedistribute()) {
184 for (
int i = 0; i < num_copies; ++i) {
185 int dst_box = h_boxes[i];
187 int dst_lev = h_levs[i];
188 int index =
static_cast<int>(h_box_counts[getBucket(dst_lev, dst_box)]++);
189 h_dst_indices[i] = index;
195 const auto* p_boxes = op.
m_boxes[lev].at(gid).dataPtr();
196 const auto* p_levs = op.
m_levels[lev].at(gid).dataPtr();
200 int dst_box = p_boxes[i];
203 int dst_lev = p_levs[i];
205 &p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
206 p_dst_indices[i] = index;
213 if (pc.stableRedistribute()) {
240 int NStructReal = PC::ParticleContainerType::NStructReal;
241 int NStructInt = PC::ParticleContainerType::NStructInt;
243 int num_real_comm_comp = 0;
244 for (
int i = AMREX_SPACEDIM + NStructReal; i < real_comp_mask.
size(); ++i) {
245 if (real_comp_mask[i]) {++num_real_comm_comp;}
248 int num_int_comm_comp = 0;
249 for (
int i = 2 + NStructInt; i < int_comp_mask.
size(); ++i) {
250 if (int_comp_mask[i]) {++num_int_comm_comp;}
253 if constexpr (PC::ParticleType::is_soa_particle) {
259 + num_int_comm_comp *
sizeof(
int);
316 Long
operator() (
int dst_box,
int dst_lev, std::size_t psize,
int i)
const
318 int dst_pid =
m_get_pid(dst_lev, dst_box);
325 template <
class PC,
class Buffer,
326 std::enable_if_t<IsParticleContainer<PC>::value &&
327 std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
328 Buffer>,
int> foo = 0>
337 int num_buckets = pc.BufferMap().numBuckets();
339 std::size_t total_buffer_size = 0;
345 total_buffer_size = np*psize;
352 if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
356 snd_buffer.resize(total_buffer_size);
361 const auto plo = pc.Geom(0).ProbLoArray();
362 const auto phi = pc.Geom(0).ProbHiArray();
363 const auto is_per = pc.Geom(0).isPeriodicArray();
364 for (
int lev = 0; lev < num_levels; ++lev)
366 auto& plev = pc.GetParticles(lev);
367 for (
auto& kv : plev)
369 int gid = kv.first.first;
370 int tid = kv.first.second;
371 auto index = std::make_pair(gid, tid);
373 auto& src_tile = plev.at(index);
374 const auto& ptd = src_tile.getConstParticleTileData();
377 if (num_copies == 0) {
continue; }
379 const auto* p_boxes = op.
m_boxes[lev].at(gid).dataPtr();
380 const auto* p_levels = op.
m_levels[lev].at(gid).dataPtr();
381 const auto* p_src_indices = op.
m_src_indices[lev].at(gid).dataPtr();
383 const auto* p_dst_indices = plan.
m_dst_indices[lev].at(gid).dataPtr();
384 auto* p_snd_buffer = snd_buffer.dataPtr();
389 int dst_box = p_boxes[i];
392 int dst_lev = p_levels[i];
393 auto dst_offset = get_offset(dst_box, dst_lev, psize, p_dst_indices[i]);
394 int src_index = p_src_indices[i];
395 ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);
397 const IntVect& pshift = p_periodic_shift[i];
398 bool do_periodic_shift =
400 || (is_per[1] && pshift[1] != 0),
401 || (is_per[2] && pshift[2] != 0) );
403 if (do_periodic_shift)
405 ParticleReal pos[AMREX_SPACEDIM];
407 AMREX_SPACEDIM*
sizeof(ParticleReal));
408 for (
int idim = 0; idim < AMREX_SPACEDIM; ++idim)
410 if (! is_per[idim]) {
continue; }
411 if (pshift[idim] > 0) {
412 pos[idim] += phi[idim] - plo[idim];
413 }
else if (pshift[idim] < 0) {
414 pos[idim] -= phi[idim] - plo[idim];
418 AMREX_SPACEDIM*
sizeof(ParticleReal));
426 template <
class PC,
class Buffer,
class UnpackPolicy,
427 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
432 using PTile =
typename PC::ParticleTileType;
434 int num_levels = pc.BufferMap().numLevels();
438 std::vector<int> sizes;
439 std::vector<PTile*> tiles;
440 for (
int lev = 0; lev < num_levels; ++lev)
444 int gid = mfi.index();
445 int tid = mfi.LocalTileIndex();
446 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
447 int num_copies = plan.
m_box_counts_h[pc.BufferMap().gridAndLevToBucket(gid, lev)];
448 sizes.push_back(num_copies);
449 tiles.push_back(&tile);
454 std::vector<int> offsets;
455 policy.resizeTiles(tiles, sizes, offsets);
462 for (
int lev = 0; lev < num_levels; ++lev)
464 auto& plev = pc.GetParticles(lev);
467 int gid = mfi.index();
468 int tid = mfi.LocalTileIndex();
469 auto index = std::make_pair(gid, tid);
471 auto& tile = plev[index];
474 auto p_snd_buffer = snd_buffer.dataPtr();
476 int offset = offsets[uindex];
477 int size = sizes[uindex];
480 auto ptd = tile.getParticleTileData();
483 auto src_offset = get_offset(gid, lev, psize, i);
484 int dst_index =
offset + i;
485 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
491 template <
class PC,
class SndBuffer,
class RcvBuffer,
492 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
495 BL_PROFILE(
"amrex::communicateParticlesStart");
502 if (
NProcs == 1) {
return; }
510 Long TotRcvBytes = 0;
511 for (
int i = 0; i <
NProcs; ++i) {
513 RcvProc.push_back(i);
515 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
517 rOffset.push_back(TotRcvBytes);
523 for (
int i = 0; i < plan.
m_nrcvs; ++i)
532 rcv_buffer.resize(TotRcvBytes);
545 for (
int i = 0; i < plan.
m_nrcvs; ++i) {
546 const auto Who = RcvProc[i];
547 const auto offset = rOffset[i];
549 std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
563 for (
int i = 0; i <
NProcs; ++i)
565 if (i ==
MyProc) {
continue; }
568 if (Cnt == 0) {
continue; }
586 template <
class PC,
class Buffer,
class UnpackPolicy,
587 std::enable_if_t<IsParticleContainer<PC>::value,
int> foo = 0>
594 if (
NProcs == 1) {
return; }
598 using PTile =
typename PC::ParticleTileType;
604 auto* p_rcv_buffer = rcv_buffer.dataPtr();
606 std::vector<int> sizes;
607 std::vector<PTile*> tiles;
614 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
615 sizes.push_back(copy_size);
616 tiles.push_back(&tile);
620 policy.resizeTiles(tiles, sizes, offsets);
630 procindex = (rproc == plan.
m_rcv_box_pids[i]) ? procindex : procindex+1;
633 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
634 auto ptd = tile.getParticleTileData();
639 int dst_offset = offsets[uindex];
640 int size = sizes[uindex];
647 Long src_offset = psize*(
offset + ip) + p_pad_adjust[procindex];
648 int dst_index = dst_offset + ip;
649 ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
650 p_comm_real, p_comm_int);
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_FOR_1D(...)
Definition: AMReX_GpuLaunch.nolint.H:41
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition: AMReX_HypreMLABecLap.cpp:1089
#define AMREX_D_TERM(a, b, c)
Definition: AMReX_SPACE.H:129
Definition: AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition: AMReX_MFIter.H:141
Definition: AMReX_PODVector.H:246
size_type size() const noexcept
Definition: AMReX_PODVector.H:575
iterator begin() noexcept
Definition: AMReX_PODVector.H:601
iterator end() noexcept
Definition: AMReX_PODVector.H:605
void resize(size_type a_new_size)
Definition: AMReX_PODVector.H:625
MPI_Request req() const
Definition: AMReX_ParallelDescriptor.H:74
Definition: AMReX_ParticleBufferMap.H:53
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition: AMReX_Vector.H:27
Long size() const noexcept
Definition: AMReX_Vector.H:50
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition: AMReX_GpuAtomic.H:198
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition: AMReX_GpuRange.H:26
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:121
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:233
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition: AMReX_Scan.H:1377
static constexpr DeviceToHost deviceToHost
Definition: AMReX_GpuContainers.H:99
static constexpr HostToDevice hostToDevice
Definition: AMReX_GpuContainers.H:98
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void * memcpy(void *dest, const void *src, std::size_t count)
Definition: AMReX_GpuUtility.H:214
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition: AMReX_MPMD.cpp:122
int MyProc()
Definition: AMReX_MPMD.cpp:117
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition: AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition: AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition: AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition: AMReX_ParallelContext.H:74
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1109
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition: AMReX_ParallelDescriptor.H:613
Message Arecv(T *, size_t n, int pid, int tag)
Definition: AMReX_ParallelDescriptor.H:1130
Definition: AMReX_Amr.cpp:49
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition: AMReX_ParticleCommunication.H:493
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:588
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition: AMReX_ParticleCommunication.cpp:371
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
Arena * The_Pinned_Arena()
Definition: AMReX_Arena.cpp:649
const int[]
Definition: AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size of size bytes, this returns the next largest arena size that will align...
Definition: AMReX_Arena.H:30
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition: AMReX_ParticleCommunication.H:428
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition: AMReX_ParticleCommunication.H:329
Definition: AMReX_ParticleBufferMap.H:35
Definition: AMReX_ParticleBufferMap.H:14
Definition: AMReX_ParticleCommunication.H:301
const unsigned int * m_box_offsets
Definition: AMReX_ParticleCommunication.H:302
GetPID m_get_pid
Definition: AMReX_ParticleCommunication.H:305
AMREX_FORCE_INLINE AMREX_GPU_DEVICE Long operator()(int dst_box, int dst_lev, std::size_t psize, int i) const
Definition: AMReX_ParticleCommunication.H:316
GetBucket m_get_bucket
Definition: AMReX_ParticleCommunication.H:306
const std::size_t * m_pad_correction
Definition: AMReX_ParticleCommunication.H:303
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition: AMReX_ParticleCommunication.H:308
Definition: AMReX_ParticleCommunication.H:19
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition: AMReX_ParticleCommunication.H:21
Definition: AMReX_ParticleCommunication.H:58
int numCopies(int gid, int lev) const
Definition: AMReX_ParticleCommunication.H:70
void setNumLevels(int num_levels)
Definition: AMReX_ParticleCommunication.cpp:14
void clear()
Definition: AMReX_ParticleCommunication.cpp:6
int numLevels() const
Definition: AMReX_ParticleCommunication.H:77
Vector< std::map< int, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition: AMReX_ParticleCommunication.H:62
Vector< std::map< int, Gpu::DeviceVector< int > > > m_boxes
Definition: AMReX_ParticleCommunication.H:59
Vector< std::map< int, Gpu::DeviceVector< int > > > m_levels
Definition: AMReX_ParticleCommunication.H:60
void resize(int gid, int lev, int size)
Definition: AMReX_ParticleCommunication.cpp:22
Vector< std::map< int, Gpu::DeviceVector< int > > > m_src_indices
Definition: AMReX_ParticleCommunication.H:61
Definition: AMReX_ParticleCommunication.H:81
Vector< int > m_rcv_box_ids
Definition: AMReX_ParticleCommunication.H:90
Vector< std::size_t > m_snd_offsets
Definition: AMReX_ParticleCommunication.H:113
Vector< int > m_rcv_box_counts
Definition: AMReX_ParticleCommunication.H:88
Vector< std::size_t > m_snd_counts
Definition: AMReX_ParticleCommunication.H:114
Long m_NumSnds
Definition: AMReX_ParticleCommunication.H:94
bool m_local
Definition: AMReX_ParticleCommunication.H:297
void doHandShake(const Vector< Long > &Snds, Vector< Long > &Rcvs) const
Definition: AMReX_ParticleCommunication.cpp:256
Vector< int > m_neighbor_procs
Definition: AMReX_ParticleCommunication.H:105
Vector< int > m_rcv_box_pids
Definition: AMReX_ParticleCommunication.H:91
Vector< int > m_rcv_box_levs
Definition: AMReX_ParticleCommunication.H:92
Gpu::DeviceVector< int > d_real_comp_mask
Definition: AMReX_ParticleCommunication.H:122
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition: AMReX_ParticleCommunication.H:117
static void doHandShakeAllToAll(const Vector< Long > &Snds, Vector< Long > &Rcvs)
Definition: AMReX_ParticleCommunication.cpp:304
Long m_superparticle_size
Definition: AMReX_ParticleCommunication.H:123
Vector< Long > m_Snds
Definition: AMReX_ParticleCommunication.H:107
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition: AMReX_ParticleCommunication.H:84
Vector< Long > m_Rcvs
Definition: AMReX_ParticleCommunication.H:108
Vector< int > m_rcv_box_offsets
Definition: AMReX_ParticleCommunication.H:89
Vector< std::size_t > m_snd_pad_correction_h
Definition: AMReX_ParticleCommunication.H:116
Vector< std::map< int, Gpu::DeviceVector< int > > > m_dst_indices
Definition: AMReX_ParticleCommunication.H:82
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition: AMReX_ParticleCommunication.H:86
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition: AMReX_ParticleCommunication.H:120
Vector< std::size_t > m_rOffset
Definition: AMReX_ParticleCommunication.H:110
void buildMPIStart(const ParticleBufferMap &map, Long psize)
Definition: AMReX_ParticleCommunication.cpp:48
Vector< Long > m_snd_num_particles
Definition: AMReX_ParticleCommunication.H:102
Vector< MPI_Request > m_particle_rreqs
Definition: AMReX_ParticleCommunication.H:100
Long superParticleSize() const
Definition: AMReX_ParticleCommunication.H:125
Vector< MPI_Status > m_particle_stats
Definition: AMReX_ParticleCommunication.H:99
Gpu::HostVector< int > m_rcv_data
Definition: AMReX_ParticleCommunication.H:111
Vector< MPI_Status > m_build_stats
Definition: AMReX_ParticleCommunication.H:96
void buildMPIFinish(const ParticleBufferMap &map)
Definition: AMReX_ParticleCommunication.cpp:207
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, bool local)
Definition: AMReX_ParticleCommunication.H:128
Vector< int > m_RcvProc
Definition: AMReX_ParticleCommunication.H:109
Vector< std::size_t > m_rcv_pad_correction_h
Definition: AMReX_ParticleCommunication.H:119
int m_nrcvs
Definition: AMReX_ParticleCommunication.H:95
Gpu::HostVector< unsigned int > m_box_counts_h
Definition: AMReX_ParticleCommunication.H:85
Vector< MPI_Request > m_build_rreqs
Definition: AMReX_ParticleCommunication.H:97
Vector< Long > m_rcv_num_particles
Definition: AMReX_ParticleCommunication.H:103
Gpu::DeviceVector< int > d_int_comp_mask
Definition: AMReX_ParticleCommunication.H:122
static void doHandShakeGlobal(const Vector< Long > &Snds, Vector< Long > &Rcvs)
Definition: AMReX_ParticleCommunication.cpp:327
void doHandShakeLocal(const Vector< Long > &Snds, Vector< Long > &Rcvs) const
Definition: AMReX_ParticleCommunication.cpp:263
void clear()
Definition: AMReX_ParticleCommunication.cpp:34
Definition: AMReX_ParticleCommunication.H:34
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition: AMReX_ParticleCommunication.H:36