1#ifndef AMREX_NEIGHBORPARTICLESCPUIMPL_H_
2#define AMREX_NEIGHBORPARTICLESCPUIMPL_H_
3#include <AMReX_Config.H>
// Entry point for filling neighbor particles on the CPU code path.
// The statements visible here call GetNeighborCommTags() under the
// !areMasksValid() check and call updateNeighborsCPU(false).
// NOTE(review): the embedded listing numbers skip 13 and 15-16, so this view
// is missing lines of the original definition -- confirm against the header.
7template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
9NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
10::fillNeighborsCPU () {
11 BL_PROFILE(
"NeighborParticleContainer::fillNeighborsCPU");
// GetNeighborCommTags() is only reached when the masks are reported invalid.
12 if (!areMasksValid()) {
14 GetNeighborCommTags();
// NOTE(review): the argument is presumably the reuse_rcv_counts flag that
// updateNeighborsCPU forwards to fillNeighborsMPI -- confirm.
17 updateNeighborsCPU(
false);
// Adds selected struct components of every neighbor copy back onto the
// particle that copy was made from, using the inverse tags to find it.
//
// real_start_comp / real_num_comp: the rdata components
//   [real_start_comp, real_start_comp + real_num_comp) are summed.
// int_start_comp / int_num_comp: the idata components
//   [int_start_comp, int_start_comp + int_num_comp) are summed.
//
// Aborts when enableInverse() is false. Contributions whose destination
// process equals MyProc are added in place; the remaining visible code packs
// contributions into per-process byte buffers that are handed to
// sumNeighborsMPI.
// NOTE(review): the embedded listing numbers skip repeatedly, so the
// declarations of `dst_proc` and `MyProc`, the braces and the branch that
// separates the in-place path from the packing path are not shown here --
// confirm against the full header.
20template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
22NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
23::sumNeighborsCPU (
int real_start_comp,
int real_num_comp,
24 int int_start_comp,
int int_num_comp)
26 BL_PROFILE(
"NeighborParticleContainer::sumNeighborsCPU");
// The inverse tags are required to know where each neighbor came from.
28 if ( ! enableInverse() )
30 amrex::Abort(
"Need to enable inverse to true to use sumNeighbors. \n");
// Outgoing bytes, keyed by the destination process.
35 std::map<int, Vector<char> > isend_data;
37 for (
int lev = 0; lev < this->numLevels(); ++lev)
39 for (MyParIter pti(*
this, lev); pti.isValid(); ++pti)
41 PairIndex src_index(pti.index(), pti.LocalTileIndex());
// tags[i] describes the origin of neighbs[i]; both are indexed together below.
42 const auto& tags = inverse_tags[lev][src_index];
43 const auto& neighbs = neighbors[lev][src_index].GetArrayOfStructs();
46 const int num_neighbs = neighbs.size();
47 for (
int i = 0; i < num_neighbs; ++i)
49 const auto& neighb = neighbs[i];
50 const auto& tag = tags[i];
// The neighbor's recorded source (grid, tile, index, level) is the
// destination of the summed contribution.
51 const int dst_grid = tag.src_grid;
52 const int global_rank = this->ParticleDistributionMap(lev)[dst_grid];
54 const int dst_tile = tag.src_tile;
55 const int dst_index = tag.src_index;
56 const int dst_level = tag.src_level;
// Destination lives on this process: add straight onto the particle.
58 if (dst_proc == MyProc)
60 auto pair = std::make_pair(dst_grid, dst_tile);
61 auto& dst_ptile = this->GetParticles(dst_level)[pair];
62 auto& dst_parts = dst_ptile.GetArrayOfStructs();
63 auto& p = dst_parts[dst_index];
65 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
67 p.rdata(comp) += neighb.rdata(comp);
70 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
72 p.idata(comp) += neighb.idata(comp);
// Packing path (presumably the branch for destinations on another process).
// One record is appended to that process's buffer: four ints (grid, tile,
// index, level) followed by the selected real and then int components.
78 auto& sdata = isend_data[dst_proc];
79 auto old_size = sdata.size();
80 auto new_size = old_size + real_num_comp*
sizeof(Real) + int_num_comp*
sizeof(
int) + 4*
sizeof(
int);
81 sdata.resize(new_size);
// Write position: the first byte of the newly appended record.
82 char* dst = &sdata[old_size];
83 std::memcpy(dst, &dst_grid,
sizeof(
int)); dst +=
sizeof(
int);
84 std::memcpy(dst, &dst_tile,
sizeof(
int)); dst +=
sizeof(
int);
85 std::memcpy(dst, &dst_index,
sizeof(
int)); dst +=
sizeof(
int);
86 std::memcpy(dst, &dst_level,
sizeof(
int)); dst +=
sizeof(
int);
87 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
89 Real data = neighb.rdata(comp);
90 std::memcpy(dst, &data,
sizeof(Real));
93 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
95 int data = neighb.idata(comp);
96 std::memcpy(dst, &data,
sizeof(
int));
// Hand the packed records and the component ranges to sumNeighborsMPI.
104 sumNeighborsMPI(isend_data, real_start_comp, real_num_comp, int_start_comp, int_num_comp);
// sumNeighborsMPI (per the BL_PROFILE label below; the declarator line itself
// is not part of this listing).
//
// Works on the byte buffers in `not_ours`, which are keyed by process. The
// portion visible here tallies the bytes held for each process, accumulates
// receive offsets, and finally unpacks `recvdata`, adding every received real
// and int component onto the particle addressed by the record header.
//
// The component ranges are [real_start_comp, real_start_comp + real_num_comp)
// and [int_start_comp, int_start_comp + int_num_comp).
// NOTE(review): the embedded listing numbers skip repeatedly, so the
// declarations of isnds/ircvs/RcvProc/rOffset/recvdata and the send/receive
// calls themselves are not shown -- confirm against the full header.
107template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
111 int real_start_comp,
int real_num_comp,
112 int int_start_comp,
int int_num_comp)
114 BL_PROFILE(
"NeighborParticleContainer::sumNeighborsMPI");
121 for (
int i = 0; i < NProcs; ++i) {
// Record the byte count held for each process and the overall total.
130 for (
const auto& kv : not_ours)
132 num_isnds += kv.second.
size();
133 isnds[kv.first] = kv.second.
size();
// Leave early when num_isnds is zero.
138 if (num_isnds == 0) {
return; }
140 const int num_ircvs = neighbor_procs.size();
147 for (
int i = 0; i < num_ircvs; ++i)
149 const int Who = neighbor_procs[i];
159 for (
int i = 0; i < num_ircvs; ++i) {
160 const int Who = neighbor_procs[i];
// rOffset holds the running byte total at the moment each process is pushed
// onto RcvProc.
174 std::size_t TotRcvBytes = 0;
175 for (
int i = 0; i < NProcs; ++i) {
177 RcvProc.push_back(i);
178 rOffset.push_back(TotRcvBytes);
179 TotRcvBytes += ircvs[i];
183 const auto nrcvs =
int(RcvProc.
size());
193 for (
int i = 0; i < nrcvs; ++i) {
194 const auto Who = RcvProc[i];
195 const auto offset = rOffset[i];
196 const auto Cnt = ircvs[Who];
207 for (
const auto& kv : not_ours) {
208 const auto Who = kv.first;
209 const auto Cnt = kv.second.size();
// Size in bytes of one packed record: four int header fields plus the
// selected real and int components.
224 const size_t data_size = real_num_comp*
sizeof(Real) + int_num_comp*
sizeof(
int) + 4 *
sizeof(
int);
// The received byte count must be a whole number of records; otherwise abort.
226 if (recvdata.
size() % data_size != 0) {
228 if (this->m_verbose) {
230 << recvdata.
size() <<
", " << data_size <<
"\n";
232 amrex::Abort(
"NeighborParticles::sumNeighbors: How did this happen?");
235 auto npart =
int(recvdata.
size() / data_size);
237 char* buffer = recvdata.data();
238 for (
int j = 0; j < npart; ++j)
// Record header, in order: grid, tile, particle index, level.
240 int grid, tile, index, lev;
241 std::memcpy(&grid, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
242 std::memcpy(&tile, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
243 std::memcpy(&index, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
244 std::memcpy(&lev, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
// Look up the particle the header points at.
246 auto pair = std::make_pair(grid, tile);
247 auto& ptile = this->GetParticles(lev)[pair];
248 auto& parts = ptile.GetArrayOfStructs();
249 auto& p = parts[index];
// Add the received real components, then the int components, onto it.
251 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
254 std::memcpy(&data, buffer,
sizeof(Real));
255 p.rdata(comp) += data;
256 buffer +=
sizeof(Real);
259 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
262 std::memcpy(&data, buffer,
sizeof(
int));
263 p.idata(comp) += data;
264 buffer +=
sizeof(
int);
// updateNeighborsCPU (per the BL_PROFILE_VAR label below; the declarator line
// itself is not part of this listing).
//
// For every level and particle tile, walks the cached NeighborCopyTag lists in
// buffer_tag_cache (indexed by j up to num_threads). The visible code treats a
// tag in one of two ways:
//   - it works on the destination neighbor tile directly: the copy at
//     tag.dst_index gets its periodic shift and, when enableInverse() is true,
//     its origin is recorded in inverse_tags; or
//   - it serializes the selected components of the source particle into
//     send_data[who].
// Afterwards the neighbor tiles are resized, fillNeighborsMPI is called, and
// each tile's neighbor count is set from the size of its neighbor storage.
// NOTE(review): the embedded listing numbers skip repeatedly, so the branch
// that selects between the two paths, and the definitions of `who` and
// `num_threads`, are not shown -- confirm against the full header.
273template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
278 BL_PROFILE_VAR(
"NeighborParticleContainer::updateNeighborsCPU", update);
282 for (
int lev = 0; lev < this->numLevels(); ++lev) {
283 const Periodicity& periodicity = this->Geom(lev).periodicity();
284 const RealBox& prob_domain = this->Geom(lev).ProbDomain();
288 for (MyParIter pti(*
this, lev); pti.isValid(); ++pti) {
289 PairIndex src_index(pti.index(), pti.LocalTileIndex());
290 auto src = pti.GetParticleTile().getParticleTileData();
291 for (
int j = 0; j < num_threads; ++j) {
292 auto& tags = buffer_tag_cache[lev][src_index][j];
293 int num_tags = tags.size();
295#pragma omp parallel for
297 for (
int i = 0; i < num_tags; ++i) {
298 const NeighborCopyTag& tag = tags[i];
// Distribution-map entry for the tag's destination grid (presumably the
// owning rank -- confirm).
299 const int global_who = this->ParticleDistributionMap(tag.level)[tag.grid];
// Direct path: operate on the neighbor tile addressed by the tag.
302 PairIndex dst_index(tag.grid, tag.tile);
303 auto dst = neighbors[tag.level][dst_index].getParticleTileData();
306 auto& aos = neighbors[tag.level][dst_index].GetArrayOfStructs();
307 ParticleType& p = aos[tag.dst_index];
// In each periodic direction, move the copy by one domain length against the
// sign of tag.periodic_shift; non-periodic directions are skipped.
308 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
309 if (! periodicity.
isPeriodic(dir)) {
continue; }
310 if (tag.periodic_shift[dir] < 0) {
311 p.pos(dir) +=
static_cast<ParticleReal
> (prob_domain.
length(dir));
312 }
else if (tag.periodic_shift[dir] > 0) {
313 p.pos(dir) -=
static_cast<ParticleReal
> (prob_domain.
length(dir));
// With inverse enabled, remember the source grid/tile/index/level of the copy.
318 if ( enableInverse() )
320 auto& itags = inverse_tags[tag.level][dst_index];
322 itags[tag.dst_index].src_grid = src_index.first;
323 itags[tag.dst_index].src_tile = src_index.second;
324 itags[tag.dst_index].src_index = tag.src_index;
325 itags[tag.dst_index].src_level = lev;
// Serialization path: `p` is a by-value copy of the source particle, so the
// periodic shift below does not modify the particle stored in the tile.
328 auto& aos = pti.GetArrayOfStructs();
329 auto& soa = pti.GetStructOfArrays();
330 ParticleType p = aos[tag.src_index];
332 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
333 if (! periodicity.
isPeriodic(dir)) {
continue; }
334 if (tag.periodic_shift[dir] < 0) {
335 p.pos(dir) +=
static_cast<ParticleReal
> (prob_domain.
length(dir));
336 }
else if (tag.periodic_shift[dir] > 0) {
337 p.pos(dir) -=
static_cast<ParticleReal
> (prob_domain.
length(dir));
// Write into send_data[who] starting at byte offset tag.dst_index, reading the
// shifted copy as raw bytes. Order: struct reals, array reals, struct ints,
// array ints, each filtered by ghost_real_comp / ghost_int_comp.
342 char* dst_ptr = &send_data[who][tag.dst_index];
343 char* src_ptr = (
char *) &p;
344 for (
int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
345 if (ghost_real_comp[ii]) {
346 std::memcpy(dst_ptr, src_ptr,
sizeof(
typename ParticleType::RealType));
347 dst_ptr +=
sizeof(
typename ParticleType::RealType);
349 src_ptr +=
sizeof(
typename ParticleType::RealType);
// Array (SoA) reals use the ghost_real_comp entries after the struct ones.
351 for (
int ii = 0; ii < this->NumRealComps(); ++ii) {
352 if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
354 std::memcpy(dst_ptr, &(soa.GetRealData(ii)[tag.src_index]),
355 sizeof(
typename ParticleType::RealType));
356 dst_ptr +=
sizeof(
typename ParticleType::RealType);
359 for (
int ii = 0; ii < 2 + NStructInt; ++ii) {
360 if (ghost_int_comp[ii]) {
361 std::memcpy(dst_ptr, src_ptr,
sizeof(
int));
362 dst_ptr +=
sizeof(
int);
364 src_ptr +=
sizeof(
int);
// Array (SoA) ints use the ghost_int_comp entries after the struct ones.
366 for (
int ii = 0; ii < this->NumIntComps(); ++ii) {
367 if (ghost_int_comp[ii+2+NStructInt])
369 std::memcpy(dst_ptr, &(soa.GetIntData(ii)[tag.src_index]),
371 dst_ptr +=
sizeof(
int);
// With inverse enabled, append the source grid, tile, particle index and level.
374 if ( enableInverse() )
376 std::memcpy(dst_ptr,&(src_index.first),
sizeof(
int)); dst_ptr +=
sizeof(
int);
377 std::memcpy(dst_ptr,&(src_index.second),
sizeof(
int)); dst_ptr +=
sizeof(
int);
378 std::memcpy(dst_ptr,&(tag.src_index),
sizeof(
int)); dst_ptr +=
sizeof(
int);
379 std::memcpy(dst_ptr,&(lev),
sizeof(
int)); dst_ptr +=
sizeof(
int);
// Size every neighbor tile (and its inverse tags, when enabled) to
// local_neighbor_sizes for that tile.
389 for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi) {
390 const int grid = mfi.index();
391 const int tile = mfi.LocalTileIndex();
392 PairIndex dst_index(grid, tile);
393 neighbors[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
394 if ( enableInverse() ) {
395 inverse_tags[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
401 fillNeighborsMPI(reuse_rcv_counts);
// Set each tile's neighbor count from the size of its neighbor storage.
403 for (
int lev = 0; lev < this->numLevels(); ++lev)
405 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
407 int src_grid = mfi.index();
408 int src_tile = mfi.LocalTileIndex();
409 auto index = std::make_pair(src_grid, src_tile);
410 auto& ptile = this->GetParticles(lev)[index];
411 ptile.setNumNeighbors(neighbors[lev][index].
size());
413 ptile.numRealParticles(), ptile.numNeighborParticles());
// Discards the neighbor data on the CPU code path.
// After resizeContainers(numLevels()), each level has its neighbor storage and
// buffer tag cache cleared (and its inverse tags, when enableInverse() is
// true), and every particle tile has its neighbor count set to zero.
// NOTE(review): the embedded listing numbers skip after 438, so trailing
// statements of the original definition may be missing from this view.
419template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
421NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
422::clearNeighborsCPU ()
424 BL_PROFILE(
"NeighborParticleContainer::clearNeighborsCPU");
426 resizeContainers(this->numLevels());
427 for (
int lev = 0; lev < this->numLevels(); ++lev) {
428 neighbors[lev].clear();
429 if ( enableInverse() ) { inverse_tags[lev].clear(); }
430 buffer_tag_cache[lev].clear();
// Reset the neighbor count of every tile on this level.
432 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
434 int src_grid = mfi.index();
435 int src_tile = mfi.LocalTileIndex();
436 auto index = std::make_pair(src_grid, src_tile);
437 auto& ptile = this->GetParticles(lev)[index];
438 ptile.setNumNeighbors(0);
// getRcvCountsMPI (per the BL_PROFILE label below; the declarator line itself
// is not part of this listing).
// The visible part tallies the bytes queued in send_data for each process
// (snds) and in total (num_snds), returns when num_snds is zero, and then
// iterates over neighbor_procs twice.
// NOTE(review): the embedded listing numbers skip repeatedly; the bodies of
// the neighbor_procs loops (presumably the count exchange) are not shown --
// confirm against the full header.
445template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
450 BL_PROFILE(
"NeighborParticleContainer::getRcvCountsMPI");
459 for (
int i = 0; i < NProcs; ++i) {
// Per-process and total byte counts of the queued send buffers.
464 for (
const auto& kv : send_data) {
465 num_snds += kv.second.size();
466 snds[kv.first] = kv.second.
size();
// Leave early when num_snds is zero.
471 if (num_snds == 0) {
return; }
473 const int num_rcvs = neighbor_procs.size();
480 for (
int i = 0; i < num_rcvs; ++i) {
481 const int Who = neighbor_procs[i];
491 for (
int i = 0; i < num_rcvs; ++i) {
492 const int Who = neighbor_procs[i];
// fillNeighborsMPI (per the BL_PROFILE label below; the declarator line itself
// is not part of this listing).
//
// The visible part refreshes the receive counts unless reuse_rcv_counts is
// set, returns when num_snds is zero, accumulates receive offsets, and then
// unpacks `recvdata` into the neighbor tiles: each unpacked particle is
// appended after the entries the tile already holds.
// NOTE(review): the embedded listing numbers skip repeatedly, so the
// declarations of RcvProc/rOffset/recvdata/cdata_size/dst_index/src and the
// send/receive calls themselves are not shown -- confirm against the header.
506template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
511 BL_PROFILE(
"NeighborParticleContainer::fillNeighborsMPI");
// Receive counts are recomputed unless the caller asked to reuse them.
518 if (!reuse_rcv_counts) { getRcvCountsMPI(); }
519 if (num_snds == 0) {
return; }
// rOffset holds the running byte total at the moment each process is pushed
// onto RcvProc.
523 std::size_t TotRcvBytes = 0;
524 for (
int i = 0; i < NProcs; ++i) {
526 RcvProc.push_back(i);
527 rOffset.push_back(TotRcvBytes);
528 TotRcvBytes += rcvs[i];
532 const auto nrcvs =
int(RcvProc.
size());
542 for (
int i = 0; i < nrcvs; ++i) {
543 const auto Who = RcvProc[i];
544 const auto offset = rOffset[i];
545 const auto Cnt = rcvs[Who];
556 for (
const auto& kv : send_data) {
557 const auto Who = kv.first;
558 const auto Cnt = kv.second.size();
// Unpack every received buffer. A buffer starts with a tile count; each tile
// entry then carries level, grid id, tile id and a size field.
570 for (
int i = 0; i < nrcvs; ++i) {
572 char* buffer = &recvdata[
offset];
573 int num_tiles, lev, gid, tid, size, np;
574 std::memcpy(&num_tiles, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
575 for (
int j = 0; j < num_tiles; ++j) {
576 std::memcpy(&lev, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
577 std::memcpy(&gid, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
578 std::memcpy(&tid, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
579 std::memcpy(&size, buffer,
sizeof(
int)); buffer +=
sizeof(
int);
// Entries with a zero size field are skipped.
581 if (size == 0) {
continue; }
// Particle count for this entry (size is presumably in bytes and cdata_size
// the packed size of one particle -- confirm).
583 np = size / cdata_size;
// Grow the destination neighbor tile (and its inverse tags, when enabled) by
// np entries; the new particles go to indices old_size .. old_size + np - 1.
588 size_t old_size = neighbors[lev][dst_index].size();
589 size_t new_size = neighbors[lev][dst_index].size() + np;
590 if ( enableInverse() )
593 size_t(inverse_tags[lev][dst_index].size()));
594 inverse_tags[lev][dst_index].resize(new_size);
596 neighbors[lev][dst_index].resize(new_size);
// Per particle, components are read in the order: struct reals, array reals,
// struct ints, array ints, each filtered by ghost_real_comp / ghost_int_comp.
599 for (
int n = 0; n < np; ++n) {
600 char* dst_aos = (
char*) &neighbors[lev][dst_index].GetArrayOfStructs()[old_size+n];
601 auto& dst_soa = neighbors[lev][dst_index].GetStructOfArrays();
602 for (
int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
603 if (ghost_real_comp[ii]) {
604 std::memcpy(dst_aos, src,
sizeof(
typename ParticleType::RealType));
605 src +=
sizeof(
typename ParticleType::RealType);
607 dst_aos +=
sizeof(
typename ParticleType::RealType);
609 for (
int ii = 0; ii < this->NumRealComps(); ++ii) {
610 if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
612 std::memcpy(&(dst_soa.GetRealData(ii)[old_size+n]),
613 src,
sizeof(
typename ParticleType::RealType));
614 src +=
sizeof(
typename ParticleType::RealType);
617 for (
int ii = 0; ii < 2 + NStructInt; ++ii) {
618 if (ghost_int_comp[ii]) {
619 std::memcpy(dst_aos, src,
sizeof(
int));
622 dst_aos +=
sizeof(
int);
624 for (
int ii = 0; ii < this->NumIntComps(); ++ii) {
625 if (ghost_int_comp[ii+2+NStructInt])
627 std::memcpy(&(dst_soa.GetIntData(ii)[old_size+n]),
// With inverse enabled, four more ints are read into the particle's inverse
// tag: source grid, tile, index and level.
633 if ( enableInverse() )
635 auto& tag = inverse_tags[lev][dst_index][old_size+n];
636 std::memcpy(&(tag.src_grid),src,
sizeof(
int));
639 std::memcpy(&(tag.src_tile),src,
sizeof(
int));
642 std::memcpy(&(tag.src_index),src,
sizeof(
int));
645 std::memcpy(&(tag.src_level),src,
sizeof(
int));
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_VAR_STOP(vname)
Definition AMReX_BLProfiler.H:563
#define BL_PROFILE_VAR(fname, vname)
Definition AMReX_BLProfiler.H:560
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
Print on all processors of the default communicator.
Definition AMReX_Print.H:117
Definition AMReX_NeighborParticles.H:35
std::pair< int, int > PairIndex
Definition AMReX_NeighborParticles.H:196
void getRcvCountsMPI()
Definition AMReX_NeighborParticlesCPUImpl.H:448
void sumNeighborsMPI(std::map< int, Vector< char > > &not_ours, int real_start_comp, int real_num_comp, int int_start_comp, int int_num_comp)
Definition AMReX_NeighborParticlesCPUImpl.H:110
void fillNeighborsMPI(bool reuse_rcv_counts)
Definition AMReX_NeighborParticlesCPUImpl.H:509
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
This provides length of period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
bool isAnyPeriodic() const noexcept
Definition AMReX_Periodicity.H:22
bool isPeriodic(int dir) const noexcept
Definition AMReX_Periodicity.H:26
This class provides the user with a few print options.
Definition AMReX_Print.H:35
A Box with real dimensions. A RealBox is OK iff volume >= 0.
Definition AMReX_RealBox.H:21
AMREX_GPU_HOST_DEVICE Real length(int dir) const noexcept
Returns length in specified direction.
Definition AMReX_RealBox.H:62
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
Long size() const noexcept
Definition AMReX_Vector.H:50
AMREX_GPU_HOST_DEVICE Long size(T const &b) noexcept
integer version
Definition AMReX_GpuRange.H:26
int MyProc()
Definition AMReX_MPMD.cpp:117
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:126
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1298
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition AMReX_ParallelDescriptor.H:1109
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:613
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1130
Definition AMReX_Amr.cpp:49
void copyParticles(DstTile &dst, const SrcTile &src) noexcept
Copy particles from src to dst. This version copies all the particles, writing them to the beginning ...
Definition AMReX_ParticleTransformation.H:158
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
const int[]
Definition AMReX_BLProfiler.cpp:1664
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void copyParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ConstParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle copying routine that can run on the GPU.
Definition AMReX_ParticleTransformation.H:31