#ifndef AMREX_NEIGHBORPARTICLESCPUIMPL_H_
#define AMREX_NEIGHBORPARTICLESCPUIMPL_H_

#include <AMReX_Config.H>
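
// CPU implementations of the NeighborParticleContainer communication
// routines: filling, updating, summing, and clearing the ghost ("neighbor")
// particles that live across grid, tile, and MPI-rank boundaries.
//
// A typical calling sequence (a sketch only; `pc` is a hypothetical
// NeighborParticleContainer instance owned by the application, and the
// component counts passed to sumNeighborsCPU are illustrative):
//
//     pc.fillNeighborsCPU();                   // copy ghosts from neighboring grids
//     // ... compute on real + neighbor particles ...
//     pc.sumNeighborsCPU(0, NStructReal, 0, NStructInt);  // requires enableInverse()
//     pc.clearNeighborsCPU();                  // drop the ghosts again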

template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::fillNeighborsCPU () {
    BL_PROFILE("NeighborParticleContainer::fillNeighborsCPU");

    // Rebuild the neighbor communication tags if the cached masks are stale.
    if (!areMasksValid()) {
        GetNeighborCommTags();
    }
    updateNeighborsCPU(false);
}

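// sumNeighborsCPU adds selected real and int components carried by the ghost
// particles back onto the particles they were copied from. Contributions to
// locally owned particles are applied in place; contributions to remote
// particles are packed, per destination rank, into byte buffers that are
// handed to sumNeighborsMPI.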
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::sumNeighborsCPU (int real_start_comp, int real_num_comp,
                   int int_start_comp, int int_num_comp)
{
    BL_PROFILE("NeighborParticleContainer::sumNeighborsCPU");

    if ( ! enableInverse() )
    {
        amrex::Abort("Need to enable inverse to true to use sumNeighbors. \n");
    }
    // One byte buffer per destination rank for contributions that have to be
    // communicated.
    std::map<int, Vector<char> > isend_data;

    for (int lev = 0; lev < this->numLevels(); ++lev)
    {
        for (MyParIter pti(*this, lev); pti.isValid(); ++pti)
        {
            PairIndex src_index(pti.index(), pti.LocalTileIndex());
            const auto& tags = inverse_tags[lev][src_index];
            const auto& neighbs = neighbors[lev][src_index].GetArrayOfStructs();

            const int num_neighbs = neighbs.size();
            for (int i = 0; i < num_neighbs; ++i)
            {
                const auto& neighb = neighbs[i];
                const auto& tag = tags[i];
                const int dst_grid = tag.src_grid;
                const int global_rank = this->ParticleDistributionMap(lev)[dst_grid];
                const int dst_proc = ParallelContext::global_to_local_rank(global_rank);
                const int dst_tile = tag.src_tile;
                const int dst_index = tag.src_index;
                const int dst_level = tag.src_level;

                if (dst_proc == ParallelContext::MyProcSub())
                {
                    // The source particle lives on this rank; apply the
                    // contribution directly.
                    auto pair = std::make_pair(dst_grid, dst_tile);
                    auto& dst_ptile = this->GetParticles(dst_level)[pair];
                    auto& dst_parts = dst_ptile.GetArrayOfStructs();
                    auto& p = dst_parts[dst_index];

                    for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp) {
                        p.rdata(comp) += neighb.rdata(comp);
                    }

                    for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp) {
                        p.idata(comp) += neighb.idata(comp);
                    }
                }
                else
                {
                    // Pack the destination (grid, tile, index, level) header and
                    // the selected components for the owner rank.
                    auto& sdata = isend_data[dst_proc];
                    auto old_size = sdata.size();
                    auto new_size = old_size + real_num_comp*sizeof(Real)
                        + int_num_comp*sizeof(int) + 4*sizeof(int);
                    sdata.resize(new_size);
                    char* dst = &sdata[old_size];

                    std::memcpy(dst, &dst_grid,  sizeof(int)); dst += sizeof(int);
                    std::memcpy(dst, &dst_tile,  sizeof(int)); dst += sizeof(int);
                    std::memcpy(dst, &dst_index, sizeof(int)); dst += sizeof(int);
                    std::memcpy(dst, &dst_level, sizeof(int)); dst += sizeof(int);

                    for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp) {
                        Real data = neighb.rdata(comp);
                        std::memcpy(dst, &data, sizeof(Real)); dst += sizeof(Real);
                    }

                    for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp) {
                        int data = neighb.idata(comp);
                        std::memcpy(dst, &data, sizeof(int)); dst += sizeof(int);
                    }
                }
            }
        }
    }

    sumNeighborsMPI(isend_data, real_start_comp, real_num_comp,
                    int_start_comp, int_num_comp);
}

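// sumNeighborsMPI exchanges the packed per-rank buffers and unpacks each
// record, which consists of a (grid, tile, index, level) header followed by
// the selected real and int components, adding the received values onto the
// owning particles.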
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::sumNeighborsMPI (std::map<int, Vector<char> >& not_ours,
                   int real_start_comp, int real_num_comp,
                   int int_start_comp, int int_num_comp)
{
    BL_PROFILE("NeighborParticleContainer::sumNeighborsMPI");

    const int NProcs = ParallelContext::NProcsSub();

    // Work out how many bytes this rank sends to, and receives from, every
    // other rank.
    Vector<Long> isnds(NProcs, 0);
    Vector<Long> ircvs(NProcs);
    for (int i = 0; i < NProcs; ++i) {
        ircvs[i] = 0;
    }

    Long num_isnds = 0;
    for (const auto& kv : not_ours)
    {
        num_isnds += kv.second.size();
        isnds[kv.first] = kv.second.size();
    }

    if (num_isnds == 0) { return; }

    const int num_ircvs = neighbor_procs.size();

    // Post nonblocking receives (ParallelDescriptor::Arecv) for the byte
    // counts coming from each neighbor rank ...
    for (int i = 0; i < num_ircvs; ++i)
    {
        const int Who = neighbor_procs[i];
        // ...
    }

    // ... and send our byte counts (ParallelDescriptor::Send) to each of them.
    for (int i = 0; i < num_ircvs; ++i) {
        const int Who = neighbor_procs[i];
        // ...
    }

    // Compute the total receive size and the offset of each sender's block.
    Vector<int> RcvProc;
    Vector<std::size_t> rOffset;

    std::size_t TotRcvBytes = 0;
    for (int i = 0; i < NProcs; ++i) {
        if (ircvs[i] > 0) {
            RcvProc.push_back(i);
            rOffset.push_back(TotRcvBytes);
            TotRcvBytes += ircvs[i];
        }
    }

    const auto nrcvs = int(RcvProc.size());

    // Receive the packed contribution data from every sender ...
    Vector<char> recvdata(TotRcvBytes);
    for (int i = 0; i < nrcvs; ++i) {
        const auto Who = RcvProc[i];
        const auto offset = rOffset[i];
        const auto Cnt = ircvs[Who];
        // ...
    }

    // ... and send ours.
    for (const auto& kv : not_ours) {
        const auto Who = kv.first;
        const auto Cnt = kv.second.size();
        // ...
    }

    // Unpack once all receives have completed. Each record is a
    // (grid, tile, index, level) header followed by the selected real and
    // int components.
    const size_t data_size = real_num_comp*sizeof(Real)
        + int_num_comp*sizeof(int) + 4*sizeof(int);

    if (recvdata.size() % data_size != 0) {
        if (this->m_verbose) {
            amrex::AllPrint() << recvdata.size() << ", " << data_size << "\n";
        }
        amrex::Abort("NeighborParticles::sumNeighbors: How did this happen?");
    }

    auto npart = int(recvdata.size() / data_size);

    char* buffer = recvdata.data();
    for (int j = 0; j < npart; ++j)
    {
        int grid, tile, index, lev;
        std::memcpy(&grid,  buffer, sizeof(int)); buffer += sizeof(int);
        std::memcpy(&tile,  buffer, sizeof(int)); buffer += sizeof(int);
        std::memcpy(&index, buffer, sizeof(int)); buffer += sizeof(int);
        std::memcpy(&lev,   buffer, sizeof(int)); buffer += sizeof(int);

        auto pair = std::make_pair(grid, tile);
        auto& ptile = this->GetParticles(lev)[pair];
        auto& parts = ptile.GetArrayOfStructs();
        auto& p = parts[index];

        for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
        {
            Real data;
            std::memcpy(&data, buffer, sizeof(Real));
            p.rdata(comp) += data;
            buffer += sizeof(Real);
        }

        for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
        {
            int data;
            std::memcpy(&data, buffer, sizeof(int));
            p.idata(comp) += data;
            buffer += sizeof(int);
        }
    }
}

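// updateNeighborsCPU re-copies particles into the neighbor buffers using the
// cached copy tags: particles headed for grids owned by this rank are copied
// directly (with any periodic shift applied), while particles headed for
// other ranks are packed into send_data and exchanged by fillNeighborsMPI.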
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::updateNeighborsCPU (bool reuse_rcv_counts)
{
    BL_PROFILE_VAR("NeighborParticleContainer::updateNeighborsCPU", update);

    const int num_threads = OpenMP::get_max_threads();

    for (int lev = 0; lev < this->numLevels(); ++lev) {
        const Periodicity& periodicity = this->Geom(lev).periodicity();
        const RealBox& prob_domain = this->Geom(lev).ProbDomain();

        for (MyParIter pti(*this, lev); pti.isValid(); ++pti) {
            PairIndex src_index(pti.index(), pti.LocalTileIndex());
            auto src = pti.GetParticleTile().getParticleTileData();
            for (int j = 0; j < num_threads; ++j) {
                auto& tags = buffer_tag_cache[lev][src_index][j];
                int num_tags = tags.size();
#ifdef AMREX_USE_OMP
#pragma omp parallel for
#endif
                for (int i = 0; i < num_tags; ++i) {
                    const NeighborCopyTag& tag = tags[i];
                    const int global_who = this->ParticleDistributionMap(tag.level)[tag.grid];
                    const int who = ParallelContext::global_to_local_rank(global_who);
                    if (who == ParallelContext::MyProcSub()) {
                        // Destination tile is owned by this rank; copy the
                        // particle straight into the local neighbor buffer.
                        PairIndex dst_index(tag.grid, tag.tile);
                        auto dst = neighbors[tag.level][dst_index].getParticleTileData();
                        copyParticle(dst, src, tag.src_index, tag.dst_index);

                        // Apply the periodic shift, if any, to the copy.
                        auto& aos = neighbors[tag.level][dst_index].GetArrayOfStructs();
                        ParticleType& p = aos[tag.dst_index];
                        for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
                            if (! periodicity.isPeriodic(dir)) { continue; }
                            if (tag.periodic_shift[dir] < 0) {
                                p.pos(dir) += static_cast<ParticleReal>(prob_domain.length(dir));
                            } else if (tag.periodic_shift[dir] > 0) {
                                p.pos(dir) -= static_cast<ParticleReal>(prob_domain.length(dir));
                            }
                        }

                        if ( enableInverse() )
                        {
                            // Record where the copy came from so sumNeighbors
                            // can route contributions back to the source.
                            auto& itags = inverse_tags[tag.level][dst_index];
                            itags[tag.dst_index].src_grid  = src_index.first;
                            itags[tag.dst_index].src_tile  = src_index.second;
                            itags[tag.dst_index].src_index = tag.src_index;
                            itags[tag.dst_index].src_level = lev;
                        }
                    } else {
                        // Destination is remote; shift a copy of the particle
                        // and pack the selected components into send_data.
                        auto& aos = pti.GetArrayOfStructs();
                        auto& soa = pti.GetStructOfArrays();
                        ParticleType p = aos[tag.src_index];

                        for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
                            if (! periodicity.isPeriodic(dir)) { continue; }
                            if (tag.periodic_shift[dir] < 0) {
                                p.pos(dir) += static_cast<ParticleReal>(prob_domain.length(dir));
                            } else if (tag.periodic_shift[dir] > 0) {
                                p.pos(dir) -= static_cast<ParticleReal>(prob_domain.length(dir));
                            }
                        }

                        char* dst_ptr = &send_data[who][tag.dst_index];
                        char* src_ptr = (char *) &p;

                        // AoS real components (positions + struct reals).
                        for (int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
                            if (ghost_real_comp[ii]) {
                                std::memcpy(dst_ptr, src_ptr,
                                            sizeof(typename ParticleType::RealType));
                                dst_ptr += sizeof(typename ParticleType::RealType);
                            }
                            src_ptr += sizeof(typename ParticleType::RealType);
                        }

                        // SoA real components.
                        for (int ii = 0; ii < this->NumRealComps(); ++ii) {
                            if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
                            {
                                std::memcpy(dst_ptr, &(soa.GetRealData(ii)[tag.src_index]),
                                            sizeof(typename ParticleType::RealType));
                                dst_ptr += sizeof(typename ParticleType::RealType);
                            }
                        }

                        // AoS int components (id, cpu + struct ints).
                        for (int ii = 0; ii < 2 + NStructInt; ++ii) {
                            if (ghost_int_comp[ii]) {
                                std::memcpy(dst_ptr, src_ptr, sizeof(int));
                                dst_ptr += sizeof(int);
                            }
                            src_ptr += sizeof(int);
                        }

                        // SoA int components.
                        for (int ii = 0; ii < this->NumIntComps(); ++ii) {
                            if (ghost_int_comp[ii+2+NStructInt])
                            {
                                std::memcpy(dst_ptr, &(soa.GetIntData(ii)[tag.src_index]),
                                            sizeof(int));
                                dst_ptr += sizeof(int);
                            }
                        }

                        if ( enableInverse() )
                        {
                            // Append the inverse tag so the receiver can send
                            // sumNeighbors contributions back to this particle.
                            std::memcpy(dst_ptr, &(src_index.first),  sizeof(int)); dst_ptr += sizeof(int);
                            std::memcpy(dst_ptr, &(src_index.second), sizeof(int)); dst_ptr += sizeof(int);
                            std::memcpy(dst_ptr, &(tag.src_index),    sizeof(int)); dst_ptr += sizeof(int);
                        }
                    }
                }
            }
        }

        // Make sure the local neighbor buffers (and inverse tags) have their
        // final sizes before the MPI exchange.
        for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi) {
            const int grid = mfi.index();
            const int tile = mfi.LocalTileIndex();
            PairIndex dst_index(grid, tile);
            neighbors[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
            if ( enableInverse() ) {
                inverse_tags[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
            }
        }
    }
    BL_PROFILE_VAR_STOP(update);

    fillNeighborsMPI(reuse_rcv_counts);

    // Append the received neighbors to each tile as neighbor particles.
    for (int lev = 0; lev < this->numLevels(); ++lev)
    {
        for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int src_grid = mfi.index();
            int src_tile = mfi.LocalTileIndex();
            auto index = std::make_pair(src_grid, src_tile);
            auto& ptile = this->GetParticles(lev)[index];
            ptile.setNumNeighbors(neighbors[lev][index].size());
            amrex::copyParticles(ptile, neighbors[lev][index], 0,
                                 ptile.numRealParticles(), ptile.numNeighborParticles());
        }
    }
}

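// clearNeighborsCPU discards all neighbor particles, inverse tags, and cached
// copy tags, and resets the neighbor count of every tile to zero.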
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::clearNeighborsCPU ()
{
    BL_PROFILE("NeighborParticleContainer::clearNeighborsCPU");

    resizeContainers(this->numLevels());
    for (int lev = 0; lev < this->numLevels(); ++lev) {
        neighbors[lev].clear();
        if ( enableInverse() ) { inverse_tags[lev].clear(); }
        buffer_tag_cache[lev].clear();

        for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int src_grid = mfi.index();
            int src_tile = mfi.LocalTileIndex();
            auto index = std::make_pair(src_grid, src_tile);
            auto& ptile = this->GetParticles(lev)[index];
            ptile.setNumNeighbors(0);
        }
    }
}

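// getRcvCountsMPI performs the size handshake: each rank tells its neighbor
// ranks how many bytes of particle data it is about to send, so receive
// buffers can be sized before the data exchange in fillNeighborsMPI.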
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::getRcvCountsMPI ()
{
    BL_PROFILE("NeighborParticleContainer::getRcvCountsMPI");

    const int NProcs = ParallelContext::NProcsSub();

    // Each rank works out how many bytes it is going to send to every other
    // rank, and how many it will receive.
    Vector<Long> snds(NProcs, 0);
    for (int i = 0; i < NProcs; ++i) {
        rcvs[i] = 0;
    }

    num_snds = 0;
    for (const auto& kv : send_data) {
        num_snds += kv.second.size();
        snds[kv.first] = kv.second.size();
    }

    if (num_snds == 0) { return; }

    const int num_rcvs = neighbor_procs.size();

    // Post nonblocking receives (ParallelDescriptor::Arecv) for the byte
    // counts from each neighbor rank ...
    for (int i = 0; i < num_rcvs; ++i) {
        const int Who = neighbor_procs[i];
        // ...
    }

    // ... then send our byte counts (ParallelDescriptor::Send) to each of them.
    for (int i = 0; i < num_rcvs; ++i) {
        const int Who = neighbor_procs[i];
        // ...
    }
}

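// fillNeighborsMPI sends the packed particle data to the neighboring ranks,
// receives theirs, and unpacks the incoming tiles into the local neighbor
// buffers (and inverse tags, when enabled).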
template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
void
NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
::fillNeighborsMPI (bool reuse_rcv_counts)
{
    BL_PROFILE("NeighborParticleContainer::fillNeighborsMPI");

    // Exchange the byte counts first, unless the counts from a previous fill
    // can be reused.
    if (!reuse_rcv_counts) { getRcvCountsMPI(); }
    if (num_snds == 0) { return; }

    const int NProcs = ParallelContext::NProcsSub();

    // Compute the total receive size and the offset of each sender's block.
    Vector<int> RcvProc;
    Vector<std::size_t> rOffset;

    std::size_t TotRcvBytes = 0;
    for (int i = 0; i < NProcs; ++i) {
        if (rcvs[i] > 0) {
            RcvProc.push_back(i);
            rOffset.push_back(TotRcvBytes);
            TotRcvBytes += rcvs[i];
        }
    }

    const auto nrcvs = int(RcvProc.size());

    // Post nonblocking receives (ParallelDescriptor::Arecv) for the packed
    // particle data ...
    Vector<char> recvdata(TotRcvBytes);
    for (int i = 0; i < nrcvs; ++i) {
        const auto Who = RcvProc[i];
        const auto offset = rOffset[i];
        const auto Cnt = rcvs[Who];
        // ...
    }

    // ... and send ours (ParallelDescriptor::Send).
    for (const auto& kv : send_data) {
        const auto Who = kv.first;
        const auto Cnt = kv.second.size();
        // ...
    }

    // Once the receives have completed, unpack each sender's block. A block
    // starts with the number of tiles, followed by a per-tile header
    // (level, grid, tile, byte count) and the packed particle data.
    for (int i = 0; i < nrcvs; ++i) {
        const auto offset = rOffset[i];
        char* buffer = &recvdata[offset];
        int num_tiles, lev, gid, tid, size, np;
        std::memcpy(&num_tiles, buffer, sizeof(int)); buffer += sizeof(int);
        for (int j = 0; j < num_tiles; ++j) {
            std::memcpy(&lev,  buffer, sizeof(int)); buffer += sizeof(int);
            std::memcpy(&gid,  buffer, sizeof(int)); buffer += sizeof(int);
            std::memcpy(&tid,  buffer, sizeof(int)); buffer += sizeof(int);
            std::memcpy(&size, buffer, sizeof(int)); buffer += sizeof(int);

            if (size == 0) { continue; }

            np = size / cdata_size;

            PairIndex dst_index(gid, tid);

            size_t old_size = neighbors[lev][dst_index].size();
            size_t new_size = neighbors[lev][dst_index].size() + np;
            if ( enableInverse() )
            {
                AMREX_ASSERT(old_size ==
                             size_t(inverse_tags[lev][dst_index].size()));
                inverse_tags[lev][dst_index].resize(new_size);
            }
            neighbors[lev][dst_index].resize(new_size);

            char* src = buffer;
            for (int n = 0; n < np; ++n) {
                char* dst_aos = (char*) &neighbors[lev][dst_index].GetArrayOfStructs()[old_size+n];
                auto& dst_soa = neighbors[lev][dst_index].GetStructOfArrays();

                // AoS real components (positions + struct reals).
                for (int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
                    if (ghost_real_comp[ii]) {
                        std::memcpy(dst_aos, src, sizeof(typename ParticleType::RealType));
                        src += sizeof(typename ParticleType::RealType);
                    }
                    dst_aos += sizeof(typename ParticleType::RealType);
                }

                // SoA real components.
                for (int ii = 0; ii < this->NumRealComps(); ++ii) {
                    if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
                    {
                        std::memcpy(&(dst_soa.GetRealData(ii)[old_size+n]),
                                    src, sizeof(typename ParticleType::RealType));
                        src += sizeof(typename ParticleType::RealType);
                    }
                }

                // AoS int components (id, cpu + struct ints).
                for (int ii = 0; ii < 2 + NStructInt; ++ii) {
                    if (ghost_int_comp[ii]) {
                        std::memcpy(dst_aos, src, sizeof(int));
                        src += sizeof(int);
                    }
                    dst_aos += sizeof(int);
                }

                // SoA int components.
                for (int ii = 0; ii < this->NumIntComps(); ++ii) {
                    if (ghost_int_comp[ii+2+NStructInt])
                    {
                        std::memcpy(&(dst_soa.GetIntData(ii)[old_size+n]),
                                    src, sizeof(int));
                        src += sizeof(int);
                    }
                }

                if ( enableInverse() )
                {
                    auto& tag = inverse_tags[lev][dst_index][old_size+n];
                    // ...