1#ifndef AMREX_NEIGHBORPARTICLESCPUIMPL_H_
2#define AMREX_NEIGHBORPARTICLESCPUIMPL_H_
3#include <AMReX_Config.H>
9template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
11NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
12::fillNeighborsCPU () {
13 BL_PROFILE(
"NeighborParticleContainer::fillNeighborsCPU");
14 if (!areMasksValid()) {
16 GetNeighborCommTags();
19 updateNeighborsCPU(
false);
22template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
24NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
25::sumNeighborsCPU (
int real_start_comp,
int real_num_comp,
26 int int_start_comp,
int int_num_comp)
28 BL_PROFILE(
"NeighborParticleContainer::sumNeighborsCPU");
30 if ( ! enableInverse() )
32 amrex::Abort(
"Need to enable inverse to true to use sumNeighbors. \n");
37 std::map<int, Vector<char> > isend_data;
39 for (
int lev = 0; lev < this->numLevels(); ++lev)
41 for (MyParIter pti(*
this, lev); pti.isValid(); ++pti)
43 PairIndex src_index(pti.index(), pti.LocalTileIndex());
44 const auto& tags = inverse_tags[lev][src_index];
45 const auto& neighbs = neighbors[lev][src_index].GetArrayOfStructs();
48 const int num_neighbs = neighbs.size();
49 for (
int i = 0; i < num_neighbs; ++i)
51 const auto& neighb = neighbs[i];
52 const auto& tag = tags[i];
53 const int dst_grid = tag.src_grid;
54 const int global_rank = this->ParticleDistributionMap(lev)[dst_grid];
56 const int dst_tile = tag.src_tile;
57 const int dst_index = tag.src_index;
58 const int dst_level = tag.src_level;
60 if (dst_proc == MyProc)
62 auto pair = std::make_pair(dst_grid, dst_tile);
63 auto& dst_ptile = this->GetParticles(dst_level)[pair];
64 auto& dst_parts = dst_ptile.GetArrayOfStructs();
65 auto& p = dst_parts[dst_index];
67 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
69 p.rdata(comp) += neighb.rdata(comp);
72 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
74 p.idata(comp) += neighb.idata(comp);
80 auto& sdata = isend_data[dst_proc];
81 auto old_size = sdata.size();
82 auto new_size = old_size + real_num_comp*
sizeof(Real) + int_num_comp*
sizeof(
int) + 4*
sizeof(int);
83 sdata.resize(new_size);
84 char* dst = &sdata[old_size];
85 std::memcpy(dst, &dst_grid,
sizeof(
int)); dst +=
sizeof(int);
86 std::memcpy(dst, &dst_tile,
sizeof(
int)); dst +=
sizeof(int);
87 std::memcpy(dst, &dst_index,
sizeof(
int)); dst +=
sizeof(int);
88 std::memcpy(dst, &dst_level,
sizeof(
int)); dst +=
sizeof(int);
89 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
91 Real data = neighb.rdata(comp);
92 std::memcpy(dst, &data,
sizeof(Real));
95 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
97 int data = neighb.idata(comp);
98 std::memcpy(dst, &data,
sizeof(
int));
106 sumNeighborsMPI(isend_data, real_start_comp, real_num_comp, int_start_comp, int_num_comp);
109template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
113 int real_start_comp,
int real_num_comp,
114 int int_start_comp,
int int_num_comp)
116 BL_PROFILE(
"NeighborParticleContainer::sumNeighborsMPI");
121 Vector<Long> isnds(NProcs, 0);
122 Vector<Long> ircvs(NProcs, 0);
123 for (
int i = 0; i <
NProcs; ++i) {
132 for (
const auto& kv : not_ours)
134 num_isnds += kv.second.size();
135 isnds[kv.first] = kv.second.size();
140 if (num_isnds == 0) {
return; }
142 const int num_ircvs = neighbor_procs.size();
143 Vector<MPI_Status> stats(num_ircvs);
144 Vector<MPI_Request> rreqs(num_ircvs);
149 for (
int i = 0; i < num_ircvs; ++i)
151 const int Who = neighbor_procs[i];
161 for (
int i = 0; i < num_ircvs; ++i) {
162 const int Who = neighbor_procs[i];
175 Vector<std::size_t> rOffset;
176 std::size_t TotRcvBytes = 0;
177 for (
int i = 0; i <
NProcs; ++i) {
179 RcvProc.push_back(i);
180 rOffset.push_back(TotRcvBytes);
181 TotRcvBytes += ircvs[i];
185 const auto nrcvs = int(RcvProc.size());
186 Vector<MPI_Status> stats(nrcvs);
187 Vector<MPI_Request> rreqs(nrcvs);
192 Vector<char> recvdata(TotRcvBytes);
195 for (
int i = 0; i < nrcvs; ++i) {
196 const auto Who = RcvProc[i];
197 const auto offset = rOffset[i];
198 const auto Cnt = ircvs[Who];
209 for (
const auto& kv : not_ours) {
210 const auto Who = kv.first;
211 const auto Cnt = kv.second.size();
226 const size_t data_size = real_num_comp*
sizeof(Real) + int_num_comp*
sizeof(
int) + 4 *
sizeof(int);
228 if (recvdata.size() % data_size != 0) {
229 amrex::Print() << recvdata.size() <<
" " << data_size <<
"\n";
230 if (this->m_verbose) {
232 << recvdata.size() <<
", " << data_size <<
"\n";
234 amrex::Abort(
"NeighborParticles::sumNeighbors: How did this happen?");
237 auto npart = int(recvdata.size() / data_size);
239 char* buffer = recvdata.data();
240 for (
int j = 0; j < npart; ++j)
242 int grid, tile, index, lev;
243 std::memcpy(&grid, buffer,
sizeof(
int)); buffer +=
sizeof(int);
244 std::memcpy(&tile, buffer,
sizeof(
int)); buffer +=
sizeof(int);
245 std::memcpy(&index, buffer,
sizeof(
int)); buffer +=
sizeof(int);
246 std::memcpy(&lev, buffer,
sizeof(
int)); buffer +=
sizeof(int);
248 auto pair = std::make_pair(grid, tile);
249 auto& ptile = this->GetParticles(lev)[pair];
250 auto& parts = ptile.GetArrayOfStructs();
251 auto& p = parts[index];
253 for (
int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
256 std::memcpy(&data, buffer,
sizeof(Real));
257 p.rdata(comp) += data;
258 buffer +=
sizeof(Real);
261 for (
int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
264 std::memcpy(&data, buffer,
sizeof(
int));
265 p.idata(comp) += data;
266 buffer +=
sizeof(int);
275template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
277NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
278::updateNeighborsCPU (
bool reuse_rcv_counts) {
280 BL_PROFILE_VAR(
"NeighborParticleContainer::updateNeighborsCPU", update);
284 for (
int lev = 0; lev < this->numLevels(); ++lev) {
285 const Periodicity& periodicity = this->Geom(lev).periodicity();
286 const RealBox& prob_domain = this->Geom(lev).ProbDomain();
290 for (MyParIter pti(*
this, lev); pti.isValid(); ++pti) {
291 PairIndex src_index(pti.index(), pti.LocalTileIndex());
292 auto src = pti.GetParticleTile().getParticleTileData();
293 for (
int j = 0; j < num_threads; ++j) {
294 auto& tags = buffer_tag_cache[lev][src_index][j];
295 int num_tags = tags.size();
297#pragma omp parallel for
299 for (
int i = 0; i < num_tags; ++i) {
300 const NeighborCopyTag& tag = tags[i];
301 const int global_who = this->ParticleDistributionMap(tag.level)[tag.grid];
304 PairIndex dst_index(tag.grid, tag.tile);
305 auto dst = neighbors[tag.level][dst_index].getParticleTileData();
307 if (periodicity.isAnyPeriodic()) {
308 auto& aos = neighbors[tag.level][dst_index].GetArrayOfStructs();
309 ParticleType& p = aos[tag.dst_index];
310 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
311 if (! periodicity.isPeriodic(dir)) {
continue; }
312 if (tag.periodic_shift[dir] < 0) {
313 p.pos(dir) +=
static_cast<ParticleReal
> (prob_domain.length(dir));
314 }
else if (tag.periodic_shift[dir] > 0) {
315 p.pos(dir) -=
static_cast<ParticleReal
> (prob_domain.length(dir));
320 if ( enableInverse() )
322 auto& itags = inverse_tags[tag.level][dst_index];
324 itags[tag.dst_index].src_grid = src_index.first;
325 itags[tag.dst_index].src_tile = src_index.second;
326 itags[tag.dst_index].src_index = tag.src_index;
327 itags[tag.dst_index].src_level = lev;
330 auto& aos = pti.GetArrayOfStructs();
331 auto& soa = pti.GetStructOfArrays();
332 ParticleType p = aos[tag.src_index];
333 if (periodicity.isAnyPeriodic()) {
334 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
335 if (! periodicity.isPeriodic(dir)) {
continue; }
336 if (tag.periodic_shift[dir] < 0) {
337 p.pos(dir) +=
static_cast<ParticleReal
> (prob_domain.length(dir));
338 }
else if (tag.periodic_shift[dir] > 0) {
339 p.pos(dir) -=
static_cast<ParticleReal
> (prob_domain.length(dir));
344 char* dst_ptr = &send_data[who][tag.dst_index];
345 char* src_ptr = (
char *) &p;
346 for (
int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
347 if (ghost_real_comp[ii]) {
348 std::memcpy(dst_ptr, src_ptr,
sizeof(
typename ParticleType::RealType));
349 dst_ptr +=
sizeof(
typename ParticleType::RealType);
351 src_ptr +=
sizeof(
typename ParticleType::RealType);
353 for (
int ii = 0; ii < this->NumRealComps(); ++ii) {
354 if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
356 std::memcpy(dst_ptr, &(soa.GetRealData(ii)[tag.src_index]),
357 sizeof(
typename ParticleType::RealType));
358 dst_ptr +=
sizeof(
typename ParticleType::RealType);
361 for (
int ii = 0; ii < 2 + NStructInt; ++ii) {
362 if (ghost_int_comp[ii]) {
363 std::memcpy(dst_ptr, src_ptr,
sizeof(
int));
364 dst_ptr +=
sizeof(int);
366 src_ptr +=
sizeof(int);
368 for (
int ii = 0; ii < this->NumIntComps(); ++ii) {
369 if (ghost_int_comp[ii+2+NStructInt])
371 std::memcpy(dst_ptr, &(soa.GetIntData(ii)[tag.src_index]),
373 dst_ptr +=
sizeof(int);
376 if ( enableInverse() )
378 std::memcpy(dst_ptr,&(src_index.first),
sizeof(
int)); dst_ptr +=
sizeof(int);
379 std::memcpy(dst_ptr,&(src_index.second),
sizeof(
int)); dst_ptr +=
sizeof(int);
380 std::memcpy(dst_ptr,&(tag.src_index),
sizeof(
int)); dst_ptr +=
sizeof(int);
381 std::memcpy(dst_ptr,&(lev),
sizeof(
int)); dst_ptr +=
sizeof(int);
391 for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi) {
392 const int grid = mfi.index();
393 const int tile = mfi.LocalTileIndex();
394 PairIndex dst_index(grid, tile);
395 neighbors[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
396 if ( enableInverse() ) {
397 inverse_tags[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
403 fillNeighborsMPI(reuse_rcv_counts);
405 for (
int lev = 0; lev < this->numLevels(); ++lev)
407 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
409 int src_grid = mfi.index();
410 int src_tile = mfi.LocalTileIndex();
411 auto index = std::make_pair(src_grid, src_tile);
412 auto& ptile = this->GetParticles(lev)[index];
413 ptile.setNumNeighbors(neighbors[lev][index].
size());
415 ptile.numRealParticles(), ptile.numNeighborParticles());
421template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
423NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>
424::clearNeighborsCPU ()
426 BL_PROFILE(
"NeighborParticleContainer::clearNeighborsCPU");
428 resizeContainers(this->numLevels());
429 for (
int lev = 0; lev < this->numLevels(); ++lev) {
430 neighbors[lev].clear();
431 if ( enableInverse() ) { inverse_tags[lev].clear(); }
432 buffer_tag_cache[lev].clear();
434 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
436 int src_grid = mfi.index();
437 int src_tile = mfi.LocalTileIndex();
438 auto index = std::make_pair(src_grid, src_tile);
439 auto& ptile = this->GetParticles(lev)[index];
440 ptile.setNumNeighbors(0);
447template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
452 BL_PROFILE(
"NeighborParticleContainer::getRcvCountsMPI");
459 Vector<Long> snds(NProcs, 0);
461 for (
int i = 0; i <
NProcs; ++i) {
466 for (
const auto& kv : send_data) {
467 num_snds += kv.second.size();
468 snds[kv.first] = kv.second.size();
473 if (num_snds == 0) {
return; }
475 const int num_rcvs = neighbor_procs.size();
476 Vector<MPI_Status> stats(num_rcvs);
477 Vector<MPI_Request> rreqs(num_rcvs);
482 for (
int i = 0; i < num_rcvs; ++i) {
483 const int Who = neighbor_procs[i];
493 for (
int i = 0; i < num_rcvs; ++i) {
494 const int Who = neighbor_procs[i];
508template <
int NStructReal,
int NStructInt,
int NArrayReal,
int NArrayInt>
513 BL_PROFILE(
"NeighborParticleContainer::fillNeighborsMPI");
520 if (!reuse_rcv_counts) { getRcvCountsMPI(); }
521 if (num_snds == 0) {
return; }
524 Vector<std::size_t> rOffset;
525 std::size_t TotRcvBytes = 0;
526 for (
int i = 0; i <
NProcs; ++i) {
528 RcvProc.push_back(i);
529 rOffset.push_back(TotRcvBytes);
530 TotRcvBytes += rcvs[i];
534 const auto nrcvs = int(RcvProc.size());
535 Vector<MPI_Status> stats(nrcvs);
536 Vector<MPI_Request> rreqs(nrcvs);
541 Vector<char> recvdata(TotRcvBytes);
544 for (
int i = 0; i < nrcvs; ++i) {
545 const auto Who = RcvProc[i];
546 const auto offset = rOffset[i];
547 const auto Cnt = rcvs[Who];
558 for (
const auto& kv : send_data) {
559 const auto Who = kv.first;
560 const auto Cnt = kv.second.size();
572 for (
int i = 0; i < nrcvs; ++i) {
573 const auto offset = int(rOffset[i]);
574 char* buffer = &recvdata[
offset];
575 int num_tiles, lev, gid, tid,
size, np;
576 std::memcpy(&num_tiles, buffer,
sizeof(
int)); buffer +=
sizeof(int);
577 for (
int j = 0; j < num_tiles; ++j) {
578 std::memcpy(&lev, buffer,
sizeof(
int)); buffer +=
sizeof(int);
579 std::memcpy(&gid, buffer,
sizeof(
int)); buffer +=
sizeof(int);
580 std::memcpy(&tid, buffer,
sizeof(
int)); buffer +=
sizeof(int);
581 std::memcpy(&size, buffer,
sizeof(
int)); buffer +=
sizeof(int);
583 if (size == 0) {
continue; }
585 np =
size / cdata_size;
589 PairIndex dst_index(gid, tid);
590 size_t old_size = neighbors[lev][dst_index].size();
591 size_t new_size = neighbors[lev][dst_index].size() + np;
592 if ( enableInverse() )
595 size_t(inverse_tags[lev][dst_index].
size()));
596 inverse_tags[lev][dst_index].resize(new_size);
598 neighbors[lev][dst_index].resize(new_size);
601 for (
int n = 0; n < np; ++n) {
602 char* dst_aos = (
char*) &neighbors[lev][dst_index].GetArrayOfStructs()[old_size+n];
603 auto& dst_soa = neighbors[lev][dst_index].GetStructOfArrays();
604 for (
int ii = 0; ii < AMREX_SPACEDIM + NStructReal; ++ii) {
605 if (ghost_real_comp[ii]) {
606 std::memcpy(dst_aos, src,
sizeof(
typename ParticleType::RealType));
607 src +=
sizeof(
typename ParticleType::RealType);
609 dst_aos +=
sizeof(
typename ParticleType::RealType);
611 for (
int ii = 0; ii < this->NumRealComps(); ++ii) {
612 if (ghost_real_comp[ii+AMREX_SPACEDIM+NStructReal])
614 std::memcpy(&(dst_soa.GetRealData(ii)[old_size+n]),
615 src,
sizeof(
typename ParticleType::RealType));
616 src +=
sizeof(
typename ParticleType::RealType);
619 for (
int ii = 0; ii < 2 + NStructInt; ++ii) {
620 if (ghost_int_comp[ii]) {
621 std::memcpy(dst_aos, src,
sizeof(
int));
624 dst_aos +=
sizeof(int);
626 for (
int ii = 0; ii < this->NumIntComps(); ++ii) {
627 if (ghost_int_comp[ii+2+NStructInt])
629 std::memcpy(&(dst_soa.GetIntData(ii)[old_size+n]),
635 if ( enableInverse() )
637 auto& tag = inverse_tags[lev][dst_index][old_size+n];
638 std::memcpy(&(tag.src_grid),src,
sizeof(
int));
641 std::memcpy(&(tag.src_tile),src,
sizeof(
int));
644 std::memcpy(&(tag.src_index),src,
sizeof(
int));
647 std::memcpy(&(tag.src_level),src,
sizeof(
int));
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_VAR_STOP(vname)
Definition AMReX_BLProfiler.H:563
#define BL_PROFILE_VAR(fname, vname)
Definition AMReX_BLProfiler.H:560
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1089
Print on all processors of the default communicator.
Definition AMReX_Print.H:117
void sumNeighborsMPI(std::map< int, Vector< char > > ¬_ours, int real_start_comp, int real_num_comp, int int_start_comp, int int_num_comp)
void fillNeighborsMPI(bool reuse_rcv_counts)
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
This class provides the user with a few print options.
Definition AMReX_Print.H:35
__host__ __device__ Long size(T const &b) noexcept
integer version
Definition AMReX_GpuRange.H:26
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition AMReX_MPMD.cpp:122
int MyProc()
Definition AMReX_MPMD.cpp:117
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:126
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1304
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition AMReX_ParallelDescriptor.H:1109
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:613
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1130
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:138
__host__ __device__ void copyParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ConstParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle copying routine that can run on the GPU.
Definition AMReX_ParticleTransformation.H:31
void copyParticles(DstTile &dst, const SrcTile &src) noexcept
Copy particles from src to dst. This version copies all the particles, writing them to the beginning ...
Definition AMReX_ParticleTransformation.H:161
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230