Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_NeighborParticlesCPUImpl.H
Go to the documentation of this file.
1#ifndef AMREX_NEIGHBORPARTICLESCPUIMPL_H_
2#define AMREX_NEIGHBORPARTICLESCPUIMPL_H_
3#include <AMReX_Config.H>
4
5#include <array>
6#include <cstring>
7
9
10namespace amrex {
11
12namespace detail {
13
14template <typename T>
16void applyNeighborPeriodicShift (T& value, int dir, const IntVect& periodic_shift,
17 const Periodicity& periodicity,
18 const RealBox& prob_domain) noexcept
19{
20 if (!periodicity.isPeriodic(dir)) { return; }
21 if (periodic_shift[dir] < 0) {
22 value += static_cast<T>(prob_domain.length(dir));
23 } else if (periodic_shift[dir] > 0) {
24 value -= static_cast<T>(prob_domain.length(dir));
25 }
26}
27
28template <typename PTD>
30void shiftNeighborParticlePositions (PTD const& ptd, int index, const IntVect& periodic_shift,
31 const Periodicity& periodicity,
32 const RealBox& prob_domain) noexcept
33{
34 for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
35 applyNeighborPeriodicShift(ptd.pos(dir, index), dir, periodic_shift, periodicity, prob_domain);
36 }
37}
38
39template <typename PTD>
41void packNeighborParticleData (char*& dst, PTD const& src, int src_index,
42 int num_real_comps, int num_int_comps,
43 const Vector<int>& ghost_real_comp,
44 const Vector<int>& ghost_int_comp,
45 const IntVect& periodic_shift,
46 const Periodicity& periodicity,
47 const RealBox& prob_domain) noexcept
48{
49 using ParticleType = typename std::decay_t<PTD>::ParticleType;
50 constexpr int real_comm_comp_start =
51 ParticleType::is_soa_particle ? 0 : AMREX_SPACEDIM + ParticleType::NReal;
52 constexpr int int_comm_comp_start = 2 + ParticleType::NInt;
53
54 if constexpr (!ParticleType::is_soa_particle) {
55 auto const p = src[src_index];
56 for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
57 if (!ghost_real_comp[dir]) { continue; }
58 auto value = p.pos(dir);
59 applyNeighborPeriodicShift(value, dir, periodic_shift, periodicity, prob_domain);
60 std::memcpy(dst, &value, sizeof(typename ParticleType::RealType));
61 dst += sizeof(typename ParticleType::RealType);
62 }
63 for (int ii = 0; ii < ParticleType::NReal; ++ii) {
64 if (!ghost_real_comp[AMREX_SPACEDIM + ii]) { continue; }
65 auto value = p.rdata(ii);
66 std::memcpy(dst, &value, sizeof(typename ParticleType::RealType));
67 dst += sizeof(typename ParticleType::RealType);
68 }
69 }
70
71 for (int ii = 0; ii < num_real_comps; ++ii) {
72 if (!ghost_real_comp[real_comm_comp_start + ii]) { continue; }
73
74 ParticleReal value;
75 if (ii < std::decay_t<PTD>::NAR) {
76 value = src.rdata(ii)[src_index];
77 } else {
78 value = src.m_runtime_rdata[ii - std::decay_t<PTD>::NAR][src_index];
79 }
80 if constexpr (ParticleType::is_soa_particle) {
81 if (ii < AMREX_SPACEDIM) {
82 applyNeighborPeriodicShift(value, ii, periodic_shift, periodicity, prob_domain);
83 }
84 }
85 std::memcpy(dst, &value, sizeof(ParticleReal));
86 dst += sizeof(ParticleReal);
87 }
88
89 std::array<int,2> idcpu_parts{};
90 auto const idcpu = src.idcpu(src_index);
91 std::memcpy(idcpu_parts.data(), &idcpu, sizeof(idcpu));
92 for (int ii = 0; ii < 2; ++ii) {
93 if (!ghost_int_comp[ii]) { continue; }
94 std::memcpy(dst, &idcpu_parts[ii], sizeof(int));
95 dst += sizeof(int);
96 }
97
98 if constexpr (!ParticleType::is_soa_particle) {
99 auto const p = src[src_index];
100 for (int ii = 0; ii < ParticleType::NInt; ++ii) {
101 if (!ghost_int_comp[2 + ii]) { continue; }
102 auto value = p.idata(ii);
103 std::memcpy(dst, &value, sizeof(int));
104 dst += sizeof(int);
105 }
106 }
107
108 for (int ii = 0; ii < num_int_comps; ++ii) {
109 if (!ghost_int_comp[int_comm_comp_start + ii]) { continue; }
110
111 int value;
112 if (ii < std::decay_t<PTD>::NAI) {
113 value = src.idata(ii)[src_index];
114 } else {
115 value = src.m_runtime_idata[ii - std::decay_t<PTD>::NAI][src_index];
116 }
117 std::memcpy(dst, &value, sizeof(int));
118 dst += sizeof(int);
119 }
120}
121
122template <typename PTD>
124void unpackNeighborParticleData (PTD const& dst_ptd, int dst_index, const char*& src,
125 int num_real_comps, int num_int_comps,
126 const Vector<int>& ghost_real_comp,
127 const Vector<int>& ghost_int_comp) noexcept
128{
129 using ParticleType = typename std::decay_t<PTD>::ParticleType;
130 constexpr int real_comm_comp_start =
131 ParticleType::is_soa_particle ? 0 : AMREX_SPACEDIM + ParticleType::NReal;
132 constexpr int int_comm_comp_start = 2 + ParticleType::NInt;
133
134 if constexpr (!ParticleType::is_soa_particle) {
135 for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
136 if (!ghost_real_comp[dir]) { continue; }
137 std::memcpy(&(dst_ptd.pos(dir, dst_index)), src, sizeof(typename ParticleType::RealType));
138 src += sizeof(typename ParticleType::RealType);
139 }
140 for (int ii = 0; ii < ParticleType::NReal; ++ii) {
141 if (!ghost_real_comp[AMREX_SPACEDIM + ii]) { continue; }
142 std::memcpy(&(dst_ptd[dst_index].rdata(ii)), src, sizeof(typename ParticleType::RealType));
143 src += sizeof(typename ParticleType::RealType);
144 }
145 }
146
147 for (int ii = 0; ii < num_real_comps; ++ii) {
148 if (!ghost_real_comp[real_comm_comp_start + ii]) { continue; }
149
150 if (ii < std::decay_t<PTD>::NAR) {
151 std::memcpy(dst_ptd.rdata(ii) + dst_index, src, sizeof(ParticleReal));
152 } else {
153 std::memcpy(dst_ptd.m_runtime_rdata[ii - std::decay_t<PTD>::NAR] + dst_index, src,
154 sizeof(ParticleReal));
155 }
156 src += sizeof(ParticleReal);
157 }
158
159 std::array<int,2> idcpu_parts{};
160 std::memcpy(idcpu_parts.data(), &(dst_ptd.idcpu(dst_index)), sizeof(uint64_t));
161 for (int ii = 0; ii < 2; ++ii) {
162 if (!ghost_int_comp[ii]) { continue; }
163 std::memcpy(&idcpu_parts[ii], src, sizeof(int));
164 src += sizeof(int);
165 }
166 std::memcpy(&(dst_ptd.idcpu(dst_index)), idcpu_parts.data(), sizeof(uint64_t));
167
168 if constexpr (!ParticleType::is_soa_particle) {
169 for (int ii = 0; ii < ParticleType::NInt; ++ii) {
170 if (!ghost_int_comp[2 + ii]) { continue; }
171 std::memcpy(&(dst_ptd[dst_index].idata(ii)), src, sizeof(int));
172 src += sizeof(int);
173 }
174 }
175
176 for (int ii = 0; ii < num_int_comps; ++ii) {
177 if (!ghost_int_comp[int_comm_comp_start + ii]) { continue; }
178
179 if (ii < std::decay_t<PTD>::NAI) {
180 std::memcpy(dst_ptd.idata(ii) + dst_index, src, sizeof(int));
181 } else {
182 std::memcpy(dst_ptd.m_runtime_idata[ii - std::decay_t<PTD>::NAI] + dst_index, src,
183 sizeof(int));
184 }
185 src += sizeof(int);
186 }
187}
188
189template <typename PTD>
191ParticleReal getSummedRealComp (PTD const& ptd, int index, int comp) noexcept
192{
193 using ParticleType = typename std::decay_t<PTD>::ParticleType;
194
195 if constexpr (!ParticleType::is_soa_particle) {
196 if (comp < ParticleType::NReal) {
197 return ptd[index].rdata(comp);
198 }
199 comp -= ParticleType::NReal;
200 }
201
202 if (comp < std::decay_t<PTD>::NAR) {
203 return ptd.rdata(comp)[index];
204 }
205
206 return ptd.m_runtime_rdata[comp - std::decay_t<PTD>::NAR][index];
207}
208
209template <typename PTD>
211int getSummedIntComp (PTD const& ptd, int index, int comp) noexcept
212{
213 using ParticleType = typename std::decay_t<PTD>::ParticleType;
214
215 if constexpr (!ParticleType::is_soa_particle) {
216 if (comp < ParticleType::NInt) {
217 return ptd[index].idata(comp);
218 }
219 comp -= ParticleType::NInt;
220 }
221
222 if (comp < std::decay_t<PTD>::NAI) {
223 return ptd.idata(comp)[index];
224 }
225
226 return ptd.m_runtime_idata[comp - std::decay_t<PTD>::NAI][index];
227}
228
229template <typename PTD>
231void addSummedRealComp (PTD const& ptd, int index, int comp, ParticleReal value) noexcept
232{
233 using ParticleType = typename std::decay_t<PTD>::ParticleType;
234
235 if constexpr (!ParticleType::is_soa_particle) {
236 if (comp < ParticleType::NReal) {
237 ptd[index].rdata(comp) += value;
238 return;
239 }
240 comp -= ParticleType::NReal;
241 }
242
243 if (comp < std::decay_t<PTD>::NAR) {
244 ptd.rdata(comp)[index] += value;
245 return;
246 }
247
248 ptd.m_runtime_rdata[comp - std::decay_t<PTD>::NAR][index] += value;
249}
250
251template <typename PTD>
253void addSummedIntComp (PTD const& ptd, int index, int comp, int value) noexcept
254{
255 using ParticleType = typename std::decay_t<PTD>::ParticleType;
256
257 if constexpr (!ParticleType::is_soa_particle) {
258 if (comp < ParticleType::NInt) {
259 ptd[index].idata(comp) += value;
260 return;
261 }
262 comp -= ParticleType::NInt;
263 }
264
265 if (comp < std::decay_t<PTD>::NAI) {
266 ptd.idata(comp)[index] += value;
267 return;
268 }
269
270 ptd.m_runtime_idata[comp - std::decay_t<PTD>::NAI][index] += value;
271}
272
273}
274
275template <typename T_ParticleType, int NArrayReal, int NArrayInt,
276 template<class> class Allocator, class CellAssignor>
277void
278NeighborParticleContainer_impl<T_ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
279::fillNeighborsCPU () {
280 BL_PROFILE("NeighborParticleContainer::fillNeighborsCPU");
281 if (!areMasksValid()) {
282 BuildMasks();
283 GetNeighborCommTags();
284 }
285 cacheNeighborInfo();
286 updateNeighborsCPU(false);
287}
288
289template <typename T_ParticleType, int NArrayReal, int NArrayInt,
290 template<class> class Allocator, class CellAssignor>
291void
292NeighborParticleContainer_impl<T_ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
293::sumNeighborsCPU (int real_start_comp, int real_num_comp,
294 int int_start_comp, int int_num_comp)
295{
296 BL_PROFILE("NeighborParticleContainer::sumNeighborsCPU");
297
298 if ( ! enableInverse() )
299 {
300 amrex::Abort("Need to enable inverse to true to use sumNeighbors. \n");
301 }
302
304
305 std::map<int, Vector<char> > isend_data;
306
307 for (int lev = 0; lev < this->numLevels(); ++lev)
308 {
309 for (MyParIter pti(*this, lev); pti.isValid(); ++pti)
310 {
311 PairIndex src_index(pti.index(), pti.LocalTileIndex());
312 const auto& tags = inverse_tags[lev][src_index];
313 const auto& neighb_tile = neighbors[lev][src_index];
314 const auto neighbs = neighb_tile.getConstParticleTileData();
315 const Long num_neighbs = static_cast<Long>(neighb_tile.size());
316 AMREX_ASSERT(tags.size() == num_neighbs);
317
318 for (int i = 0; i < static_cast<int>(num_neighbs); ++i)
319 {
320 const auto& tag = tags[i];
321 const int dst_grid = tag.src_grid;
322 const int global_rank = this->ParticleDistributionMap(lev)[dst_grid];
323 const int dst_proc = ParallelContext::global_to_local_rank(global_rank);
324 const int dst_tile = tag.src_tile;
325 const int dst_index = tag.src_index;
326 const int dst_level = tag.src_level;
327
328 if (dst_proc == MyProc)
329 {
330 auto pair = std::make_pair(dst_grid, dst_tile);
331 auto& dst_ptile = this->GetParticles(dst_level)[pair];
332 auto dst_ptd = dst_ptile.getParticleTileData();
333
334 for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
335 {
336 detail::addSummedRealComp(
337 dst_ptd, dst_index, comp, detail::getSummedRealComp(neighbs, i, comp));
338 }
339
340 for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
341 {
342 detail::addSummedIntComp(
343 dst_ptd, dst_index, comp, detail::getSummedIntComp(neighbs, i, comp));
344 }
345 }
346
347 else
348 {
349 auto& sdata = isend_data[dst_proc];
350 auto old_size = sdata.size();
351 auto new_size = old_size + real_num_comp*sizeof(ParticleReal) + int_num_comp*sizeof(int)
352 + 4*sizeof(int);
353 sdata.resize(new_size);
354 char* dst = &sdata[old_size];
355 std::memcpy(dst, &dst_grid, sizeof(int)); dst += sizeof(int);
356 std::memcpy(dst, &dst_tile, sizeof(int)); dst += sizeof(int);
357 std::memcpy(dst, &dst_index, sizeof(int)); dst += sizeof(int);
358 std::memcpy(dst, &dst_level, sizeof(int)); dst += sizeof(int);
359 for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
360 {
361 ParticleReal data = detail::getSummedRealComp(neighbs, i, comp);
362 std::memcpy(dst, &data, sizeof(ParticleReal));
363 dst += sizeof(ParticleReal);
364 }
365 for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
366 {
367 int data = detail::getSummedIntComp(neighbs, i, comp);
368 std::memcpy(dst, &data, sizeof(int));
369 dst += sizeof(int);
370 }
371 }
372 }
373 }
374 }
375
376 sumNeighborsMPI(isend_data, real_start_comp, real_num_comp, int_start_comp, int_num_comp);
377}
378
379template <typename T_ParticleType, int NArrayReal, int NArrayInt,
380 template<class> class Allocator, class CellAssignor>
381void
383sumNeighborsMPI (std::map<int, Vector<char> >& not_ours,
384 int real_start_comp, int real_num_comp,
385 int int_start_comp, int int_num_comp)
386{
387 BL_PROFILE("NeighborParticleContainer::sumNeighborsMPI");
388
389#ifdef AMREX_USE_MPI
391
392 Vector<Long> isnds(NProcs, 0);
393 Vector<Long> ircvs(NProcs, 0);
394 for (int i = 0; i < NProcs; ++i) {
395 ircvs[i] = 0;
396 }
397
398 {
399 // each proc figures out how many bytes it will send, and how
400 // many it will receive
401
402 Long num_isnds = 0;
403 for (const auto& kv : not_ours)
404 {
405 num_isnds += kv.second.size();
406 isnds[kv.first] = kv.second.size();
407 }
408
410
411 if (num_isnds == 0) { return; }
412
413 const int num_ircvs = neighbor_procs.size();
414 Vector<MPI_Status> stats(num_ircvs);
415 Vector<MPI_Request> rreqs(num_ircvs);
416
418
419 // Post receives
420 for (int i = 0; i < num_ircvs; ++i)
421 {
422 const int Who = neighbor_procs[i];
423 const Long Cnt = 1;
424
425 AMREX_ASSERT(Who >= 0 && Who < NProcs);
426
427 rreqs[i] = ParallelDescriptor::Arecv(&ircvs[Who], Cnt, Who, SeqNum,
429 }
430
431 // Send.
432 for (int i = 0; i < num_ircvs; ++i) {
433 const int Who = neighbor_procs[i];
434 const Long Cnt = 1;
435
436 AMREX_ASSERT(Who >= 0 && Who < NProcs);
437
438 ParallelDescriptor::Send(&isnds[Who], Cnt, Who, SeqNum,
440 }
441
442 if (num_ircvs > 0) { ParallelDescriptor::Waitall(rreqs, stats); }
443 }
444
445 Vector<int> RcvProc;
446 Vector<std::size_t> rOffset; // Offset (in bytes) in the receive buffer
447 std::size_t TotRcvBytes = 0;
448 for (int i = 0; i < NProcs; ++i) {
449 if (ircvs[i] > 0) {
450 RcvProc.push_back(i);
451 rOffset.push_back(TotRcvBytes);
452 TotRcvBytes += ircvs[i];
453 }
454 }
455
456 const auto nrcvs = int(RcvProc.size());
457 Vector<MPI_Status> stats(nrcvs);
458 Vector<MPI_Request> rreqs(nrcvs);
459
461
462 // Allocate data for rcvs as one big chunk.
463 Vector<char> recvdata(TotRcvBytes);
464
465 // Post receives.
466 for (int i = 0; i < nrcvs; ++i) {
467 const auto Who = RcvProc[i];
468 const auto offset = rOffset[i];
469 const auto Cnt = ircvs[Who];
470
471 AMREX_ASSERT(Cnt > 0);
472 AMREX_ASSERT(Cnt < std::numeric_limits<int>::max());
473 AMREX_ASSERT(Who >= 0 && Who < NProcs);
474
475 rreqs[i] = ParallelDescriptor::Arecv(&recvdata[offset], Cnt, Who, SeqNum,
477 }
478
479 // Send.
480 for (const auto& kv : not_ours) {
481 const auto Who = kv.first;
482 const auto Cnt = kv.second.size();
483
484 AMREX_ASSERT(Cnt > 0);
485 AMREX_ASSERT(Who >= 0 && Who < NProcs);
486 AMREX_ASSERT(Cnt < std::numeric_limits<int>::max());
487
488 ParallelDescriptor::Send(kv.second.data(), Cnt, Who, SeqNum,
490 }
491
492 // unpack the received data and put them into the proper neighbor buffers
493 if (nrcvs > 0)
494 {
495 ParallelDescriptor::Waitall(rreqs, stats);
496
497 const size_t data_size =
498 real_num_comp*sizeof(ParticleReal) + int_num_comp*sizeof(int) + 4 * sizeof(int);
499
500 if (recvdata.size() % data_size != 0) {
501 amrex::Print() << recvdata.size() << " " << data_size << "\n";
502 if (this->m_verbose) {
503 amrex::AllPrint() << "NeighborParticles::sumNeighbors: sizes = "
504 << recvdata.size() << ", " << data_size << "\n";
505 }
506 amrex::Abort("NeighborParticles::sumNeighbors: How did this happen?");
507 }
508
509 auto npart = int(recvdata.size() / data_size);
510
511 char* buffer = recvdata.data();
512 for (int j = 0; j < npart; ++j)
513 {
514 int grid, tile, index, lev;
515 std::memcpy(&grid, buffer, sizeof(int)); buffer += sizeof(int);
516 std::memcpy(&tile, buffer, sizeof(int)); buffer += sizeof(int);
517 std::memcpy(&index, buffer, sizeof(int)); buffer += sizeof(int);
518 std::memcpy(&lev, buffer, sizeof(int)); buffer += sizeof(int);
519
520 auto pair = std::make_pair(grid, tile);
521 auto& ptile = this->GetParticles(lev)[pair];
522 auto dst_ptd = ptile.getParticleTileData();
523
524 for (int comp = real_start_comp; comp < real_start_comp + real_num_comp; ++comp)
525 {
526 ParticleReal data;
527 std::memcpy(&data, buffer, sizeof(ParticleReal));
528 detail::addSummedRealComp(dst_ptd, index, comp, data);
529 buffer += sizeof(ParticleReal);
530 }
531
532 for (int comp = int_start_comp; comp < int_start_comp + int_num_comp; ++comp)
533 {
534 int data;
535 std::memcpy(&data, buffer, sizeof(int));
536 detail::addSummedIntComp(dst_ptd, index, comp, data);
537 buffer += sizeof(int);
538 }
539 }
540 }
541#else
542 amrex::ignore_unused(not_ours, real_start_comp, real_num_comp, int_start_comp, int_num_comp);
543#endif
544}
545
546template <typename T_ParticleType, int NArrayReal, int NArrayInt,
547 template<class> class Allocator, class CellAssignor>
548void
549NeighborParticleContainer_impl<T_ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
550::updateNeighborsCPU (bool reuse_rcv_counts) {
551
552 BL_PROFILE_VAR("NeighborParticleContainer::updateNeighborsCPU", update);
553
555
556 for (int lev = 0; lev < this->numLevels(); ++lev) {
557 const Periodicity& periodicity = this->Geom(lev).periodicity();
558 const RealBox& prob_domain = this->Geom(lev).ProbDomain();
559
560 int num_threads = OpenMP::get_max_threads();
561
562 for (MyParIter pti(*this, lev); pti.isValid(); ++pti) {
563 PairIndex src_index(pti.index(), pti.LocalTileIndex());
564 auto const src = pti.GetParticleTile().getConstParticleTileData();
565 for (int j = 0; j < num_threads; ++j) {
566 auto& tags = buffer_tag_cache[lev][src_index][j];
567 int num_tags = tags.size();
568#ifdef AMREX_USE_OMP
569#pragma omp parallel for
570#endif
571 for (int i = 0; i < num_tags; ++i) {
572 const NeighborCopyTag& tag = tags[i];
573 const int global_who = this->ParticleDistributionMap(tag.level)[tag.grid];
574 const int who = ParallelContext::global_to_local_rank(global_who);
575 if (who == MyProc) {
576 PairIndex dst_index(tag.grid, tag.tile);
577 auto dst = neighbors[tag.level][dst_index].getParticleTileData();
578 copyParticle(dst, src, tag.src_index, tag.dst_index);
579 if (periodicity.isAnyPeriodic()) {
580 detail::shiftNeighborParticlePositions(dst, tag.dst_index, tag.periodic_shift,
581 periodicity, prob_domain);
582 }
583
584 if ( enableInverse() )
585 {
586 auto& itags = inverse_tags[tag.level][dst_index];
587 AMREX_ASSERT(tag.dst_index < itags.size());
588 itags[tag.dst_index].src_grid = src_index.first;
589 itags[tag.dst_index].src_tile = src_index.second;
590 itags[tag.dst_index].src_index = tag.src_index;
591 itags[tag.dst_index].src_level = lev;
592 }
593 } else {
594 char* dst_ptr = &send_data[who][tag.dst_index];
595 detail::packNeighborParticleData(dst_ptr, src, tag.src_index,
596 this->NumRealComps(), this->NumIntComps(),
597 ghost_real_comp, ghost_int_comp,
598 tag.periodic_shift, periodicity, prob_domain);
599 if ( enableInverse() )
600 {
601 std::memcpy(dst_ptr,&(src_index.first),sizeof(int)); dst_ptr += sizeof(int);
602 std::memcpy(dst_ptr,&(src_index.second),sizeof(int)); dst_ptr += sizeof(int);
603 std::memcpy(dst_ptr,&(tag.src_index),sizeof(int)); dst_ptr += sizeof(int);
604 std::memcpy(dst_ptr,&(lev),sizeof(int)); dst_ptr += sizeof(int);
605 }
606 }
607 }
608 }
609 }
610
611#ifdef AMREX_USE_OMP
612#pragma omp parallel
613#endif
614 for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi) {
615 const int grid = mfi.index();
616 const int tile = mfi.LocalTileIndex();
617 PairIndex dst_index(grid, tile);
618 neighbors[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
619 if ( enableInverse() ) {
620 inverse_tags[lev][dst_index].resize(local_neighbor_sizes[lev][dst_index]);
621 }
622 }
623 }
624 BL_PROFILE_VAR_STOP(update);
625
626 fillNeighborsMPI(reuse_rcv_counts);
627
628 for (int lev = 0; lev < this->numLevels(); ++lev)
629 {
630 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
631 {
632 int src_grid = mfi.index();
633 int src_tile = mfi.LocalTileIndex();
634 auto index = std::make_pair(src_grid, src_tile);
635 auto& ptile = this->GetParticles(lev)[index];
636 ptile.setNumNeighbors(neighbors[lev][index].size());
637 amrex::copyParticles(ptile, neighbors[lev][index], 0,
638 ptile.numRealParticles(), ptile.numNeighborParticles());
639 }
640 }
641
642}
643
644template <typename T_ParticleType, int NArrayReal, int NArrayInt,
645 template<class> class Allocator, class CellAssignor>
646void
647NeighborParticleContainer_impl<T_ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
648::clearNeighborsCPU ()
649{
650 BL_PROFILE("NeighborParticleContainer::clearNeighborsCPU");
651
652 resizeContainers(this->numLevels());
653 for (int lev = 0; lev < this->numLevels(); ++lev) {
654 neighbors[lev].clear();
655 if ( enableInverse() ) { inverse_tags[lev].clear(); }
656 buffer_tag_cache[lev].clear();
657
658 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
659 {
660 int src_grid = mfi.index();
661 int src_tile = mfi.LocalTileIndex();
662 auto index = std::make_pair(src_grid, src_tile);
663 auto& ptile = this->GetParticles(lev)[index];
664 ptile.setNumNeighbors(0);
665 }
666 }
667
668 send_data.clear();
669}
670
671template <typename T_ParticleType, int NArrayReal, int NArrayInt,
672 template<class> class Allocator, class CellAssignor>
673void
676
677 BL_PROFILE("NeighborParticleContainer::getRcvCountsMPI");
678
679#ifdef AMREX_USE_MPI
681
682 // each proc figures out how many bytes it will send, and how
683 // many it will receive
684 Vector<Long> snds(NProcs, 0);
685 rcvs.resize(NProcs);
686 for (int i = 0; i < NProcs; ++i) {
687 rcvs[i] = 0;
688 }
689
690 num_snds = 0;
691 for (const auto& kv : send_data) {
692 num_snds += kv.second.size();
693 snds[kv.first] = kv.second.size();
694 }
695
697
698 if (num_snds == 0) { return; }
699
700 const int num_rcvs = neighbor_procs.size();
701 Vector<MPI_Status> stats(num_rcvs);
702 Vector<MPI_Request> rreqs(num_rcvs);
703
705
706 // Post receives
707 for (int i = 0; i < num_rcvs; ++i) {
708 const int Who = neighbor_procs[i];
709 const Long Cnt = 1;
710
711 AMREX_ASSERT(Who >= 0 && Who < NProcs);
712
713 rreqs[i] = ParallelDescriptor::Arecv(&rcvs[Who], Cnt, Who, SeqNum,
715 }
716
717 // Send.
718 for (int i = 0; i < num_rcvs; ++i) {
719 const int Who = neighbor_procs[i];
720 const Long Cnt = 1;
721
722 AMREX_ASSERT(Who >= 0 && Who < NProcs);
723
724 ParallelDescriptor::Send(&snds[Who], Cnt, Who, SeqNum,
726 }
727
728 if (num_rcvs > 0) { ParallelDescriptor::Waitall(rreqs, stats); }
729
730#endif // AMREX_USE_MPI
731}
732
733template <typename T_ParticleType, int NArrayReal, int NArrayInt,
734 template<class> class Allocator, class CellAssignor>
735void
737fillNeighborsMPI (bool reuse_rcv_counts) {
738
739 BL_PROFILE("NeighborParticleContainer::fillNeighborsMPI");
740
741#ifdef AMREX_USE_MPI
743
744 // each proc figures out how many bytes it will send, and how
745 // many it will receive
746 if (!reuse_rcv_counts) { getRcvCountsMPI(); }
747 if (num_snds == 0) { return; }
748
749 Vector<int> RcvProc;
750 Vector<std::size_t> rOffset; // Offset (in bytes) in the receive buffer
751 std::size_t TotRcvBytes = 0;
752 for (int i = 0; i < NProcs; ++i) {
753 if (rcvs[i] > 0) {
754 RcvProc.push_back(i);
755 rOffset.push_back(TotRcvBytes);
756 TotRcvBytes += rcvs[i];
757 }
758 }
759
760 const auto nrcvs = int(RcvProc.size());
761 Vector<MPI_Status> stats(nrcvs);
762 Vector<MPI_Request> rreqs(nrcvs);
763
765
766 // Allocate data for rcvs as one big chunk.
767 Vector<char> recvdata(TotRcvBytes);
768
769 // Post receives.
770 for (int i = 0; i < nrcvs; ++i) {
771 const auto Who = RcvProc[i];
772 const auto offset = rOffset[i];
773 const auto Cnt = rcvs[Who];
774
775 AMREX_ASSERT(Cnt > 0);
776 AMREX_ASSERT(Cnt < std::numeric_limits<int>::max());
777 AMREX_ASSERT(Who >= 0 && Who < NProcs);
778
779 rreqs[i] = ParallelDescriptor::Arecv(&recvdata[offset], Cnt, Who, SeqNum,
781 }
782
783 // Send.
784 for (const auto& kv : send_data) {
785 const auto Who = kv.first;
786 const auto Cnt = kv.second.size();
787
788 AMREX_ASSERT(Cnt > 0);
789 AMREX_ASSERT(Who >= 0 && Who < NProcs);
790 AMREX_ASSERT(Cnt < std::numeric_limits<int>::max());
791
792 ParallelDescriptor::Send(kv.second.data(), Cnt, Who, SeqNum,
794 }
795
796 // unpack the received data and put them into the proper neighbor buffers
797 if (nrcvs > 0) {
798 ParallelDescriptor::Waitall(rreqs, stats);
799 for (int i = 0; i < nrcvs; ++i) {
800 const std::size_t offset = rOffset[i];
801 char* buffer = &recvdata[offset];
802 int num_tiles, lev, gid, tid, size, np;
803 std::memcpy(&num_tiles, buffer, sizeof(int)); buffer += sizeof(int);
804 for (int j = 0; j < num_tiles; ++j) {
805 std::memcpy(&lev, buffer, sizeof(int)); buffer += sizeof(int);
806 std::memcpy(&gid, buffer, sizeof(int)); buffer += sizeof(int);
807 std::memcpy(&tid, buffer, sizeof(int)); buffer += sizeof(int);
808 std::memcpy(&size, buffer, sizeof(int)); buffer += sizeof(int);
809
810 if (size == 0) { continue; }
811
812 np = size / cdata_size;
813
814 AMREX_ASSERT(size % cdata_size == 0);
815
816 PairIndex dst_index(gid, tid);
817 size_t old_size = neighbors[lev][dst_index].size();
818 size_t new_size = neighbors[lev][dst_index].size() + np;
819 if ( enableInverse() )
820 {
821 AMREX_ASSERT(neighbors[lev][dst_index].size() ==
822 size_t(inverse_tags[lev][dst_index].size()));
823 inverse_tags[lev][dst_index].resize(new_size);
824 }
825 neighbors[lev][dst_index].resize(new_size);
826
827 auto dst_ptd = neighbors[lev][dst_index].getParticleTileData();
828 char const* src = buffer;
829 for (int n = 0; n < np; ++n) {
830 detail::unpackNeighborParticleData(dst_ptd, int(old_size+n), src,
831 this->NumRealComps(), this->NumIntComps(),
832 ghost_real_comp, ghost_int_comp);
833
834 if ( enableInverse() )
835 {
836 auto& tag = inverse_tags[lev][dst_index][old_size+n];
837 std::memcpy(&(tag.src_grid),src,sizeof(int));
838 src += sizeof(int);
839
840 std::memcpy(&(tag.src_tile),src,sizeof(int));
841 src += sizeof(int);
842
843 std::memcpy(&(tag.src_index),src,sizeof(int));
844 src += sizeof(int);
845
846 std::memcpy(&(tag.src_level),src,sizeof(int));
847 src += sizeof(int);
848 }
849 }
850 buffer += size;
851 }
852 }
853 }
854#else
855 amrex::ignore_unused(reuse_rcv_counts);
856#endif
857}
858
859}
860
862
863#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define BL_PROFILE_VAR_STOP(vname)
Definition AMReX_BLProfiler.H:563
#define BL_PROFILE_VAR(fname, vname)
Definition AMReX_BLProfiler.H:560
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
Print on all processors of the default communicator.
Definition AMReX_Print.H:113
void fillNeighborsMPI(bool reuse_rcv_counts)
void sumNeighborsMPI(std::map< int, Vector< char > > &not_ours, int real_start_comp, int real_num_comp, int int_start_comp, int int_num_comp)
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
This class provides the user with a few print options.
Definition AMReX_Print.H:35
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
amrex_long Long
Definition AMReX_INT.H:30
void Max(KeyValuePair< K, V > &vi, MPI_Comm comm)
Definition AMReX_ParallelReduce.H:133
int NProcs()
Process ID in MPI_COMM_WORLD.
Definition AMReX_MPMD.cpp:122
int MyProc()
Definition AMReX_MPMD.cpp:117
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
void Waitall(Vector< MPI_Request > &, Vector< MPI_Status > &)
Definition AMReX_ParallelDescriptor.cpp:1308
Message Send(const T *buf, size_t n, int dst_pid, int tag)
Definition AMReX_ParallelDescriptor.H:1193
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
Definition AMReX_Amr.cpp:50
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
__host__ __device__ void copyParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ConstParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle copying routine that can run on the GPU.
Definition AMReX_ParticleTransformation.H:32
void copyParticles(DstTile &dst, const SrcTile &src) noexcept
Copy particles from src to dst. This version copies all the particles, writing them to the beginning ...
Definition AMReX_ParticleTransformation.H:222
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
const int[]
Definition AMReX_BLProfiler.cpp:1664