Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_ParticleCommunication.H
Go to the documentation of this file.
1#ifndef AMREX_PARTICLECOMMUNICATION_H_
2#define AMREX_PARTICLECOMMUNICATION_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Gpu.H>
7#include <AMReX_IntVect.H>
9#include <AMReX_MFIter.H>
10#include <AMReX_Scan.H>
11#include <AMReX_TypeTraits.H>
12#include <AMReX_MakeParticle.H>
13
14#include <map>
15
16namespace amrex {
17
//! Unpack policy that stores incoming particles as *neighbor* particles:
//! each destination tile's real particles are left alone and the tile's
//! neighbor count is grown to make room.
struct NeighborUnpackPolicy
{
    /**
     * \brief Grow each tile's neighbor region and record, per incoming batch,
     * the offset at which the received particles should be written.
     *
     * \param tiles   destination tiles, one entry per incoming batch
     *                (the same tile may appear more than once)
     * \param sizes   number of particles to add for each corresponding batch
     * \param offsets appended with the pre-resize total particle count of each
     *                tile, i.e. the insertion position for that batch
     */
    template <class PTile>
    void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
    {
        // Reserve up front so the push_back loop never reallocates.
        offsets.reserve(offsets.size() + sizes.size());
        for(int i = 0; i < static_cast<int>(sizes.size()); ++i)
        {
            int offset = tiles[i]->numTotalParticles();
            int nn = tiles[i]->getNumNeighbors();
            tiles[i]->setNumNeighbors(nn + sizes[i]);
            offsets.push_back(offset);
        }
    }
};
32
//! Unpack policy used by Redistribute(): incoming particles become *real*
//! particles appended at the end of each destination tile.
struct RedistributeUnpackPolicy
{
    /**
     * \brief Resize each tile to hold its incoming particles and record the
     * insertion offset for every batch.
     *
     * The same tile may appear multiple times in \p tiles; a running size is
     * kept per unique tile so consecutive batches land back-to-back, and each
     * tile is resized exactly once at the end.
     *
     * \param tiles   destination tiles, one entry per incoming batch
     * \param sizes   number of particles in each corresponding batch
     * \param offsets appended with the index at which each batch starts writing
     */
    template <class PTile>
    void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
    {
        int N = static_cast<int>(sizes.size());

        // Starting size of every distinct tile.
        std::map<PTile*, int> tile_sizes;
        for(int i = 0; i < N; ++i) {
            tile_sizes[tiles[i]] = tiles[i]->numParticles();
        }

        // Hand out offsets; keep a reference to avoid a second map lookup
        // per batch, and reserve so push_back never reallocates.
        offsets.reserve(offsets.size() + sizes.size());
        for(int i = 0; i < N; ++i)
        {
            auto& running_size = tile_sizes[tiles[i]];
            offsets.push_back(running_size);
            running_size += sizes[i];
        }

        // One resize per unique tile.
        for (auto& kv : tile_sizes) {
            kv.first->resize(kv.second);
        }
    }
};
56
58{
63
    //! Drop all stored copy metadata. (Defined in AMReX_ParticleCommunication.cpp.)
    void clear ();

    //! Set how many AMR levels of metadata this op holds.
    //! (Defined in AMReX_ParticleCommunication.cpp.)
    void setNumLevels (int num_levels);

    //! Resize the metadata for grid \p gid on level \p lev to \p size entries.
    //! (Defined in AMReX_ParticleCommunication.cpp.)
    void resize (int gid, int lev, int size);
69
70 [[nodiscard]] int numCopies (int gid, int lev) const
71 {
72 if (m_boxes.size() <= lev) { return 0; }
73 auto mit = m_boxes[lev].find(gid);
74 return mit == m_boxes[lev].end() ? 0 : int(mit->second.size());
75 }
76
77 [[nodiscard]] int numLevels () const { return int(m_boxes.size()); }
78};
79
81{
83
87
93
95 int m_nrcvs = 0;
98
101
104
107
109
115
118
121
124
127
129
    /**
     * \brief Build the communication plan for the particle copies described
     * by \p op: assign every staged copy a destination slot, count particles
     * per destination bucket, and kick off the MPI metadata exchange.
     *
     * NOTE(review): several statement lines of this method were dropped by the
     * documentation extraction (marked below); consult the upstream header
     * before editing.
     *
     * \param pc             the particle container (used for its buffer map,
     *                       particle tiles, and redistribute settings)
     * \param op             per-grid copy metadata (destination box/level)
     * \param int_comp_mask  which integer components get communicated
     * \param real_comp_mask which real components get communicated
     * \param local          if true, only exchange with known neighbor ranks
     */
    template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
    void build (const PC& pc,
                const ParticleCopyOp& op,
                const Vector<int>& int_comp_mask,
                const Vector<int>& real_comp_mask,
                bool local)
    {
        BL_PROFILE("ParticleCopyPlan::build");

        m_local = local;

        const int ngrow = 1; // note - fix

        const int num_levels = op.numLevels();
        const int num_buckets = pc.BufferMap().numBuckets();

        if (m_local)
        {
            // Local mode: only the ranks owning nearby grids can be partners.
            m_neighbor_procs = pc.NeighborProcs(ngrow);
        }
        else
        {
            // Global mode: every rank is a potential partner.
            // NOTE(review): extraction dropped a line here (presumably a
            // resize of m_neighbor_procs to the rank count) — confirm upstream.
            std::iota(m_neighbor_procs.begin(), m_neighbor_procs.end(), 0);
        }

        // One counter per destination bucket (grid/level pair), plus a
        // trailing slot so an exclusive scan yields the total.
        m_box_counts_d.resize(0);
        m_box_counts_d.resize(num_buckets+1, 0);
        m_box_offsets.resize(num_buckets+1);
        auto* p_dst_box_counts = m_box_counts_d.dataPtr();
        // Stable redistribution counts on the host so slot assignment is
        // deterministic; otherwise counts are bumped atomically on device.
        auto getBucket = pc.stableRedistribute() ? pc.BufferMap().getHostBucketFunctor() : pc.BufferMap().getBucketFunctor();

        // NOTE(review): the declaration of h_box_counts was dropped by the
        // extraction (it is used below when stableRedistribute() is on).
        if (pc.stableRedistribute() ) {
            h_box_counts.resize(m_box_counts_d.size(), 0);
        }

        m_dst_indices.resize(num_levels);
        for (int lev = 0; lev < num_levels; ++lev)
        {
            for (const auto& kv : pc.GetParticles(lev))
            {
                int gid = kv.first.first;
                int num_copies = op.numCopies(gid, lev);
                if (num_copies == 0) { continue; }
                m_dst_indices[lev][gid].resize(num_copies);

                if (pc.stableRedistribute()) {
                    // Deterministic path: bring the box/level metadata to the
                    // host and hand out slots sequentially.
                    const Gpu::DeviceVector<int>& d_boxes = op.m_boxes[lev].at(gid);
                    Gpu::HostVector<int> h_boxes(d_boxes.size());
                    Gpu::copy(Gpu::deviceToHost,d_boxes.begin(),d_boxes.end(),h_boxes.begin());

                    const Gpu::DeviceVector<int>& d_levs = op.m_levels[lev].at(gid);
                    Gpu::HostVector<int> h_levs(d_levs.size());
                    Gpu::copy(Gpu::deviceToHost,d_levs.begin(),d_levs.end(),h_levs.begin());

                    Gpu::HostVector<int> h_dst_indices(num_copies);
                    for (int i = 0; i < num_copies; ++i) {
                        int dst_box = h_boxes[i];
                        if (dst_box >= 0) {  // negative box means "no copy"
                            int dst_lev = h_levs[i];
                            int index = static_cast<int>(h_box_counts[getBucket(dst_lev, dst_box)]++);
                            h_dst_indices[i] = index;
                        }
                    }
                    Gpu::copy(Gpu::hostToDevice,h_dst_indices.begin(),h_dst_indices.end(),m_dst_indices[lev][gid].begin());
                }
                else {
                    // Fast path: assign slots on the device with atomics
                    // (slot order is then nondeterministic).
                    const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
                    const auto* p_levs = op.m_levels[lev].at(gid).dataPtr();
                    auto* p_dst_indices = m_dst_indices[lev][gid].dataPtr();
                    AMREX_FOR_1D ( num_copies, i,
                    {
                        int dst_box = p_boxes[i];
                        if (dst_box >= 0)
                        {
                            int dst_lev = p_levs[i];
                            int index = static_cast<int>(Gpu::Atomic::Add(
                                &p_dst_box_counts[getBucket(dst_lev, dst_box)], 1U));
                            p_dst_indices[i] = index;
                        }
                    });
                }
            }
        }

        if (pc.stableRedistribute()) {
            // Publish the host-side counts to the device copy.
            Gpu::copy(Gpu::hostToDevice,h_box_counts.begin(),h_box_counts.end(),m_box_counts_d.begin());
        }

        // NOTE(review): extraction dropped the opening of this call
        // (presumably Gpu::exclusive_scan over m_box_counts_d) — the
        // surviving fragment is its last argument.
        m_box_offsets.begin());

        m_box_counts_h.resize(m_box_counts_d.size());
        // NOTE(review): extraction dropped the opening of this call
        // (presumably a device-to-host copyAsync of m_box_counts_d).
        m_box_counts_h.begin());

        m_snd_pad_correction_h.resize(0);
        // NOTE(review): extraction dropped lines here (presumably the initial
        // push_back, the resize of m_snd_pad_correction_d, and the opening of
        // the host-to-device copyAsync whose fragment follows).
        m_snd_pad_correction_d.begin());

        // Mirror the component masks on the device for the pack kernels.
        d_int_comp_mask.resize(int_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, int_comp_mask.begin(), int_comp_mask.end(),
                       d_int_comp_mask.begin());
        d_real_comp_mask.resize(real_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, real_comp_mask.begin(), real_comp_mask.end(),
                       d_real_comp_mask.begin());

        // NOTE(review): extraction dropped a line here (presumably a stream
        // synchronize before the masks are read back) — confirm upstream.

        int NStructReal = PC::ParticleContainerType::NStructReal;
        int NStructInt = PC::ParticleContainerType::NStructInt;

        // Count the optional (runtime-added) real components that get sent;
        // for legacy AoS particles the first AMREX_SPACEDIM + NStructReal
        // entries travel inside the particle struct itself.
        int num_real_comm_comp = 0;
        int comm_comps_start = 0;
        if constexpr (!PC::ParticleType::is_soa_particle) {
            comm_comps_start += AMREX_SPACEDIM + NStructReal;
        }
        for (int i = comm_comps_start; i < real_comp_mask.size(); ++i) {
            if (real_comp_mask[i]) {++num_real_comm_comp;}
        }

        // Same for the integer components; the first 2 + NStructInt entries
        // are part of the particle struct.
        int num_int_comm_comp = 0;
        for (int i = 2 + NStructInt; i < int_comp_mask.size(); ++i) {
            if (int_comp_mask[i]) {++num_int_comm_comp;}
        }

        // Bytes per "super-particle": the base particle (or just idcpu for
        // pure SoA) plus every communicated extra component.
        if constexpr (PC::ParticleType::is_soa_particle) {
            m_superparticle_size = sizeof(uint64_t); // idcpu
        } else {
            m_superparticle_size = sizeof(typename PC::ParticleType);
        }
        m_superparticle_size += num_real_comm_comp * sizeof(typename PC::ParticleType::RealType)
            + num_int_comm_comp * sizeof(int);

        buildMPIStart(pc.BufferMap(), m_superparticle_size);
    }
270
    //! Release all plan storage (counts, offsets, requests).
    //! (Defined in AMReX_ParticleCommunication.cpp.)
    void clear ();

    //! Complete the MPI metadata exchange begun by buildMPIStart().
    //! (Defined in AMReX_ParticleCommunication.cpp.)
    void buildMPIFinish (const ParticleBufferMap& map);

private:

    //! Start the MPI metadata exchange; \p psize is the byte size of one
    //! communicated "super-particle".
    void buildMPIStart (const ParticleBufferMap& map, Long psize);

    //
    // Snds - a Vector with the number of bytes that this process will send to each proc.
    // Rcvs - a Vector that, after calling this method, will contain the
    //        number of bytes this process will receive from each proc.
    //
    void doHandShake (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;

    //
    // In the local version of this method, each proc knows which other
    // procs it could possibly receive messages from, meaning we can do
    // this purely with point-to-point communication.
    //
    void doHandShakeLocal (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;

    //
    // In the global version, we don't know who we'll receive from, so we
    // need to do some collective communication first.
    //
    static void doHandShakeGlobal (const Vector<Long>& Snds, Vector<Long>& Rcvs);

    //
    // Another version of the above that is implemented using MPI All-to-All.
    //
    static void doHandShakeAllToAll (const Vector<Long>& Snds, Vector<Long>& Rcvs);

    //! True when the plan only exchanges with known neighbor ranks
    //! (set by build() from its \c local argument).
    bool m_local;
305};
306
{
    // Per-bucket particle offsets within the send buffer (device pointer
    // into plan.m_box_offsets).
    const unsigned int* m_box_offsets;
    // Per-rank byte corrections for alignment padding (device pointer into
    // plan.m_snd_pad_correction_d).
    const std::size_t* m_pad_correction;

    // NOTE(review): the extraction dropped the struct header, the
    // declarations of m_get_pid / m_get_bucket, and the constructor
    // signature line here — confirm against the upstream header.
    : m_box_offsets(plan.m_box_offsets.dataPtr()),
      m_pad_correction(plan.m_snd_pad_correction_d.dataPtr()),
      m_get_pid(map.getPIDFunctor()),
      m_get_bucket(map.getBucketFunctor())
    {}

    //! Byte offset in the send buffer for the \p i -th particle headed to
    //! (\p dst_box, \p dst_lev), given \p psize bytes per particle.
    Long operator() (int dst_box, int dst_lev, std::size_t psize, int i) const
    {
        int dst_pid = m_get_pid(dst_lev, dst_box);
        // Particle slot within the bucket, scaled to bytes ...
        Long dst_offset = Long(psize)*(m_box_offsets[m_get_bucket(dst_lev, dst_box)] + i);
        // ... then shifted by the destination rank's padding correction.
        dst_offset += Long(m_pad_correction[dst_pid]);
        return dst_offset;
    }
};
331
/**
 * \brief Pack every staged particle copy into \p snd_buffer, applying
 * periodic boundary shifts to positions as needed.
 *
 * The buffer size comes from the plan (m_snd_offsets when present, otherwise
 * the total from the box-offset scan); if the buffer's arena lacks device
 * memory it is switched to the pinned arena before resizing.
 *
 * \param pc         the particle container being copied from
 * \param op         per-grid copy metadata (destination box/level, source
 *                   index, periodic shift)
 * \param plan       the plan built by ParticleCopyPlan::build
 * \param snd_buffer byte buffer filled with packed particle data
 */
template <class PC, class Buffer,
          std::enable_if_t<IsParticleContainer<PC>::value &&
                           std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
                                             Buffer>, int> foo = 0>
void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& plan,
                 Buffer& snd_buffer)
{
    BL_PROFILE("amrex::packBuffer");

    Long psize = plan.superParticleSize();

    int num_levels = op.numLevels();
    int num_buckets = pc.BufferMap().numBuckets();

    std::size_t total_buffer_size = 0;
    if (plan.m_snd_offsets.empty())
    {
        // No per-rank offsets: the last entry of the exclusive scan over the
        // box counts is the total particle count.
        unsigned int np = 0;
        Gpu::copy(Gpu::deviceToHost, plan.m_box_offsets.begin() + num_buckets,
                  plan.m_box_offsets.begin() + num_buckets + 1, &np);
        total_buffer_size = np*psize;
    }
    else
    {
        total_buffer_size = plan.m_snd_offsets.back();
    }

    // Fall back to pinned host memory when the device arena is exhausted.
    if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
        snd_buffer.clear();
        snd_buffer.setArena(The_Pinned_Arena());
    }
    snd_buffer.resize(total_buffer_size);

    const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
    const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();

    // Domain extents / periodicity taken from level 0
    // (presumably level-independent for the problem domain — confirm).
    const auto plo = pc.Geom(0).ProbLoArray();
    const auto phi = pc.Geom(0).ProbHiArray();
    const auto is_per = pc.Geom(0).isPeriodicArray();
    for (int lev = 0; lev < num_levels; ++lev)
    {
        auto& plev = pc.GetParticles(lev);
        for (auto& kv : plev)
        {
            int gid = kv.first.first;
            int tid = kv.first.second;
            auto index = std::make_pair(gid, tid);

            auto& src_tile = plev.at(index);
            const auto& ptd = src_tile.getConstParticleTileData();

            int num_copies = op.numCopies(gid, lev);
            if (num_copies == 0) { continue; }

            // Raw device pointers for capture into the kernel below.
            const auto* p_boxes = op.m_boxes[lev].at(gid).dataPtr();
            const auto* p_levels = op.m_levels[lev].at(gid).dataPtr();
            const auto* p_src_indices = op.m_src_indices[lev].at(gid).dataPtr();
            const auto* p_periodic_shift = op.m_periodic_shift[lev].at(gid).dataPtr();
            const auto* p_dst_indices = plan.m_dst_indices[lev].at(gid).dataPtr();
            auto* p_snd_buffer = snd_buffer.dataPtr();
            GetSendBufferOffset get_offset(plan, pc.BufferMap());

            AMREX_FOR_1D ( num_copies, i,
            {
                int dst_box = p_boxes[i];
                if (dst_box >= 0)  // negative box means "do not copy"
                {
                    int dst_lev = p_levels[i];
                    auto dst_offset = get_offset(dst_box, dst_lev, psize, p_dst_indices[i]);
                    int src_index = p_src_indices[i];
                    ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);

                    // If the copy crosses a periodic boundary, fix up the
                    // already-packed position in place.
                    const IntVect& pshift = p_periodic_shift[i];
                    bool do_periodic_shift =
                        AMREX_D_TERM( (is_per[0] && pshift[0] != 0),
                                   || (is_per[1] && pshift[1] != 0),
                                   || (is_per[2] && pshift[2] != 0) );

                    if (do_periodic_shift)
                    {
                        ParticleReal pos[AMREX_SPACEDIM];
                        Long pos_offset = dst_offset;
                        // for pure SoA positions come after idcpu
                        if constexpr (PC::ParticleType::is_soa_particle) {
                            pos_offset += sizeof(uint64_t);
                        }
                        // Read back the packed position, shift by one domain
                        // length in each periodic direction, and rewrite it.
                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[pos_offset],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                        {
                            if (! is_per[idim]) { continue; }
                            if (pshift[idim] > 0) {
                                pos[idim] += phi[idim] - plo[idim];
                            } else if (pshift[idim] < 0) {
                                pos[idim] -= phi[idim] - plo[idim];
                            }
                        }
                        amrex::Gpu::memcpy(&p_snd_buffer[pos_offset], &pos[0],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                    }
                }
            });
        }
    }
}
437
438template <class PC, class Buffer, class UnpackPolicy,
439 std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
440void unpackBuffer (PC& pc, const ParticleCopyPlan& plan, const Buffer& snd_buffer, UnpackPolicy const& policy)
441{
442 BL_PROFILE("amrex::unpackBuffer");
443
444 using PTile = typename PC::ParticleTileType;
445
446 int num_levels = pc.BufferMap().numLevels();
447 Long psize = plan.superParticleSize();
448
449 // count how many particles we have to add to each tile
450 std::vector<int> sizes;
451 std::vector<PTile*> tiles;
452 for (int lev = 0; lev < num_levels; ++lev)
453 {
454 for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
455 {
456 int gid = mfi.index();
457 int tid = mfi.LocalTileIndex();
458 auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
459 int num_copies = plan.m_box_counts_h[pc.BufferMap().gridAndLevToBucket(gid, lev)];
460 sizes.push_back(num_copies);
461 tiles.push_back(&tile);
462 }
463 }
464
465 // resize the tiles and compute offsets
466 std::vector<int> offsets;
467 policy.resizeTiles(tiles, sizes, offsets);
468
469 const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
470 const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();
471
472 // local unpack
473 int uindex = 0;
474 for (int lev = 0; lev < num_levels; ++lev)
475 {
476 auto& plev = pc.GetParticles(lev);
477 for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
478 {
479 int gid = mfi.index();
480 int tid = mfi.LocalTileIndex();
481 auto index = std::make_pair(gid, tid);
482
483 auto& tile = plev[index];
484
485 GetSendBufferOffset get_offset(plan, pc.BufferMap());
486 auto p_snd_buffer = snd_buffer.dataPtr();
487
488 int offset = offsets[uindex];
489 int size = sizes[uindex];
490 ++uindex;
491
492 auto ptd = tile.getParticleTileData();
493 AMREX_FOR_1D ( size, i,
494 {
495 auto src_offset = get_offset(gid, lev, psize, i);
496 int dst_index = offset + i;
497 ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
498 });
499 }
500 }
501}
502
/**
 * \brief Post the nonblocking MPI receives and sends that move packed
 * particle data between ranks.
 *
 * Receive offsets are aligned per the selected MPI datatype size; the
 * difference between aligned offset and cumulative byte count is stored as a
 * per-sender "pad correction" that unpackRemotes later applies.
 *
 * NOTE(review): some statement lines were dropped by the documentation
 * extraction (marked below); consult the upstream header before editing.
 *
 * \param pc         the particle container (unused without MPI)
 * \param plan       plan holding per-rank particle counts; its request and
 *                   status vectors are (re)filled here
 * \param snd_buffer packed bytes to send
 * \param rcv_buffer resized here and filled by the posted receives
 */
template <class PC, class SndBuffer, class RcvBuffer,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndBuffer& snd_buffer, RcvBuffer& rcv_buffer)
{
    BL_PROFILE("amrex::communicateParticlesStart");

#ifdef AMREX_USE_MPI
    Long psize = plan.superParticleSize();
    const int NProcs = ParallelContext::NProcsSub();
    const int MyProc = ParallelContext::MyProcSub();

    if (NProcs == 1) { return; }

    Vector<int> RcvProc;
    Vector<Long> rOffset;

    plan.m_rcv_pad_correction_h.resize(0);
    plan.m_rcv_pad_correction_h.push_back(0);

    // Lay out the receive buffer: one aligned segment per sending rank.
    Long TotRcvBytes = 0;
    for (int i = 0; i < NProcs; ++i) {
        if (plan.m_rcv_num_particles[i] > 0) {
            RcvProc.push_back(i);
            Long nbytes = plan.m_rcv_num_particles[i]*psize;
            std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
            TotRcvBytes = Long(amrex::aligned_size(acd, TotRcvBytes));
            rOffset.push_back(TotRcvBytes);
            TotRcvBytes += Long(amrex::aligned_size(acd, nbytes));
            plan.m_rcv_pad_correction_h.push_back(plan.m_rcv_pad_correction_h.back() + nbytes);
        }
    }

    // Turn cumulative unpadded byte counts into per-sender corrections
    // (aligned offset minus unpadded offset).
    // NOTE(review): m_nrcvs is read here but only re-assigned from
    // RcvProc.size() below — presumably buildMPIFinish already set it to the
    // same value; confirm against the upstream sources.
    for (int i = 0; i < plan.m_nrcvs; ++i)
    {
        plan.m_rcv_pad_correction_h[i] = rOffset[i] - plan.m_rcv_pad_correction_h[i];
    }

    // NOTE(review): extraction dropped lines here (presumably the resize of
    // m_rcv_pad_correction_d and the opening of the host-to-device copyAsync
    // whose fragment follows).
    plan.m_rcv_pad_correction_d.begin());

    rcv_buffer.resize(TotRcvBytes);

    plan.m_nrcvs = int(RcvProc.size());

    plan.m_particle_rstats.resize(0);
    plan.m_particle_rstats.resize(plan.m_nrcvs);

    plan.m_particle_rreqs.resize(0);
    plan.m_particle_rreqs.resize(plan.m_nrcvs);

    plan.m_particle_sstats.resize(0);
    plan.m_particle_sreqs.resize(0);

    const int SeqNum = ParallelDescriptor::SeqNum();

    // Post receives.
    for (int i = 0; i < plan.m_nrcvs; ++i) {
        const auto Who = RcvProc[i];
        const auto offset = rOffset[i];
        Long nbytes = plan.m_rcv_num_particles[Who]*psize;
        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        const auto Cnt = amrex::aligned_size(acd, nbytes);

        AMREX_ASSERT(Cnt > 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);
        AMREX_ASSERT(amrex::aligned_size(acd, nbytes) % acd == 0);

        plan.m_particle_rreqs[i] =
            ParallelDescriptor::Arecv((char*) (rcv_buffer.dataPtr() + offset), Cnt, Who, SeqNum, ParallelContext::CommunicatorSub()).req();
    }

    if (plan.m_NumSnds == 0) { return; }

    // Send.
    for (int i = 0; i < NProcs; ++i)
    {
        if (i == MyProc) { continue; }
        const auto Who = i;
        const auto Cnt = plan.m_snd_counts[i];
        if (Cnt == 0) { continue; }

        auto snd_offset = plan.m_snd_offsets[i];
        AMREX_ASSERT(plan.m_snd_counts[i] % ParallelDescriptor::sizeof_selected_comm_data_type(plan.m_snd_num_particles[i]*psize) == 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);

        // NOTE(review): extraction dropped the tail of this Asend call
        // (presumably the communicator argument, the closing parentheses,
        // and a .req() to extract the request).
        plan.m_particle_sreqs.push_back(ParallelDescriptor::Asend((char const*)(snd_buffer.dataPtr()+snd_offset), Cnt, Who, SeqNum,
    }

    plan.m_particle_sstats.resize(plan.m_particle_sreqs.size());

#else
    amrex::ignore_unused(pc,plan,snd_buffer,rcv_buffer);
#endif // MPI
}
600
601void communicateParticlesFinish (const ParticleCopyPlan& plan);
602
/**
 * \brief Unpack particles received from other ranks (rcv_buffer) into the
 * destination tiles of \p pc.
 *
 * \param pc         the particle container receiving the particles
 * \param plan       plan describing, per received box: counts, levels, grid
 *                   ids, offsets, and the sending rank
 * \param rcv_buffer packed bytes filled by the receives posted in
 *                   communicateParticlesStart
 * \param policy     decides how tiles grow and reports insertion offsets
 */
template <class PC, class Buffer, class UnpackPolicy,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void unpackRemotes (PC& pc, const ParticleCopyPlan& plan, Buffer& rcv_buffer, UnpackPolicy const& policy)
{
    BL_PROFILE("amrex::unpackRemotes");

#ifdef AMREX_USE_MPI
    const int NProcs = ParallelContext::NProcsSub();
    if (NProcs == 1) { return; }

    const int MyProc = ParallelContext::MyProcSub();
    amrex::ignore_unused(MyProc);  // only used in the assertion below
    using PTile = typename PC::ParticleTileType;

    if (plan.m_nrcvs > 0)
    {
        const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
        const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();
        auto* p_rcv_buffer = rcv_buffer.dataPtr();

        // Pass 1: collect destination tiles and incoming counts per
        // received box (always tile 0 on the receiving side).
        std::vector<int> sizes;
        std::vector<PTile*> tiles;
        for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
        {
            int copy_size = plan.m_rcv_box_counts[i];
            int lev = plan.m_rcv_box_levs[i];
            int gid = plan.m_rcv_box_ids[i];
            int tid = 0;
            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            sizes.push_back(copy_size);
            tiles.push_back(&tile);
        }

        // Grow the tiles; offsets[i] is where batch i starts writing.
        Vector<int> offsets;
        policy.resizeTiles(tiles, sizes, offsets);
        // Pass 2: unpack each batch. procindex advances whenever the sending
        // rank changes, selecting the matching pad correction.
        int uindex = 0;
        int procindex = 0, rproc = plan.m_rcv_box_pids[0];
        for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
        {
            int lev = plan.m_rcv_box_levs[i];
            int gid = plan.m_rcv_box_ids[i];
            int tid = 0;
            auto offset = plan.m_rcv_box_offsets[i];
            procindex = (rproc == plan.m_rcv_box_pids[i]) ? procindex : procindex+1;
            rproc = plan.m_rcv_box_pids[i];

            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            auto ptd = tile.getParticleTileData();

            // This rank must own the grid it is unpacking into.
            AMREX_ASSERT(MyProc ==
                         ParallelContext::global_to_local_rank(pc.ParticleDistributionMap(lev)[gid]));

            int dst_offset = offsets[uindex];
            int size = sizes[uindex];
            ++uindex;

            Long psize = plan.superParticleSize();
            const auto* p_pad_adjust = plan.m_rcv_pad_correction_d.dataPtr();

            AMREX_FOR_1D ( size, ip, {
                // Source byte = particle slot within this sender's segment
                // plus the sender's alignment-padding correction.
                Long src_offset = psize*(offset + ip) + p_pad_adjust[procindex];
                int dst_index = dst_offset + ip;
                ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
                                       p_comm_real, p_comm_int);
            });

            // NOTE(review): extraction dropped a line here (possibly a
            // stream synchronize) — confirm against the upstream header.
        }
    }
#else
    amrex::ignore_unused(pc,plan,rcv_buffer,policy);
#endif // MPI
}
677
678} // namespace amrex
679
680#endif // AMREX_PARTICLECOMMUNICATION_H_
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_FOR_1D(...)
Definition AMReX_GpuLaunchMacrosC.nolint.H:97
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
Iterator for looping over tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:85
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:169
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
iterator begin() noexcept
Definition AMReX_PODVector.H:674
void resize(size_type a_new_size, GrowthStrategy strategy=GrowthStrategy::Poisson)
Definition AMReX_PODVector.H:728
iterator end() noexcept
Definition AMReX_PODVector.H:678
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
Definition AMReX_ParticleBufferMap.H:53
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
amrex_long Long
Definition AMReX_INT.H:30
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1440
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:845
__host__ __device__ AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition AMReX_GpuAtomic.H:201
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous storage.
Definition AMReX_GpuContainers.H:128
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous storage.
Definition AMReX_GpuContainers.H:228
static constexpr DeviceToHost deviceToHost
Definition AMReX_GpuContainers.H:106
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
__host__ __device__ void * memcpy(void *dest, const void *src, std::size_t count)
Definition AMReX_GpuUtility.H:220
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:505
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:605
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:384
const int[]
Definition AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that ...
Definition AMReX_Arena.H:33
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:440
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:336
Definition AMReX_ParticleBufferMap.H:35
Definition AMReX_ParticleBufferMap.H:14
Definition AMReX_ParticleCommunication.H:308
const unsigned int * m_box_offsets
Definition AMReX_ParticleCommunication.H:309
GetPID m_get_pid
Definition AMReX_ParticleCommunication.H:312
GetBucket m_get_bucket
Definition AMReX_ParticleCommunication.H:313
const std::size_t * m_pad_correction
Definition AMReX_ParticleCommunication.H:310
__device__ Long operator()(int dst_box, int dst_lev, std::size_t psize, int i) const
Definition AMReX_ParticleCommunication.H:323
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.H:315
Definition AMReX_ParticleCommunication.H:19
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:21
Definition AMReX_ParticleCommunication.H:58
int numCopies(int gid, int lev) const
Definition AMReX_ParticleCommunication.H:70
void setNumLevels(int num_levels)
Definition AMReX_ParticleCommunication.cpp:14
int numLevels() const
Definition AMReX_ParticleCommunication.H:77
Vector< std::map< int, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition AMReX_ParticleCommunication.H:62
Vector< std::map< int, Gpu::DeviceVector< int > > > m_boxes
Definition AMReX_ParticleCommunication.H:59
Vector< std::map< int, Gpu::DeviceVector< int > > > m_levels
Definition AMReX_ParticleCommunication.H:60
Vector< std::map< int, Gpu::DeviceVector< int > > > m_src_indices
Definition AMReX_ParticleCommunication.H:61
void resize(int gid, int lev, int size)
Definition AMReX_ParticleCommunication.cpp:22
void clear()
Definition AMReX_ParticleCommunication.cpp:6
Definition AMReX_ParticleCommunication.H:81
Vector< int > m_rcv_box_ids
Definition AMReX_ParticleCommunication.H:90
Vector< std::size_t > m_snd_offsets
Definition AMReX_ParticleCommunication.H:116
Vector< int > m_rcv_box_counts
Definition AMReX_ParticleCommunication.H:88
Vector< std::size_t > m_snd_counts
Definition AMReX_ParticleCommunication.H:117
Long m_NumSnds
Definition AMReX_ParticleCommunication.H:94
void buildMPIFinish(const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.cpp:213
Vector< int > m_neighbor_procs
Definition AMReX_ParticleCommunication.H:108
void clear()
Definition AMReX_ParticleCommunication.cpp:34
Vector< int > m_rcv_box_pids
Definition AMReX_ParticleCommunication.H:91
Vector< int > m_rcv_box_levs
Definition AMReX_ParticleCommunication.H:92
Gpu::DeviceVector< int > d_real_comp_mask
Definition AMReX_ParticleCommunication.H:125
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition AMReX_ParticleCommunication.H:120
Long m_superparticle_size
Definition AMReX_ParticleCommunication.H:126
Vector< Long > m_Snds
Definition AMReX_ParticleCommunication.H:110
Vector< MPI_Request > m_particle_sreqs
Definition AMReX_ParticleCommunication.H:103
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition AMReX_ParticleCommunication.H:84
Vector< Long > m_Rcvs
Definition AMReX_ParticleCommunication.H:111
Vector< int > m_rcv_box_offsets
Definition AMReX_ParticleCommunication.H:89
Vector< std::size_t > m_snd_pad_correction_h
Definition AMReX_ParticleCommunication.H:119
Vector< std::map< int, Gpu::DeviceVector< int > > > m_dst_indices
Definition AMReX_ParticleCommunication.H:82
Vector< MPI_Status > m_particle_sstats
Definition AMReX_ParticleCommunication.H:102
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition AMReX_ParticleCommunication.H:86
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition AMReX_ParticleCommunication.H:123
Vector< std::size_t > m_rOffset
Definition AMReX_ParticleCommunication.H:113
Vector< MPI_Status > m_particle_rstats
Definition AMReX_ParticleCommunication.H:99
Vector< Long > m_snd_num_particles
Definition AMReX_ParticleCommunication.H:105
Vector< MPI_Request > m_particle_rreqs
Definition AMReX_ParticleCommunication.H:100
Long superParticleSize() const
Definition AMReX_ParticleCommunication.H:128
Gpu::HostVector< int > m_rcv_data
Definition AMReX_ParticleCommunication.H:114
Vector< MPI_Status > m_build_stats
Definition AMReX_ParticleCommunication.H:96
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, bool local)
Definition AMReX_ParticleCommunication.H:131
Vector< int > m_RcvProc
Definition AMReX_ParticleCommunication.H:112
Vector< std::size_t > m_rcv_pad_correction_h
Definition AMReX_ParticleCommunication.H:122
int m_nrcvs
Definition AMReX_ParticleCommunication.H:95
Gpu::HostVector< unsigned int > m_box_counts_h
Definition AMReX_ParticleCommunication.H:85
Vector< MPI_Request > m_build_rreqs
Definition AMReX_ParticleCommunication.H:97
Vector< Long > m_rcv_num_particles
Definition AMReX_ParticleCommunication.H:106
Gpu::DeviceVector< int > d_int_comp_mask
Definition AMReX_ParticleCommunication.H:125
Definition AMReX_ParticleCommunication.H:34
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:36