Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_ParticleCommunication.H
Go to the documentation of this file.
1#ifndef AMREX_PARTICLECOMMUNICATION_H_
2#define AMREX_PARTICLECOMMUNICATION_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Gpu.H>
7#include <AMReX_IntVect.H>
9#include <AMReX_MFIter.H>
10#include <AMReX_OpenMP.H>
11#include <AMReX_Scan.H>
12#include <AMReX_TypeTraits.H>
13#include <AMReX_MakeParticle.H>
14#include <AMReX_ParmParse.H>
15
16#include <algorithm>
17#include <map>
18#include <numeric>
19#include <utility>
20
21namespace amrex {
22
23class ParticleContainerBase;
24
26{
27 template <class PTile>
28 void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
29 {
30 for(int i = 0; i < static_cast<int>(sizes.size()); ++i)
31 {
32 int offset = tiles[i]->numTotalParticles();
33 int nn = tiles[i]->getNumNeighbors();
34 tiles[i]->setNumNeighbors(nn + sizes[i]);
35 offsets.push_back(offset);
36 }
37 }
38};
39
41{
42 template <class PTile>
43 void resizeTiles (std::vector<PTile*>& tiles, const std::vector<int>& sizes, std::vector<int>& offsets) const
44 {
45 int N = static_cast<int>(sizes.size());
46
47 std::map<PTile*, int> tile_sizes;
48 for(int i = 0; i < N; ++i) {
49 tile_sizes[tiles[i]] = tiles[i]->numParticles();
50 }
51
52 for(int i = 0; i < N; ++i)
53 {
54 offsets.push_back(tile_sizes[tiles[i]]);
55 tile_sizes[tiles[i]] += sizes[i];
56 }
57
58 for (auto& kv : tile_sizes) {
59 kv.first->resize(kv.second);
60 }
61 }
62};
63
65{
66 using TileKey = std::pair<int, int>;
67
73
74 void clear ();
75
76 void setNumLevels (int num_levels);
77
78 void resize (int gid, int tid, int lev, int size);
79
80 [[nodiscard]] int numCopies (TileKey const& index, int lev) const
81 {
82 if (m_boxes.size() <= lev) { return 0; }
83 auto mit = m_boxes[lev].find(index);
84 return mit == m_boxes[lev].end() ? 0 : int(mit->second.size());
85 }
86
    //! Number of AMR levels for which copy destinations have been recorded.
    [[nodiscard]] int numLevels () const { return int(m_boxes.size()); }
88};
89
91{
92 using TileKey = std::pair<int, int>;
93
97
98#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    //! One batch of copy work for the OpenMP two-pass algorithm: raw views
    //! into a single source tile's destination descriptors.
    struct OmpCopyWork
    {
        const int* boxes = nullptr;       //!< destination grid id per particle (< 0: skipped)
        const int* levs = nullptr;        //!< destination level per particle
        const int* tiles = nullptr;       //!< destination tile id per particle
        int* dst_indices = nullptr;       //!< output: assigned slot index per particle
        int num_copies = 0;               //!< number of entries in the arrays above
    };
107#endif
108
110 {
        //! Scratch space shared by the buildCopies variants, constructed with
        //! the number of destination buckets the plan distinguishes.
        explicit BuildWorkspace (int a_num_buckets)
            : num_buckets(a_num_buckets)
#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
            // Offsets are built by appending partial sums; seed with a leading 0.
            , omp_copy_offsets(1, 0)
#endif
        {}

        int num_buckets = 0;  //!< total number of destination buckets

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
        Vector<unsigned int> omp_thread_box_counts;  //!< per-(thread, bucket) counters, sized max_threads*num_buckets
        Vector<OmpCopyWork> omp_copy_work;           //!< one entry per source tile with work to do
        Vector<Long> omp_copy_offsets;               //!< prefix sums of num_copies over omp_copy_work
#endif
    };
127
128 template <class PC, class F>
129 void forEachCopyBatch (const PC& pc, const ParticleCopyOp& op, F&& f)
130 {
131 const int num_levels = op.numLevels();
132 m_dst_indices.resize(num_levels);
133 auto&& batch_fn = std::forward<F>(f);
134
135 for (int lev = 0; lev < num_levels; ++lev)
136 {
137 for (const auto& kv : pc.GetParticles(lev))
138 {
139 auto index = kv.first;
140 int num_copies = op.numCopies(index, lev);
141 if (num_copies == 0) { continue; }
142
143 auto& dst_indices = m_dst_indices[lev][index];
144 dst_indices.resize(num_copies);
145
146 batch_fn(lev, index, num_copies, dst_indices);
147 }
148 }
149 }
150
    //! Build destination indices with the stable, deterministic-order
    //! algorithm: bucket counters are incremented serially on the host so
    //! that particles receive destination slots in a reproducible order.
    template <class PC, class GetBucket>
    void buildCopies (const PC& pc, const ParticleCopyOp& op,
                      StableOrderedAlgorithm, BuildWorkspace& workspace, GetBucket const& getBucket)
    {
        BL_PROFILE("ParticleCopyPlan::buildCopiesStableOrdered");

        forEachCopyBatch(pc, op,
            [&] (int lev, TileKey const& index, int num_copies, Gpu::DeviceVector<int>& dst_indices)
        {
#ifdef AMREX_USE_GPU
            // Stage this batch's destination descriptors on the host.
            const Gpu::DeviceVector<int>& d_boxes = op.m_boxes[lev].at(index);
            Gpu::HostVector<int> h_boxes(d_boxes.size());
            Gpu::copy(Gpu::deviceToHost, d_boxes.begin(), d_boxes.end(), h_boxes.begin());

            const Gpu::DeviceVector<int>& d_levs = op.m_levels[lev].at(index);
            Gpu::HostVector<int> h_levs(d_levs.size());
            Gpu::copy(Gpu::deviceToHost, d_levs.begin(), d_levs.end(), h_levs.begin());

            const Gpu::DeviceVector<int>& d_tiles = op.m_tiles[lev].at(index);
            Gpu::HostVector<int> h_tiles(d_tiles.size());
            Gpu::copy(Gpu::deviceToHost, d_tiles.begin(), d_tiles.end(), h_tiles.begin());

            Gpu::HostVector<int> h_dst_indices(num_copies);
#else
            // Without GPU support the "device" vectors already live on the host.
            const Gpu::DeviceVector<int>& h_boxes = op.m_boxes[lev].at(index);
            const Gpu::DeviceVector<int>& h_levs = op.m_levels[lev].at(index);
            const Gpu::DeviceVector<int>& h_tiles = op.m_tiles[lev].at(index);

            Gpu::DeviceVector<int>& h_dst_indices = dst_indices;
#endif
            // Serial slot assignment: each particle takes the next free index
            // in its destination bucket; a negative box id means the particle
            // is not copied.
            for (int i = 0; i < num_copies; ++i) {
                int dst_box = h_boxes[i];
                if (dst_box >= 0) {
                    int dst_tile = h_tiles[i];
                    int dst_lev = h_levs[i];
                    int dst_index = static_cast<int>(workspace.h_box_counts[getBucket(dst_lev, dst_box, dst_tile)]++);
                    h_dst_indices[i] = dst_index;
                }
            }

#ifdef AMREX_USE_GPU
            // Write the host-computed indices back into the device vector.
            // NOTE(review): the leading line of this copy call appears to be
            // missing from this excerpt.
                      h_dst_indices.begin(), h_dst_indices.end(),
                      dst_indices.begin());
#endif
        });
    }
198
199#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    //! Build destination indices with a deterministic two-pass OpenMP
    //! algorithm: pass 1 counts, per thread, how many particles target each
    //! bucket; the counts are then converted into per-thread starting
    //! offsets; pass 2 assigns the actual slot indices.
    template <class PC, class GetBucket>
    void buildCopies (const PC& pc, const ParticleCopyOp& op,
                      TwoPassHostAlgorithm, BuildWorkspace& workspace, GetBucket const& getBucket)
    {
        BL_PROFILE("ParticleCopyPlan::buildCopiesTwoPassHost");

        // One counter per (thread, bucket) pair.
        workspace.omp_thread_box_counts.resize(OpenMP::get_max_threads()*workspace.num_buckets, 0U);

        // Record raw pointers for every batch, plus a prefix sum of batch
        // sizes so threads can later split the global index range evenly.
        forEachCopyBatch(pc, op,
            [&op, &workspace] (int lev, TileKey const& index, int num_copies, Gpu::DeviceVector<int>& dst_indices)
        {
            const auto* p_boxes = op.m_boxes[lev].at(index).dataPtr();
            const auto* p_levs = op.m_levels[lev].at(index).dataPtr();
            const auto* p_tiles = op.m_tiles[lev].at(index).dataPtr();
            auto* p_dst_indices = dst_indices.dataPtr();

            workspace.omp_copy_work.push_back({p_boxes, p_levs, p_tiles, p_dst_indices, num_copies});
            workspace.omp_copy_offsets.push_back(workspace.omp_copy_offsets.back() + num_copies);
        });

        if (workspace.omp_copy_work.empty()) { return; }

        std::fill(workspace.omp_thread_box_counts.begin(), workspace.omp_thread_box_counts.end(), 0U);
        auto* p_omp_thread_box_counts = workspace.omp_thread_box_counts.data();
        const auto* p_omp_copy_work = workspace.omp_copy_work.data();
        const auto* p_omp_copy_offsets = workspace.omp_copy_offsets.data();
        const Long total_num_copies = workspace.omp_copy_offsets.back();

#pragma omp parallel
        {
            int thread_num = OpenMP::get_thread_num();
            int num_threads = OpenMP::get_num_threads();
            // Static, even split of the global particle range [0, total).
            Long ibegin = thread_num*total_num_copies/num_threads;
            Long iend = (thread_num+1)*total_num_copies/num_threads;
            auto* p_thread_box_counts = p_omp_thread_box_counts + thread_num*workspace.num_buckets;

            // Pass 1: count this thread's particles per destination bucket.
            if (ibegin < iend)
            {
                // Locate the first batch that overlaps this thread's range.
                int iwork = static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
                                                              workspace.omp_copy_offsets.end(),
                                                              ibegin)
                                             - workspace.omp_copy_offsets.begin()) - 1;
                while (iwork < static_cast<int>(workspace.omp_copy_work.size()) &&
                       p_omp_copy_offsets[iwork] < iend)
                {
                    auto const& work = p_omp_copy_work[iwork];
                    // Clip the batch to this thread's global range.
                    Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
                    Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
                    int local_begin = static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
                    int local_end = static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
                    for (int i = local_begin; i < local_end; ++i)
                    {
                        int dst_box = work.boxes[i];
                        if (dst_box >= 0)   // negative box id: particle is not copied
                        {
                            int dst_tile = work.tiles[i];
                            int dst_lev = work.levs[i];
                            ++p_thread_box_counts[getBucket(dst_lev, dst_box, dst_tile)];
                        }
                    }
                    ++iwork;
                }
            }

            // All per-thread counts must be in before offsets are computed.
#pragma omp barrier
            // Exchange counts for starting offsets: for each bucket, scan the
            // per-thread counts, accumulating onto the shared host counters.
#pragma omp for
            for (int ibucket = 0; ibucket < workspace.num_buckets; ++ibucket)
            {
                unsigned int offset = workspace.h_box_counts[ibucket];
                for (int tid = 0; tid < num_threads; ++tid)
                {
                    auto& count = p_omp_thread_box_counts[tid*workspace.num_buckets + ibucket];
                    unsigned int total = count;
                    count = offset;      // count now holds thread tid's start offset
                    offset += total;
                }
                workspace.h_box_counts[ibucket] = offset;  // bucket total after this pass
            }
            // (the omp for above ends with an implicit barrier)

            // Pass 2: same traversal, now handing out slot indices from each
            // thread's reserved per-bucket ranges.
            if (ibegin < iend)
            {
                int iwork = static_cast<int>(std::upper_bound(workspace.omp_copy_offsets.begin(),
                                                              workspace.omp_copy_offsets.end(),
                                                              ibegin)
                                             - workspace.omp_copy_offsets.begin()) - 1;
                while (iwork < static_cast<int>(workspace.omp_copy_work.size()) &&
                       p_omp_copy_offsets[iwork] < iend)
                {
                    auto const& work = p_omp_copy_work[iwork];
                    Long global_begin = std::max(ibegin, p_omp_copy_offsets[iwork]);
                    Long global_end = std::min(iend, p_omp_copy_offsets[iwork+1]);
                    int local_begin = static_cast<int>(global_begin - p_omp_copy_offsets[iwork]);
                    int local_end = static_cast<int>(global_end - p_omp_copy_offsets[iwork]);
                    for (int i = local_begin; i < local_end; ++i)
                    {
                        int dst_box = work.boxes[i];
                        if (dst_box >= 0)
                        {
                            int dst_tile = work.tiles[i];
                            int dst_lev = work.levs[i];
                            int bucket = getBucket(dst_lev, dst_box, dst_tile);
                            work.dst_indices[i] = static_cast<int>(p_thread_box_counts[bucket]++);
                        }
                    }
                    ++iwork;
                }
            }
        }
    }
309#endif
310
    //! Build destination indices by atomic scatter: each particle atomically
    //! fetches-and-increments its destination bucket's device counter. Fast,
    //! but the resulting slot order is not deterministic across runs.
    // NOTE(review): part of this signature (presumably the algorithm tag,
    // workspace, and getBucket parameters) appears to be missing from this
    // excerpt.
    template <class PC, class GetBucket>
    void buildCopies (const PC& pc, const ParticleCopyOp& op,
    {
        BL_PROFILE("ParticleCopyPlan::buildCopiesAtomicScatter");

        auto* p_dst_box_counts = m_box_counts_d.dataPtr();

        forEachCopyBatch(pc, op,
            [&op, &getBucket, p_dst_box_counts] (int lev, TileKey const& index,
                                                 int num_copies, Gpu::DeviceVector<int>& dst_indices)
        {
            const auto* p_boxes = op.m_boxes[lev].at(index).dataPtr();
            const auto* p_levs = op.m_levels[lev].at(index).dataPtr();
            const auto* p_tiles = op.m_tiles[lev].at(index).dataPtr();
            auto* p_dst_indices = dst_indices.dataPtr();

            amrex::ParallelFor(num_copies, [=] AMREX_GPU_DEVICE (int i)
            {
                int dst_box = p_boxes[i];
                if (dst_box >= 0)   // negative box id: particle is not copied
                {
                    int dst_tile = p_tiles[i];
                    int dst_lev = p_levs[i];
                    // Claim the next free slot in the destination bucket.
                    int dst_index = static_cast<int>(HostDevice::Atomic::FetchAdd(
                        &p_dst_box_counts[getBucket(dst_lev, dst_box, dst_tile)], 1U));
                    p_dst_indices[i] = dst_index;
                }
            });
        });
    }
342
    //! After one of the buildCopies variants has filled the bucket counters,
    //! move host-side counters to the device (if they were used) and turn
    //! the counts into per-bucket offsets.
    void finalizeBuildBoxCounts (BuildWorkspace const& workspace, bool use_host_box_counters)
    {
        if (use_host_box_counters) {
            // NOTE(review): the leading line of this host-to-device copy call
            // appears to be missing from this excerpt.
                workspace.h_box_counts.begin(), workspace.h_box_counts.end(),
                m_box_counts_d.begin());
        }

        // NOTE(review): the leading line of this scan call (writing offsets
        // into m_box_offsets) appears to be missing from this excerpt.
            m_box_offsets.begin());
    }
354
355public:
356
358
362
369
371 int m_nrcvs = 0;
374
377
380
383
385
391
394
397
400
403
405
    //! Build the full copy plan for the recorded copy operation.
    //!
    //! \param pc             the particle container the plan serves
    //! \param op             recorded per-particle destinations
    //! \param int_comp_mask  which integer components are communicated
    //! \param real_comp_mask which real components are communicated
    //! \param local          nonzero: only communicate with known neighbor procs
    template <class PC, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
    void build (const PC& pc,
                const ParticleCopyOp& op,
                const Vector<int>& int_comp_mask,
                const Vector<int>& real_comp_mask,
                int local)
    {
        BL_PROFILE("ParticleCopyPlan::build");

        // Runtime switch for the one-sided-communication handshake variant.
        ParmParse pp("particles");
        pp.query("do_one_sided_comms", m_do_one_sided_comms);
        const int num_buckets = pc.BufferMap().numBuckets();

        m_local = local;
        if (local)
        {
            m_neighbor_procs = pc.NeighborProcs(local);
        }
        else
        {
            // All ranks are potential communication partners.
            // NOTE(review): a line sizing m_neighbor_procs appears to be
            // missing from this excerpt.
            std::iota(m_neighbor_procs.begin(), m_neighbor_procs.end(), 0);
        }

        // Per-bucket particle counters and their offsets (num_buckets+1 so
        // the final entry can carry the total).
        m_box_counts_d.resize(0);
        m_box_counts_d.resize(num_buckets+1, 0);
        m_box_offsets.resize(num_buckets+1);

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
        constexpr bool use_host_bucket_counters = true;
#else
        constexpr bool use_host_bucket_counters = false;
#endif
        BuildWorkspace workspace(num_buckets);
        // Host-side counters are needed for the stable path and the OpenMP path.
        bool use_host_box_counters = pc.stableRedistribute() || use_host_bucket_counters;
        if (use_host_box_counters) {
            workspace.h_box_counts.resize(m_box_counts_d.size(), 0);
        }

        // Choose the slot-assignment algorithm.
        if (pc.stableRedistribute())
        {
            buildCopies(pc, op, StableOrderedAlgorithm{}, workspace, pc.BufferMap().getHostBucketFunctor());
        }
        else
        {
#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
            buildCopies(pc, op, TwoPassHostAlgorithm{}, workspace, pc.BufferMap().getBucketFunctor());
#else
            buildCopies(pc, op, AtomicScatterAlgorithm{}, workspace, pc.BufferMap().getBucketFunctor());
#endif
        }

        finalizeBuildBoxCounts(workspace, use_host_box_counters);

        // Mirror the device counts / padding corrections on the host.
        // NOTE(review): several copy-call lines appear to be missing from
        // this excerpt below; only their trailing arguments remain.
        m_box_counts_h.resize(m_box_counts_d.size());
            m_box_counts_h.begin());

        m_snd_pad_correction_h.resize(0);

            m_snd_pad_correction_d.begin());

        // Device copies of the component masks for the pack/unpack kernels.
        d_int_comp_mask.resize(int_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, int_comp_mask.begin(), int_comp_mask.end(),
                       d_int_comp_mask.begin());
        d_real_comp_mask.resize(real_comp_mask.size());
        Gpu::copyAsync(Gpu::hostToDevice, real_comp_mask.begin(), real_comp_mask.end(),
                       d_real_comp_mask.begin());

        int NStructReal = PC::ParticleContainerType::NStructReal;
        int NStructInt = PC::ParticleContainerType::NStructInt;

        // Count the extra real components selected for communication; for AoS
        // layouts, positions and struct reals travel with the particle struct
        // itself, so skip those leading mask entries.
        int num_real_comm_comp = 0;
        int comm_comps_start = 0;
        if constexpr (!PC::ParticleType::is_soa_particle) {
            comm_comps_start += AMREX_SPACEDIM + NStructReal;
        }
        for (int i = comm_comps_start; i < real_comp_mask.size(); ++i) {
            if (real_comp_mask[i]) {++num_real_comm_comp;}
        }

        // Same for extra integer components, skipping the first
        // 2 + NStructInt mask entries.
        int num_int_comm_comp = 0;
        for (int i = 2 + NStructInt; i < int_comp_mask.size(); ++i) {
            if (int_comp_mask[i]) {++num_int_comm_comp;}
        }

        // Bytes per particle in the communication buffer: the base particle
        // (or just idcpu for pure SoA) plus the selected extra components.
        if constexpr (PC::ParticleType::is_soa_particle) {
            m_superparticle_size = sizeof(uint64_t); // idcpu
        } else {
            m_superparticle_size = sizeof(typename PC::ParticleType);
        }
        m_superparticle_size += num_real_comm_comp * sizeof(typename PC::ParticleType::RealType)
            + num_int_comm_comp * sizeof(int);

        buildMPIStart(pc, pc.BufferMap(), m_superparticle_size);
    }
507
508 void clear ();
509
510 void buildMPIFinish (const ParticleBufferMap& map);
511
512private:
513
514 void buildMPIStart (const ParticleContainerBase& pc, const ParticleBufferMap& map, Long psize);
515
516 //
517 // Snds - a Vector with the number of bytes this process will send to each proc.
518 // Rcvs - a Vector that, after calling this method, will contain the
519 // number of bytes this process will receive from each proc.
520 //
521 void doHandShake (const ParticleContainerBase& pc, const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
522
523 //
524 // In the local version of this method, each proc knows which other
525 // procs it could possibly receive messages from, meaning we can do
526 // this purely with point-to-point communication.
527 //
528 void doHandShakeLocal (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
529
530 //
531 // In the global version, we don't know who we'll receive from, so we
532 // need to do some collective communication first.
533 //
534 static void doHandShakeReduceScatter (const Vector<Long>& Snds, Vector<Long>& Rcvs);
535
536 //
537 // Another version of the global handshake implemented with MPI-3
538 // one-sided communication.
539 //
540 static void doHandShakeOneSided (const ParticleContainerBase& pc,
541 const Vector<Long>& Snds, Vector<Long>& Rcvs);
542
543 //
544 // Another version of the above that is implemented using MPI All-to-All
545 //
546 static void doHandShakeAllToAll (const Vector<Long>& Snds, Vector<Long>& Rcvs);
547
548 bool m_local = false;
549 int m_do_one_sided_comms = 0;
550};
551
553{
554 const unsigned int* m_box_offsets;
555 const std::size_t* m_pad_correction;
556
559
561 : m_box_offsets(plan.m_box_offsets.dataPtr()),
562 m_pad_correction(plan.m_snd_pad_correction_d.dataPtr()),
563 m_get_pid(map.getPIDFunctor()),
564 m_get_bucket(map.getBucketFunctor())
565 {}
566
568 Long operator() (int dst_box, int dst_tile, int dst_lev, std::size_t psize, int i) const
569 {
570 int dst_pid = m_get_pid(dst_lev, dst_box, dst_tile);
571 Long dst_offset = Long(psize)*(m_box_offsets[m_get_bucket(dst_lev, dst_box, dst_tile)] + i);
572 dst_offset += Long(m_pad_correction[dst_pid]);
573 return dst_offset;
574 }
575};
576
//! Pack all particles queued in `op` into the send buffer laid out by `plan`,
//! applying periodic shifts to positions where required. Falls back to pinned
//! host memory when the buffer's device arena is short on space.
template <class PC, class Buffer,
          std::enable_if_t<IsParticleContainer<PC>::value &&
                           std::is_base_of_v<PolymorphicArenaAllocator<typename Buffer::value_type>,
                                             Buffer>, int> foo = 0>
void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& plan,
                 Buffer& snd_buffer)
{
    BL_PROFILE("amrex::packBuffer");

    Long psize = plan.superParticleSize();

    int num_levels = op.numLevels();
    int num_buckets = pc.BufferMap().numBuckets();

    // Total buffer size: either from the plan's per-rank send offsets, or,
    // if those are empty, from the total particle count (the final entry of
    // the bucket-offset array) times the particle size.
    std::size_t total_buffer_size = 0;
    if (plan.m_snd_offsets.empty())
    {
        unsigned int np = 0;
        Gpu::copy(Gpu::deviceToHost, plan.m_box_offsets.begin() + num_buckets,
                  plan.m_box_offsets.begin() + num_buckets + 1, &np);
        total_buffer_size = np*psize;
    }
    else
    {
        total_buffer_size = plan.m_snd_offsets.back();
    }

    // Not enough device memory: fall back to a pinned host arena.
    if (! snd_buffer.arena()->hasFreeDeviceMemory(total_buffer_size)) {
        snd_buffer.clear();
        snd_buffer.setArena(The_Pinned_Arena());
    }
    snd_buffer.resize(total_buffer_size);

    const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
    const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();

    // Domain extents and periodicity for the periodic position shift.
    const auto plo = pc.Geom(0).ProbLoArray();
    const auto phi = pc.Geom(0).ProbHiArray();
    const auto is_per = pc.Geom(0).isPeriodicArray();
#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    // Host/OpenMP path: batches are recorded first and packed afterwards in
    // parallel with an even split of the particle range over threads.
    struct OmpPackWork
    {
        typename PC::ParticleTileType const* src_tile = nullptr;
        const int* boxes = nullptr;           // destination grid per particle (< 0: skipped)
        const int* levels = nullptr;          // destination level per particle
        const int* tiles = nullptr;           // destination tile per particle
        const int* src_indices = nullptr;     // source index in src_tile per particle
        const IntVect* periodic_shift = nullptr;  // periodic shift per particle
        const int* dst_indices = nullptr;     // slot index assigned by the plan
        int num_copies = 0;
    };
    Vector<OmpPackWork> omp_pack_work;
    Vector<Long> omp_pack_offsets(1, 0);
#endif
    for (int lev = 0; lev < num_levels; ++lev)
    {
        auto& plev = pc.GetParticles(lev);
        for (auto& kv : plev)
        {
            auto index = kv.first;
            auto& src_tile = plev.at(index);
            int num_copies = op.numCopies(index, lev);
            if (num_copies == 0) { continue; }

            const auto* p_boxes = op.m_boxes[lev].at(index).dataPtr();
            const auto* p_levels = op.m_levels[lev].at(index).dataPtr();
            const auto* p_tiles = op.m_tiles[lev].at(index).dataPtr();
            const auto* p_src_indices = op.m_src_indices[lev].at(index).dataPtr();
            const auto* p_periodic_shift = op.m_periodic_shift[lev].at(index).dataPtr();
            const auto* p_dst_indices = plan.m_dst_indices[lev].at(index).dataPtr();
#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
            omp_pack_work.push_back({&src_tile, p_boxes, p_levels, p_tiles,
                                     p_src_indices, p_periodic_shift, p_dst_indices, num_copies});
            omp_pack_offsets.push_back(omp_pack_offsets.back() + num_copies);
#else
            const auto& ptd = src_tile.getConstParticleTileData();
            auto* p_snd_buffer = snd_buffer.dataPtr();
            GetSendBufferOffset get_offset(plan, pc.BufferMap());
            amrex::ParallelFor(num_copies, [=] AMREX_GPU_DEVICE (int i)
            {
                int dst_box = p_boxes[i];
                if (dst_box >= 0)   // negative box id: particle is not copied
                {
                    int dst_tile = p_tiles[i];
                    int dst_lev = p_levels[i];
                    auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize, p_dst_indices[i]);
                    int src_index = p_src_indices[i];
                    ptd.packParticleData(p_snd_buffer, src_index, dst_offset, p_comm_real, p_comm_int);

                    // If the particle crosses a periodic boundary, shift the
                    // packed position by one domain length per direction.
                    const IntVect& pshift = p_periodic_shift[i];
                    bool do_periodic_shift =
                        AMREX_D_TERM( (is_per[0] && pshift[0] != 0),
                                   || (is_per[1] && pshift[1] != 0),
                                   || (is_per[2] && pshift[2] != 0) );

                    if (do_periodic_shift)
                    {
                        ParticleReal pos[AMREX_SPACEDIM];
                        Long pos_offset = dst_offset;
                        // for pure SoA positions come after idcpu
                        if constexpr (PC::ParticleType::is_soa_particle) {
                            pos_offset += sizeof(uint64_t);
                        }
                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[pos_offset],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                        {
                            if (! is_per[idim]) { continue; }
                            if (pshift[idim] > 0) {
                                pos[idim] += phi[idim] - plo[idim];
                            } else if (pshift[idim] < 0) {
                                pos[idim] -= phi[idim] - plo[idim];
                            }
                        }
                        amrex::Gpu::memcpy(&p_snd_buffer[pos_offset], &pos[0],
                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                    }
                }
            });
#endif
        }
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    if (!omp_pack_work.empty())
    {
        auto* p_snd_buffer = snd_buffer.dataPtr();
        GetSendBufferOffset get_offset(plan, pc.BufferMap());
        const Long total_num_copies = omp_pack_offsets.back();

#pragma omp parallel
        {
            int thread_num = OpenMP::get_thread_num();
            int num_threads = OpenMP::get_num_threads();
            // Static, even split of the global particle range.
            Long ibegin = thread_num*total_num_copies/num_threads;
            Long iend = (thread_num+1)*total_num_copies/num_threads;

            if (ibegin < iend)
            {
                // Locate the first batch overlapping this thread's range.
                int iwork = static_cast<int>(std::upper_bound(omp_pack_offsets.begin(),
                                                              omp_pack_offsets.end(),
                                                              ibegin)
                                             - omp_pack_offsets.begin()) - 1;
                while (iwork < static_cast<int>(omp_pack_work.size()) &&
                       omp_pack_offsets[iwork] < iend)
                {
                    auto const& work = omp_pack_work[iwork];
                    auto const& ptd = work.src_tile->getConstParticleTileData();
                    // Clip the batch to this thread's global range.
                    Long global_begin = std::max(ibegin, omp_pack_offsets[iwork]);
                    Long global_end = std::min(iend, omp_pack_offsets[iwork+1]);
                    int local_begin = static_cast<int>(global_begin - omp_pack_offsets[iwork]);
                    int local_end = static_cast<int>(global_end - omp_pack_offsets[iwork]);
                    for (int i = local_begin; i < local_end; ++i)
                    {
                        int dst_box = work.boxes[i];
                        if (dst_box >= 0)
                        {
                            int dst_tile = work.tiles[i];
                            int dst_lev = work.levels[i];
                            auto dst_offset = get_offset(dst_box, dst_tile, dst_lev, psize,
                                                         work.dst_indices[i]);
                            int src_index = work.src_indices[i];
                            ptd.packParticleData(p_snd_buffer, src_index, dst_offset,
                                                 p_comm_real, p_comm_int);

                            // Periodic position shift, mirroring the GPU path.
                            const IntVect& pshift = work.periodic_shift[i];
                            bool do_periodic_shift =
                                AMREX_D_TERM( (is_per[0] && pshift[0] != 0),
                                           || (is_per[1] && pshift[1] != 0),
                                           || (is_per[2] && pshift[2] != 0) );

                            if (do_periodic_shift)
                            {
                                ParticleReal pos[AMREX_SPACEDIM];
                                Long pos_offset = dst_offset;
                                // for pure SoA positions come after idcpu
                                if constexpr (PC::ParticleType::is_soa_particle) {
                                    pos_offset += sizeof(uint64_t);
                                }
                                amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[pos_offset],
                                                   AMREX_SPACEDIM*sizeof(ParticleReal));
                                for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                                {
                                    if (! is_per[idim]) { continue; }
                                    if (pshift[idim] > 0) {
                                        pos[idim] += phi[idim] - plo[idim];
                                    } else if (pshift[idim] < 0) {
                                        pos[idim] -= phi[idim] - plo[idim];
                                    }
                                }
                                amrex::Gpu::memcpy(&p_snd_buffer[pos_offset], &pos[0],
                                                   AMREX_SPACEDIM*sizeof(ParticleReal));
                            }
                        }
                    }
                    ++iwork;
                }
            }
        }
    }
#endif
}
778
//! Unpack the locally packed send buffer back into this rank's own tiles.
//! `policy` decides how destination tiles are resized (e.g. as neighbor
//! particles or as regular particles).
template <class PC, class Buffer, class UnpackPolicy,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void unpackBuffer (PC& pc, const ParticleCopyPlan& plan, const Buffer& snd_buffer, UnpackPolicy const& policy)
{
    BL_PROFILE("amrex::unpackBuffer");

    using PTile = typename PC::ParticleTileType;

    int num_levels = pc.BufferMap().numLevels();
    Long psize = plan.superParticleSize();

    // count how many particles we have to add to each tile
    std::vector<int> sizes;
    std::vector<PTile*> tiles;
    for (int lev = 0; lev < num_levels; ++lev)
    {
        for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int gid = mfi.index();
            int tid = mfi.LocalTileIndex();
            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            int num_copies = plan.m_box_counts_h[pc.BufferMap().gridAndTileAndLevToBucket(gid, tid, lev)];
            sizes.push_back(num_copies);
            tiles.push_back(&tile);
        }
    }

    // resize the tiles and compute offsets
    std::vector<int> offsets;
    policy.resizeTiles(tiles, sizes, offsets);

    const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
    const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    // Host/OpenMP path: record batches first, unpack them afterwards in
    // parallel with an even split of the particle range over threads.
    struct OmpUnpackWork
    {
        PTile* tile = nullptr;
        int gid = 0;
        int tid = 0;
        int lev = 0;
        int offset = 0;   // insertion offset into the destination tile
        int size = 0;     // number of particles for this tile
    };
    Vector<OmpUnpackWork> omp_unpack_work;
    Vector<Long> omp_unpack_offsets(1, 0);
#endif

    // local unpack
    int uindex = 0;
    for (int lev = 0; lev < num_levels; ++lev)
    {
        auto& plev = pc.GetParticles(lev);
        for(MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi)
        {
            int gid = mfi.index();
            int tid = mfi.LocalTileIndex();
            auto index = std::make_pair(gid, tid);

            auto& tile = plev[index];

            GetSendBufferOffset get_offset(plan, pc.BufferMap());

            int offset = offsets[uindex];
            int size = sizes[uindex];
            ++uindex;

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
            omp_unpack_work.push_back({&tile, gid, tid, lev, offset, size});
            omp_unpack_offsets.push_back(omp_unpack_offsets.back() + size);
#else
            auto p_snd_buffer = snd_buffer.dataPtr();
            auto ptd = tile.getParticleTileData();
            amrex::ParallelFor(size, [=] AMREX_GPU_DEVICE (int i)
            {
                // Read the i-th particle destined for this tile from the
                // send buffer and append it at offset + i.
                auto src_offset = get_offset(gid, tid, lev, psize, i);
                int dst_index = offset + i;
                ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index, p_comm_real, p_comm_int);
            });
#endif
        }
    }

#if defined(AMREX_USE_OMP) && !defined(AMREX_USE_GPU)
    if (!omp_unpack_work.empty())
    {
        GetSendBufferOffset get_offset(plan, pc.BufferMap());
        auto p_snd_buffer = snd_buffer.dataPtr();
        const Long total_num_copies = omp_unpack_offsets.back();

#pragma omp parallel
        {
            int thread_num = OpenMP::get_thread_num();
            int num_threads = OpenMP::get_num_threads();
            // Static, even split of the global particle range.
            Long ibegin = thread_num*total_num_copies/num_threads;
            Long iend = (thread_num+1)*total_num_copies/num_threads;

            if (ibegin < iend)
            {
                // Locate the first batch overlapping this thread's range.
                int iwork = static_cast<int>(std::upper_bound(omp_unpack_offsets.begin(),
                                                              omp_unpack_offsets.end(),
                                                              ibegin)
                                             - omp_unpack_offsets.begin()) - 1;
                while (iwork < static_cast<int>(omp_unpack_work.size()) &&
                       omp_unpack_offsets[iwork] < iend)
                {
                    auto const& work = omp_unpack_work[iwork];
                    auto ptd = work.tile->getParticleTileData();
                    // Clip the batch to this thread's global range.
                    Long global_begin = std::max(ibegin, omp_unpack_offsets[iwork]);
                    Long global_end = std::min(iend, omp_unpack_offsets[iwork+1]);
                    int local_begin = static_cast<int>(global_begin - omp_unpack_offsets[iwork]);
                    int local_end = static_cast<int>(global_end - omp_unpack_offsets[iwork]);
                    for (int i = local_begin; i < local_end; ++i)
                    {
                        auto src_offset = get_offset(work.gid, work.tid, work.lev, psize, i);
                        int dst_index = work.offset + i;
                        ptd.unpackParticleData(p_snd_buffer, src_offset, dst_index,
                                               p_comm_real, p_comm_int);
                    }
                    ++iwork;
                }
            }
        }
    }
#endif
}
905
//! Post the nonblocking receives and sends that move the packed particle
//! buffers between ranks. Must be matched by communicateParticlesFinish().
template <class PC, class SndBuffer, class RcvBuffer,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndBuffer& snd_buffer, RcvBuffer& rcv_buffer)
{
    BL_PROFILE("amrex::communicateParticlesStart");

#ifdef AMREX_USE_MPI
    Long psize = plan.superParticleSize();
    const int NProcs = ParallelContext::NProcsSub();
    const int MyProc = ParallelContext::MyProcSub();

    if (NProcs == 1) { return; }

    Vector<int> RcvProc;   // ranks we will receive from
    Vector<Long> rOffset;  // byte offset of each rank's segment in rcv_buffer

    plan.m_rcv_pad_correction_h.resize(0);
    plan.m_rcv_pad_correction_h.push_back(0);

    // Lay out the receive buffer: one aligned segment per sending rank.
    Long TotRcvBytes = 0;
    for (int i = 0; i < NProcs; ++i) {
        if (plan.m_rcv_num_particles[i] > 0) {
            RcvProc.push_back(i);
            Long nbytes = plan.m_rcv_num_particles[i]*psize;
            std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
            TotRcvBytes = Long(amrex::aligned_size(acd, TotRcvBytes));
            rOffset.push_back(TotRcvBytes);
            TotRcvBytes += Long(amrex::aligned_size(acd, nbytes));
            plan.m_rcv_pad_correction_h.push_back(plan.m_rcv_pad_correction_h.back() + nbytes);
        }
    }

    // Convert cumulative byte counts into the alignment padding carried by
    // each segment.
    // NOTE(review): this loop bounds on plan.m_nrcvs, which is only
    // reassigned from RcvProc.size() further below — presumably the earlier
    // handshake in buildMPIStart already set it to the same value; confirm.
    for (int i = 0; i < plan.m_nrcvs; ++i)
    {
        plan.m_rcv_pad_correction_h[i] = rOffset[i] - plan.m_rcv_pad_correction_h[i];
    }

    // NOTE(review): the lines copying m_rcv_pad_correction_h to the device
    // appear to be missing from this excerpt; only the trailing argument
    // remains.
        plan.m_rcv_pad_correction_d.begin());

    rcv_buffer.resize(TotRcvBytes);

    plan.m_nrcvs = int(RcvProc.size());

    plan.m_particle_rstats.resize(0);
    plan.m_particle_rstats.resize(plan.m_nrcvs);

    plan.m_particle_rreqs.resize(0);
    plan.m_particle_rreqs.resize(plan.m_nrcvs);

    plan.m_particle_sstats.resize(0);
    plan.m_particle_sreqs.resize(0);

    const int SeqNum = ParallelDescriptor::SeqNum();

    // Post receives.
    for (int i = 0; i < plan.m_nrcvs; ++i) {
        const auto Who = RcvProc[i];
        const auto offset = rOffset[i];
        Long nbytes = plan.m_rcv_num_particles[Who]*psize;
        std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
        const auto Cnt = amrex::aligned_size(acd, nbytes);

        AMREX_ASSERT(Cnt > 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);
        AMREX_ASSERT(amrex::aligned_size(acd, nbytes) % acd == 0);

        plan.m_particle_rreqs[i] =
            ParallelDescriptor::Arecv((char*) (rcv_buffer.dataPtr() + offset), Cnt, Who, SeqNum, ParallelContext::CommunicatorSub()).req();
    }

    if (plan.m_NumSnds == 0) { return; }

    // Send.
    for (int i = 0; i < NProcs; ++i)
    {
        if (i == MyProc) { continue; }  // no self-send
        const auto Who = i;
        const auto Cnt = plan.m_snd_counts[i];
        if (Cnt == 0) { continue; }

        auto snd_offset = plan.m_snd_offsets[i];
        AMREX_ASSERT(plan.m_snd_counts[i] % ParallelDescriptor::sizeof_selected_comm_data_type(plan.m_snd_num_particles[i]*psize) == 0);
        AMREX_ASSERT(Who >= 0 && Who < NProcs);

        // NOTE(review): the tail of this Asend call (communicator argument
        // and .req()) appears to be missing from this excerpt.
        plan.m_particle_sreqs.push_back(ParallelDescriptor::Asend((char const*)(snd_buffer.dataPtr()+snd_offset), Cnt, Who, SeqNum,
    }

    plan.m_particle_sstats.resize(plan.m_particle_sreqs.size());

#else
    amrex::ignore_unused(pc,plan,snd_buffer,rcv_buffer);
#endif // MPI
}
1003
1004void communicateParticlesFinish (const ParticleCopyPlan& plan);
1005
//! Unpack particles received from other ranks into their destination tiles.
//! `policy` decides how destination tiles are resized.
template <class PC, class Buffer, class UnpackPolicy,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
void unpackRemotes (PC& pc, const ParticleCopyPlan& plan, Buffer& rcv_buffer, UnpackPolicy const& policy)
{
    BL_PROFILE("amrex::unpackRemotes");

#ifdef AMREX_USE_MPI
    const int NProcs = ParallelContext::NProcsSub();
    if (NProcs == 1) { return; }

    const int MyProc = ParallelContext::MyProcSub();
    amrex::ignore_unused(MyProc);
    using PTile = typename PC::ParticleTileType;

    if (plan.m_nrcvs > 0)
    {
        const auto* p_comm_real = plan.d_real_comp_mask.dataPtr();
        const auto* p_comm_int = plan.d_int_comp_mask.dataPtr();
        auto* p_rcv_buffer = rcv_buffer.dataPtr();

        // First pass: how many particles each destination tile receives.
        std::vector<int> sizes;
        std::vector<PTile*> tiles;
        for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
        {
            int copy_size = plan.m_rcv_box_counts[i];
            int lev = plan.m_rcv_box_levs[i];
            int gid = plan.m_rcv_box_ids[i];
            int tid = plan.m_rcv_box_tids[i];
            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            sizes.push_back(copy_size);
            tiles.push_back(&tile);
        }

        // Grow the tiles via the policy and collect each batch's insertion
        // offset.
        Vector<int> offsets;
        policy.resizeTiles(tiles, sizes, offsets);
        int uindex = 0;
        // procindex tracks which sending rank's buffer segment we are in,
        // to apply that rank's padding adjustment.
        int procindex = 0, rproc = plan.m_rcv_box_pids[0];
        for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i)
        {
            int lev = plan.m_rcv_box_levs[i];
            int gid = plan.m_rcv_box_ids[i];
            int tid = plan.m_rcv_box_tids[i];
            auto offset = plan.m_rcv_box_offsets[i];
            procindex = (rproc == plan.m_rcv_box_pids[i]) ? procindex : procindex+1;
            rproc = plan.m_rcv_box_pids[i];

            auto& tile = pc.DefineAndReturnParticleTile(lev, gid, tid);
            auto ptd = tile.getParticleTileData();

            // Every received particle must target a grid owned by this rank.
            AMREX_ASSERT(MyProc ==
                         ParallelContext::global_to_local_rank(pc.ParticleDistributionMap(lev)[gid]));

            int dst_offset = offsets[uindex];
            int size = sizes[uindex];
            ++uindex;

            Long psize = plan.superParticleSize();
            const auto* p_pad_adjust = plan.m_rcv_pad_correction_d.dataPtr();

            amrex::ParallelForOMP(size, [=] AMREX_GPU_DEVICE (int ip) {
                // Source offset: particle position within the segment plus
                // the segment's alignment-padding correction.
                Long src_offset = psize * static_cast<Long>(offset + ip)
                    + static_cast<Long>(p_pad_adjust[procindex]);
                int dst_index = dst_offset + ip;
                ptd.unpackParticleData(p_rcv_buffer, src_offset, dst_index,
                                       p_comm_real, p_comm_int);
            });
        }
    }
#else
    amrex::ignore_unused(pc,plan,rcv_buffer,policy);
#endif // MPI
}
1079
1080} // namespace amrex
1081
1082#endif // AMREX_PARTICLECOMMUNICATION_H_
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
amrex::ParmParse pp
Input file parser instance for the given namespace.
Definition AMReX_HypreIJIface.cpp:15
Array4< int const > offset
Definition AMReX_HypreMLABecLap.cpp:1139
#define AMREX_D_TERM(a, b, c)
Definition AMReX_SPACE.H:172
Iterator for looping over tiles and boxes of amrex::FabArray based containers.
Definition AMReX_MFIter.H:88
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
iterator begin() noexcept
Definition AMReX_PODVector.H:674
iterator end() noexcept
Definition AMReX_PODVector.H:678
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
MPI_Request req() const
Definition AMReX_ParallelDescriptor.H:74
Parse Parameters From Command Line and Input Files.
Definition AMReX_ParmParse.H:349
int query(std::string_view name, bool &ref, int ival=FIRST) const
Same as querykth() but searches for the last occurrence of name.
Definition AMReX_ParmParse.cpp:1946
Definition AMReX_ParticleBufferMap.H:59
Definition AMReX_ParticleContainerBase.H:43
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:28
T * dataPtr() noexcept
get access to the underlying data pointer
Definition AMReX_Vector.H:49
Long size() const noexcept
Definition AMReX_Vector.H:53
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
amrex_long Long
Definition AMReX_INT.H:30
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1442
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:326
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:128
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:228
static constexpr DeviceToHost deviceToHost
Definition AMReX_GpuContainers.H:106
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:105
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
__host__ __device__ void * memcpy(void *dest, const void *src, std::size_t count)
Definition AMReX_GpuUtility.H:220
__host__ __device__ AMREX_FORCE_INLINE T FetchAdd(T *const sum, T const value) noexcept
Definition AMReX_GpuAtomic.H:644
constexpr int get_thread_num()
Definition AMReX_OpenMP.H:37
constexpr int get_num_threads()
Definition AMReX_OpenMP.H:35
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70
int MyProcSub() noexcept
my sub-rank in current frame
Definition AMReX_ParallelContext.H:76
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
int NProcsSub() noexcept
number of ranks in current frame
Definition AMReX_ParallelContext.H:74
Message Asend(const T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1172
int SeqNum() noexcept
Returns sequential message sequence numbers, usually used as tags for send/recv.
Definition AMReX_ParallelDescriptor.H:696
Message Arecv(T *, size_t n, int pid, int tag)
Definition AMReX_ParallelDescriptor.H:1214
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:908
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:1008
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:443
const int[]
Definition AMReX_BLProfiler.cpp:1664
std::size_t aligned_size(std::size_t align_requirement, std::size_t size) noexcept
Given a minimum required size in bytes, this returns the smallest size greater or equal to size that ...
Definition AMReX_Arena.H:33
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:781
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:581
Definition AMReX_ParticleBufferMap.H:38
Definition AMReX_ParticleBufferMap.H:14
Definition AMReX_ParticleCommunication.H:553
const unsigned int * m_box_offsets
Definition AMReX_ParticleCommunication.H:554
GetPID m_get_pid
Definition AMReX_ParticleCommunication.H:557
GetBucket m_get_bucket
Definition AMReX_ParticleCommunication.H:558
const std::size_t * m_pad_correction
Definition AMReX_ParticleCommunication.H:555
GetSendBufferOffset(const ParticleCopyPlan &plan, const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.H:560
__device__ Long operator()(int dst_box, int dst_tile, int dst_lev, std::size_t psize, int i) const
Definition AMReX_ParticleCommunication.H:568
Definition AMReX_ParticleCommunication.H:26
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:28
Definition AMReX_ParticleCommunication.H:65
void resize(int gid, int tid, int lev, int size)
Definition AMReX_ParticleCommunication.cpp:25
void setNumLevels(int num_levels)
Definition AMReX_ParticleCommunication.cpp:16
Vector< std::map< TileKey, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition AMReX_ParticleCommunication.H:72
int numLevels() const
Definition AMReX_ParticleCommunication.H:87
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_levels
Definition AMReX_ParticleCommunication.H:69
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_boxes
Definition AMReX_ParticleCommunication.H:68
int numCopies(TileKey const &index, int lev) const
Definition AMReX_ParticleCommunication.H:80
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:66
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_tiles
Definition AMReX_ParticleCommunication.H:70
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_src_indices
Definition AMReX_ParticleCommunication.H:71
void clear()
Definition AMReX_ParticleCommunication.cpp:7
Definition AMReX_ParticleCommunication.H:96
Definition AMReX_ParticleCommunication.H:110
BuildWorkspace(int a_num_buckets)
Definition AMReX_ParticleCommunication.H:111
Gpu::HostVector< unsigned int > h_box_counts
Definition AMReX_ParticleCommunication.H:119
int num_buckets
Definition AMReX_ParticleCommunication.H:118
Definition AMReX_ParticleCommunication.H:94
Definition AMReX_ParticleCommunication.H:95
Definition AMReX_ParticleCommunication.H:91
Vector< int > m_rcv_box_ids
Definition AMReX_ParticleCommunication.H:365
Vector< std::size_t > m_snd_offsets
Definition AMReX_ParticleCommunication.H:392
Vector< int > m_rcv_box_counts
Definition AMReX_ParticleCommunication.H:363
Vector< std::size_t > m_snd_counts
Definition AMReX_ParticleCommunication.H:393
void finalizeBuildBoxCounts(BuildWorkspace const &workspace, bool use_host_box_counters)
Definition AMReX_ParticleCommunication.H:343
Long m_NumSnds
Definition AMReX_ParticleCommunication.H:370
void buildMPIFinish(const ParticleBufferMap &map)
Definition AMReX_ParticleCommunication.cpp:221
Vector< int > m_neighbor_procs
Definition AMReX_ParticleCommunication.H:384
void clear()
Definition AMReX_ParticleCommunication.cpp:39
Vector< int > m_rcv_box_pids
Definition AMReX_ParticleCommunication.H:367
void buildCopies(const PC &pc, const ParticleCopyOp &op, AtomicScatterAlgorithm, BuildWorkspace &, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:312
Vector< int > m_rcv_box_levs
Definition AMReX_ParticleCommunication.H:368
void build(const PC &pc, const ParticleCopyOp &op, const Vector< int > &int_comp_mask, const Vector< int > &real_comp_mask, int local)
Definition AMReX_ParticleCommunication.H:407
Gpu::DeviceVector< int > d_real_comp_mask
Definition AMReX_ParticleCommunication.H:401
std::pair< int, int > TileKey
Definition AMReX_ParticleCommunication.H:92
Gpu::DeviceVector< std::size_t > m_snd_pad_correction_d
Definition AMReX_ParticleCommunication.H:396
void forEachCopyBatch(const PC &pc, const ParticleCopyOp &op, F &&f)
Definition AMReX_ParticleCommunication.H:129
Long m_superparticle_size
Definition AMReX_ParticleCommunication.H:402
Vector< int > m_rcv_box_tids
Definition AMReX_ParticleCommunication.H:366
Vector< Long > m_Snds
Definition AMReX_ParticleCommunication.H:386
Vector< MPI_Request > m_particle_sreqs
Definition AMReX_ParticleCommunication.H:379
Vector< std::map< TileKey, Gpu::DeviceVector< int > > > m_dst_indices
Definition AMReX_ParticleCommunication.H:357
Gpu::DeviceVector< unsigned int > m_box_counts_d
Definition AMReX_ParticleCommunication.H:359
Vector< Long > m_Rcvs
Definition AMReX_ParticleCommunication.H:387
Vector< int > m_rcv_box_offsets
Definition AMReX_ParticleCommunication.H:364
Vector< std::size_t > m_snd_pad_correction_h
Definition AMReX_ParticleCommunication.H:395
Vector< MPI_Status > m_particle_sstats
Definition AMReX_ParticleCommunication.H:378
Gpu::DeviceVector< unsigned int > m_box_offsets
Definition AMReX_ParticleCommunication.H:361
Gpu::DeviceVector< std::size_t > m_rcv_pad_correction_d
Definition AMReX_ParticleCommunication.H:399
Vector< std::size_t > m_rOffset
Definition AMReX_ParticleCommunication.H:389
Vector< MPI_Status > m_particle_rstats
Definition AMReX_ParticleCommunication.H:375
Vector< Long > m_snd_num_particles
Definition AMReX_ParticleCommunication.H:381
Vector< MPI_Request > m_particle_rreqs
Definition AMReX_ParticleCommunication.H:376
Long superParticleSize() const
Definition AMReX_ParticleCommunication.H:404
Gpu::HostVector< int > m_rcv_data
Definition AMReX_ParticleCommunication.H:390
void buildCopies(const PC &pc, const ParticleCopyOp &op, StableOrderedAlgorithm, BuildWorkspace &workspace, GetBucket const &getBucket)
Definition AMReX_ParticleCommunication.H:152
Vector< MPI_Status > m_build_stats
Definition AMReX_ParticleCommunication.H:372
Vector< int > m_RcvProc
Definition AMReX_ParticleCommunication.H:388
Vector< std::size_t > m_rcv_pad_correction_h
Definition AMReX_ParticleCommunication.H:398
int m_nrcvs
Definition AMReX_ParticleCommunication.H:371
Gpu::HostVector< unsigned int > m_box_counts_h
Definition AMReX_ParticleCommunication.H:360
Vector< MPI_Request > m_build_rreqs
Definition AMReX_ParticleCommunication.H:373
Vector< Long > m_rcv_num_particles
Definition AMReX_ParticleCommunication.H:382
Gpu::DeviceVector< int > d_int_comp_mask
Definition AMReX_ParticleCommunication.H:401
Definition AMReX_ParticleCommunication.H:41
void resizeTiles(std::vector< PTile * > &tiles, const std::vector< int > &sizes, std::vector< int > &offsets) const
Definition AMReX_ParticleCommunication.H:43