Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_NeighborParticlesGPUImpl.H
Go to the documentation of this file.
1#ifndef AMREX_NEIGHBORPARTICLESGPUIMPL_H_
2#define AMREX_NEIGHBORPARTICLESGPUIMPL_H_
3#include <AMReX_Config.H>
4
5namespace amrex {
6
7namespace detail
8{
// Collects the cell-centered boxes, each `ncells` wide, that sit just outside
// `box`:
//   * for every direction i, the boxes adjacent to the high and low side of
//     `box` ("face" boxes);
//   * for every face box and every other direction j, the boxes adjacent to
//     that face box ("edge" boxes);
//   * for every edge box and every remaining direction k, the boxes adjacent
//     to that edge box ("corner" boxes).
//
// Parameters:
//   box    - the box whose surroundings are enumerated; asserted to be
//            cell-centered.
//   ncells - width, in cells, passed as the length argument to
//            adjCellHi/adjCellLo.
//
// Returns the collected boxes in the order they were generated.
//
// NOTE(review): as shown, for AMREX_SPACEDIM > 1 the same edge/corner box is
// appended more than once, because it is reached through different orderings
// of (i, j, k). The embedded listing numbers jump from 44 to 46 right before
// the return, so one statement appears to be missing from this listing -
// confirm against the upstream header before relying on the exact contents.
9 inline Vector<Box> getBoundaryBoxes(const Box& box, int ncells)
10 {
// The assertion message indicates this guards against requesting more cells
// than the box can accommodate.
11 AMREX_ASSERT_WITH_MESSAGE(box.size() > 2*IntVect(AMREX_D_DECL(ncells, ncells, ncells)),
12 "Too many cells requested in getBoundaryBoxes");
13
14 AMREX_ASSERT_WITH_MESSAGE(box.ixType().cellCentered(),
15 "Box must be cell-centered");
16
17 Vector<Box> bl;
18 for (int i = 0; i < AMREX_SPACEDIM; ++i) {
// Face boxes: adjacent to `box` along direction i.
19 BoxList face_boxes;
20 Box hi_face_box = adjCellHi(box, i, ncells);
21 Box lo_face_box = adjCellLo(box, i, ncells);
22 face_boxes.push_back(hi_face_box); bl.push_back(hi_face_box);
23 face_boxes.push_back(lo_face_box); bl.push_back(lo_face_box);
24 for (auto face_box : face_boxes) {
25 for (int j = 0; j < AMREX_SPACEDIM; ++j) {
26 if (i == j) { continue; }
// Edge boxes: adjacent to the current face box along a different direction j.
27 BoxList edge_boxes;
28 Box hi_edge_box = adjCellHi(face_box, j, ncells);
29 Box lo_edge_box = adjCellLo(face_box, j, ncells);
30 edge_boxes.push_back(hi_edge_box); bl.push_back(hi_edge_box);
31 edge_boxes.push_back(lo_edge_box); bl.push_back(lo_edge_box);
32 for (auto edge_box : edge_boxes) {
33 for (int k = 0; k < AMREX_SPACEDIM; ++k) {
34 if ((j == k) || (i == k)) { continue; }
// Corner boxes: adjacent to the current edge box along the remaining direction k.
35 Box hi_corner_box = adjCellHi(edge_box, k, ncells);
36 Box lo_corner_box = adjCellLo(edge_box, k, ncells);
37 bl.push_back(hi_corner_box);
38 bl.push_back(lo_corner_box);
39 }
40 }
41 }
42 }
43 }
44
46 return bl;
47 }
48}
49
// NeighborParticleContainer::buildNeighborMask (name taken from the profiler
// string below and from the Doxygen index at the end of this page, which
// places buildNeighborMask() at listing line 53).
//
// For every grid on level 0, works out which grids it overlaps with (also
// through each periodic shift) and uploads, per grid, an array of
// NeighborCode {grid_id, periodic_shift} plus the matching intersection boxes
// into m_code_array[grid] / m_isec_boxes[grid]. The local ranks owning the
// neighboring grids are accumulated in neighbor_procs and de-duplicated.
//
// NOTE(review): embedded listing lines 52-53 (the qualified function name),
// 95 and 112 are absent from this listing; see the inline notes below.
50template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
51void
54{
55 BL_PROFILE("NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>::buildNeighborMask");
56 m_neighbor_mask_initialized = true;
// Only level 0 is considered here.
57 const int lev = 0;
58 const Geometry& geom = this->Geom(lev);
59 const BoxArray& ba = this->ParticleBoxArray(lev);
60 const DistributionMapping& dmap = this->ParticleDistributionMap(lev);
61
// A single box in a non-periodic domain has nothing to intersect with.
62 if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
63
// Rebuild only when there is no mask yet or the mask was built for a
// different BoxArray / DistributionMapping.
// NOTE(review): nothing visible in this listing assigns m_neighbor_mask_ptr -
// confirm where it is set, otherwise this condition holds on every call.
64 if (m_neighbor_mask_ptr == nullptr ||
65 ! BoxArray::SameRefs(m_neighbor_mask_ptr->boxArray(), ba) ||
66 ! DistributionMapping::SameRefs(m_neighbor_mask_ptr->DistributionMap(), dmap))
67 {
68 const Periodicity& periodicity = geom.periodicity();
69 const std::vector<IntVect>& pshifts = periodicity.shiftIntVect();
70
71 for (MFIter mfi(ba, dmap); mfi.isValid(); ++mfi)
72 {
73 int grid = mfi.index();
74
// std::set keeps each (grid, box, shift) task once, in sorted order.
75 std::set<NeighborTask> neighbor_grids;
76 for (auto pshift : pshifts)
77 {
// Shift this grid's box by the periodic offset before intersecting.
78 const Box box = ba[mfi] + pshift;
79
80 const bool first_only = false;
// presumably the third argument is the number of cells by which the boxes are
// grown for the intersection test - TODO confirm against BoxArray::intersections
81 auto isecs = ba.intersections(box, first_only, m_num_neighbor_cells);
82
83 for (auto& isec : isecs)
84 {
85 int nbor_grid = isec.first;
// Undo the shift so the stored box is expressed relative to the unshifted grid.
86 const Box isec_box = isec.second - pshift;
// A grid is not its own neighbor unless reached through a periodic shift.
87 if ( (grid == nbor_grid) && (pshift == 0)) { continue; }
88 neighbor_grids.insert(NeighborTask(nbor_grid, isec_box, pshift));
89 const int global_rank = dmap[nbor_grid];
90 neighbor_procs.push_back(ParallelContext::global_to_local_rank(global_rank));
91 }
92 }
93
// Host-side staging for the per-grid device arrays.
// NOTE(review): h_code_arr is used below but its declaration is not visible;
// embedded listing line 95 is absent here - presumably it declares a host
// vector of NeighborCode. Confirm against the upstream header.
94 Gpu::HostVector<Box> h_isec_boxes;
96 for (auto nbor_grid : neighbor_grids)
97 {
98 NeighborCode code;
99 code.grid_id = nbor_grid.grid_id;
100 code.periodic_shift = nbor_grid.periodic_shift;
101 h_code_arr.push_back(code);
102 h_isec_boxes.push_back(nbor_grid.box);
103 }
104
// Entry j of m_code_array[grid] corresponds to entry j of m_isec_boxes[grid].
105 m_code_array[grid].resize(h_code_arr.size());
106 Gpu::copyAsync(Gpu::hostToDevice, h_code_arr.begin(), h_code_arr.end(),
107 m_code_array[grid].begin());
108 m_isec_boxes[grid].resize(h_isec_boxes.size());
109 Gpu::copyAsync(Gpu::hostToDevice, h_isec_boxes.begin(), h_isec_boxes.end(),
110 m_isec_boxes[grid].begin());
111
// NOTE(review): the host staging vectors go out of scope at the end of this
// iteration and both copies above are asynchronous. Embedded listing line 112
// is absent; presumably it synchronizes the stream - confirm.
113 }
114
115 RemoveDuplicates(neighbor_procs);
116 }
117}
118
// NeighborParticleContainer::buildNeighborCopyOp
//
// Fills neighbor_copy_op for level 0: for every particle tile it records, for
// each particle and each intersection box (m_isec_boxes[gid]) that contains
// the particle's cell, one copy entry consisting of destination grid id,
// level (always 0), periodic shift and source particle index.
//
// Parameters:
//   use_boundary_neighbor - when true, only the particles listed in
//       m_boundary_particle_ids[lev][index] are considered and their ids are
//       read from that list; when false, all particles of the tile are used.
//       The Doxygen index at the end of this page shows the declaration
//       defaulting this to false.
//
// The work is done in two device passes around an exclusive scan:
//   pass 1 counts matches per particle, the scan turns counts into offsets,
//   pass 2 writes the entries at those offsets.
//
// NOTE(review): embedded listing lines 121 (presumably the class qualifier),
// 190, 199 and 220 are absent from this listing; see inline notes.
119template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
120void
122buildNeighborCopyOp (bool use_boundary_neighbor)
123{
124 BL_PROFILE("NeighborParticleContainer<NStructReal, NStructInt, NArrayReal, NArrayInt>::buildNeighborCopyOp()");
125
// Neighbors may already be present only when working from the boundary list.
126 AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor);
127
128 const int lev = 0;
129 const auto& geom = this->Geom(lev);
130 const auto dxi = this->Geom(lev).InvCellSizeArray();
131 const auto plo = this->Geom(lev).ProbLoArray();
132 const auto domain = this->Geom(lev).Domain();
133 auto& plev = this->GetParticles(lev);
134 auto& ba = this->ParticleBoxArray(lev);
135
// Same early-out as buildNeighborMask: one box, no periodicity.
136 if (ba.size() == 1 && (! geom.isAnyPeriodic()) ) { return; }
137
138 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
139 {
140 int gid = mfi.index();
141 int tid = mfi.LocalTileIndex();
142 auto index = std::make_pair(gid, tid);
143
144 auto& src_tile = plev[index];
145 auto& aos = src_tile.GetArrayOfStructs();
146 const size_t np_real = aos.numParticles();
147
// np is the number of particles to process: the whole tile, or just the
// entries of the boundary-id list.
148 size_t np = np_real;
149 if (use_boundary_neighbor) {
150 np = m_boundary_particle_ids[lev][index].size();
151 }
152 else {
// presumably ensures level 0 and this tile's entry exist so that the dataPtr()
// call below is valid - TODO confirm the container type's operator[] semantics
153 m_boundary_particle_ids.resize(1);
154 m_boundary_particle_ids[lev][index];
155 }
156
157 const auto* p_bndry_pid = m_boundary_particle_ids[lev][index].dataPtr();
158
// np + 1 entries: after the exclusive scan, offsets[np] holds the total count.
159 Gpu::DeviceVector<int> counts(np + 1, 0);
160 Gpu::DeviceVector<int> offsets(np + 1);
161 auto p_counts = counts.dataPtr();
162 auto p_offsets = offsets.dataPtr();
163
164 ParticleType* p_ptr = aos.data();
165 auto p_code_array = m_code_array[gid].dataPtr();
166 auto p_isec_boxes = m_isec_boxes[gid].dataPtr();
167 const int nisec_box = m_isec_boxes[gid].size();
168 // auto p_code_offsets = m_code_offsets[gid].dataPtr();
169
// Pass 1: count, for each processed particle, how many intersection boxes
// contain its cell.
170 AMREX_FOR_1D ( np, i,
171 {
172 // note that cannot use ternary statement here because p_bndry is not
173 // properly allocated when use_boundary_neighbor=false
174 int pid = i;
175 if (use_boundary_neighbor) {
176 pid = p_bndry_pid[i];
177 }
178 IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain);
179 for (int j=0; j<nisec_box; ++j) {
180 if (p_isec_boxes[j].contains(iv)) {
181 ++p_counts[i];
182 }
183 }
184 });
185
186 amrex::Gpu::exclusive_scan(counts.begin(), counts.end(), offsets.begin());
187
// Fetch the total number of copy entries (offsets[np]) to the host.
// NOTE(review): the copy is asynchronous and num_copies is read immediately
// below. Embedded listing line 190 is absent; presumably it synchronizes the
// stream before the read - confirm against the upstream header.
188 int num_copies;
189 Gpu::dtoh_memcpy_async(&num_copies, offsets.data()+np, sizeof(int));
191
192 neighbor_copy_op.resize(gid, lev, num_copies);
193
194 auto p_boxes = neighbor_copy_op.m_boxes[lev][gid].dataPtr();
195 auto p_levs = neighbor_copy_op.m_levels[lev][gid].dataPtr();
196 auto p_src_indices = neighbor_copy_op.m_src_indices[lev][gid].dataPtr();
197 auto p_periodic_shift = neighbor_copy_op.m_periodic_shift[lev][gid].dataPtr();
198
// Pass 2: repeat the containment test and write one entry per match, starting
// at this particle's offset. (Embedded listing line 199 is absent here.)
200 AMREX_FOR_1D ( np, i,
201 {
202 int pid = i;
203 if (use_boundary_neighbor) {
204 pid = p_bndry_pid[i];
205 }
206 IntVect iv = getParticleCell(p_ptr[pid], plo, dxi, domain);
207 int k = p_offsets[i];
208 for (int j=0; j<nisec_box; ++j) {
209 if (p_isec_boxes[j].contains(iv)) {
210 p_boxes[k] = p_code_array[j].grid_id;
211 p_levs[k] = 0;
212 p_periodic_shift[k] = p_code_array[j].periodic_shift;
213 p_src_indices[k] = pid;
214 ++k;
215 }
216 }
// Both passes must agree on the number of matches for this particle.
217 AMREX_ALWAYS_ASSERT(k == p_offsets[i+1]);
218 });
219
// NOTE(review): counts/offsets are destroyed at the end of this iteration.
// Embedded listing line 220 is absent; presumably it synchronizes the stream
// so the kernel above has finished first - confirm.
221 }
222}
223
// NeighborParticleContainer::fillNeighborsGPU (name taken from the Doxygen
// index at the end of this page, which places fillNeighborsGPU() at listing
// line 227; embedded listing lines 226-227 are absent from this listing).
//
// Full rebuild of the neighbor data, in order: build the neighbor mask,
// define the buffer map, rebuild the copy op and copy plan from scratch, then
// call updateNeighborsGPU(false) to perform the exchange with the plan just
// built.
//
// Note that the profiler label below reads "fillNeighbors", without the GPU
// suffix.
224template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
225void
228{
229 BL_PROFILE("NeighborParticleContainer::fillNeighbors");
230
// Asserts that numParticlesOutOfRange reports zero particles for nGrow = 0.
231 AMREX_ASSERT(numParticlesOutOfRange(*this, 0) == 0);
232
233 buildNeighborMask();
234 this->defineBufferMap();
235
236 neighbor_copy_op.clear();
237 neighbor_copy_plan.clear();
// Called with no argument; the declaration shown in the Doxygen index
// defaults use_boundary_neighbor to false.
238 buildNeighborCopyOp();
239 neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
240 ghost_real_comp, true);
// false: the op/plan built above are reused as-is rather than rebuilt.
241 updateNeighborsGPU(false);
242}
243
// NeighborParticleContainer::updateNeighborsGPU
//
// Clears the current neighbors, packs the send buffer according to
// neighbor_copy_op / neighbor_copy_plan, exchanges it, and unpacks both the
// locally copied and the remotely received particles.
//
// Parameters:
//   boundary_neighbors_only - when true, the copy op and plan are first
//       rebuilt via buildNeighborCopyOp(true); when false, the existing
//       op/plan are used unchanged. The Doxygen index at the end of this page
//       shows the declaration defaulting this to false.
//
// NOTE(review): embedded listing lines 246, 267, 277, 280, 286 and 297 are
// absent from this listing; see inline notes. In particular the `if` that
// pairs with the `else` below is not visible.
244template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
245void
247updateNeighborsGPU (bool boundary_neighbors_only)
248{
249 BL_PROFILE("NeighborParticleContainer::updateNeighborsGPU");
250
251 if (boundary_neighbors_only) {
252 neighbor_copy_op.clear();
253 neighbor_copy_plan.clear();
254 buildNeighborCopyOp(true);
255 neighbor_copy_plan.build(*this, neighbor_copy_op, ghost_int_comp,
256 ghost_real_comp, true);
257 }
258
259 clearNeighbors();
260
261 if (this->use_comms_arena) {
262 snd_buffer.setArena(The_Comms_Arena());
263 rcv_buffer.setArena(The_Comms_Arena());
264 }
265
266 packBuffer(*this, neighbor_copy_op, neighbor_copy_plan, snd_buffer);
// NOTE(review): the condition for this branch (embedded listing line 267) is
// missing. The Doxygen index lists ParallelDescriptor::UseGpuAwareMpi(), which
// is not otherwise visible in the code, so presumably the test is
// `if (ParallelDescriptor::UseGpuAwareMpi())` - confirm.
// This branch communicates directly between snd_buffer and rcv_buffer.
268 {
269 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
270 communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, rcv_buffer);
271 unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
272 communicateParticlesFinish(neighbor_copy_plan);
273 unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
274 }
275 else
276 {
// This branch receives into pinned_rcv_buffer and, unless snd_buffer is
// already in a pinned arena, sends from a pinned staging copy of it.
// (Embedded listing lines 277, 280 and 286 are absent; presumably stream
// synchronizations ahead of the communication calls - confirm.)
278 if (snd_buffer.arena()->isPinned()) {
279 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
281 communicateParticlesStart(*this, neighbor_copy_plan, snd_buffer, pinned_rcv_buffer);
282 } else {
283 pinned_snd_buffer.resize(snd_buffer.size());
284 Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(), snd_buffer.dataPtr(), snd_buffer.size());
285 neighbor_copy_plan.buildMPIFinish(this->BufferMap());
287 communicateParticlesStart(*this, neighbor_copy_plan, pinned_snd_buffer, pinned_rcv_buffer);
288 }
289
// The local unpack is issued between communication start and finish; the
// received bytes are then copied from the pinned buffer into rcv_buffer
// before the remote unpack.
290 rcv_buffer.resize(pinned_rcv_buffer.size());
291 unpackBuffer(*this, neighbor_copy_plan, snd_buffer, NeighborUnpackPolicy());
292 communicateParticlesFinish(neighbor_copy_plan);
293 Gpu::htod_memcpy_async(rcv_buffer.dataPtr(), pinned_rcv_buffer.dataPtr(), pinned_rcv_buffer.size());
294 unpackRemotes(*this, neighbor_copy_plan, rcv_buffer, NeighborUnpackPolicy());
295 }
296
// (Embedded listing line 297 is absent here; presumably a final stream
// synchronization - confirm.)
298}
299
// NeighborParticleContainer::clearNeighborsGPU (name taken from the profiler
// string below and from the Doxygen index at the end of this page, which
// places clearNeighborsGPU() at listing line 303; embedded listing lines
// 302-303 are absent from this listing).
//
// Calls reserveData() and resizeData(), then sets the neighbor count of every
// particle tile on every level to zero.
300template <int NStructReal, int NStructInt, int NArrayReal, int NArrayInt>
301void
304{
305 BL_PROFILE("NeighborParticleContainer::clearNeighborsGPU");
306
307 this->reserveData();
308 this->resizeData();
309 for (int lev = 0; lev < this->numLevels(); ++lev)
310 {
311 for(MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi)
312 {
// Judging by its name, DefineAndReturnParticleTile may create the tile if it
// does not exist yet - TODO confirm.
313 auto& ptile = this->DefineAndReturnParticleTile(lev, mfi);
314 ptile.setNumNeighbors(0);
315 }
316 }
317}
318
319}
320
321#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_ALWAYS_ASSERT(EX)
Definition AMReX_BLassert.H:50
#define AMREX_FOR_1D(...)
Definition AMReX_GpuLaunchMacrosC.nolint.H:97
A collection of Boxes stored in an Array.
Definition AMReX_BoxArray.H:550
std::vector< std::pair< int, Box > > intersections(const Box &bx) const
Return intersections of Box and BoxArray.
static bool SameRefs(const BoxArray &lhs, const BoxArray &rhs)
Whether two BoxArrays share the same data.
Definition AMReX_BoxArray.H:820
Long size() const noexcept
Return the number of boxes in the BoxArray.
Definition AMReX_BoxArray.H:597
A class for managing a List of Boxes that share a common IndexType. This class implements operations ...
Definition AMReX_BoxList.H:52
void push_back(const Box &bn)
Append a Box to this BoxList.
Definition AMReX_BoxList.H:93
AMREX_GPU_HOST_DEVICE IndexTypeND< dim > ixType() const noexcept
Returns the indexing type.
Definition AMReX_Box.H:127
AMREX_GPU_HOST_DEVICE IntVectND< dim > size() const noexcept
Return the length of the BoxND.
Definition AMReX_Box.H:139
Calculates the distribution of FABs to MPI processes.
Definition AMReX_DistributionMapping.H:41
static bool SameRefs(const DistributionMapping &lhs, const DistributionMapping &rhs)
Definition AMReX_DistributionMapping.H:187
Rectangular problem domain geometry.
Definition AMReX_Geometry.H:73
bool isAnyPeriodic() const noexcept
Is domain periodic in any direction?
Definition AMReX_Geometry.H:333
Periodicity periodicity() const noexcept
Definition AMReX_Geometry.H:355
Definition AMReX_MFIter.H:57
bool isValid() const noexcept
Is the iterator valid i.e. is it associated with a FAB?
Definition AMReX_MFIter.H:141
void clearNeighborsGPU()
Definition AMReX_NeighborParticlesGPUImpl.H:303
typename ParticleContainerType::ParticleType ParticleType
Definition AMReX_NeighborParticles.H:38
void buildNeighborCopyOp(bool use_boundary_neighbor=false)
Definition AMReX_NeighborParticlesGPUImpl.H:122
void updateNeighborsGPU(bool boundary_neighbors_only=false)
Definition AMReX_NeighborParticlesGPUImpl.H:247
void buildNeighborMask()
Definition AMReX_NeighborParticlesGPUImpl.H:53
void fillNeighborsGPU()
Definition AMReX_NeighborParticlesGPUImpl.H:227
Definition AMReX_PODVector.H:262
size_type size() const noexcept
Definition AMReX_PODVector.H:591
iterator begin() noexcept
Definition AMReX_PODVector.H:617
iterator end() noexcept
Definition AMReX_PODVector.H:621
T * dataPtr() noexcept
Definition AMReX_PODVector.H:613
T * data() noexcept
Definition AMReX_PODVector.H:609
void push_back(const T &a_value)
Definition AMReX_PODVector.H:572
This provides the length of the period for periodic domains. 0 means it is not periodic in that direction....
Definition AMReX_Periodicity.H:17
std::vector< IntVect > shiftIntVect(IntVect const &nghost=IntVect(0)) const
Definition AMReX_Periodicity.cpp:8
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound che...
Definition AMReX_Vector.H:27
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:233
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1377
static constexpr HostToDevice hostToDevice
Definition AMReX_GpuContainers.H:98
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:237
void dtoh_memcpy_async(void *p_h, const void *p_d, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:265
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:251
int global_to_local_rank(int rank) noexcept
Definition AMReX_ParallelContext.H:98
bool UseGpuAwareMpi()
Definition AMReX_ParallelDescriptor.H:111
Vector< Box > getBoundaryBoxes(const Box &box, int ncells)
Definition AMReX_NeighborParticlesGPUImpl.H:9
Definition AMReX_Amr.cpp:49
void communicateParticlesStart(const PC &pc, ParticleCopyPlan &plan, const SndBuffer &snd_buffer, RcvBuffer &rcv_buffer)
Definition AMReX_ParticleCommunication.H:500
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE IntVect getParticleCell(P const &p, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &plo, amrex::GpuArray< amrex::Real, AMREX_SPACEDIM > const &dxi) noexcept
Returns the cell index for a given particle using the provided lower bounds and cell sizes.
Definition AMReX_ParticleUtil.H:374
void unpackRemotes(PC &pc, const ParticleCopyPlan &plan, Buffer &rcv_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:600
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > adjCellLo(const BoxND< dim > &b, int dir, int len=1) noexcept
Returns the cell centered BoxND of length len adjacent to b on the low end along the coordinate direc...
Definition AMReX_Box.H:1591
void communicateParticlesFinish(const ParticleCopyPlan &plan)
Definition AMReX_ParticleCommunication.cpp:384
Arena * The_Comms_Arena()
Definition AMReX_Arena.cpp:676
int numParticlesOutOfRange(Iterator const &pti, int nGrow)
Returns the number of particles that are more than nGrow cells from the box corresponding to the input i...
Definition AMReX_ParticleUtil.H:34
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > adjCellHi(const BoxND< dim > &b, int dir, int len=1) noexcept
Similar to adjCellLo but builds an adjacent BoxND on the high end.
Definition AMReX_Box.H:1612
void RemoveDuplicates(Vector< T > &vec)
Definition AMReX_Vector.H:208
void unpackBuffer(PC &pc, const ParticleCopyPlan &plan, const Buffer &snd_buffer, UnpackPolicy const &policy)
Definition AMReX_ParticleCommunication.H:435
void packBuffer(const PC &pc, const ParticleCopyOp &op, const ParticleCopyPlan &plan, Buffer &snd_buffer)
Definition AMReX_ParticleCommunication.H:336
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_NeighborParticles.H:16
IntVect periodic_shift
Definition AMReX_NeighborParticles.H:18
int grid_id
Definition AMReX_NeighborParticles.H:17
Definition AMReX_NeighborParticles.H:432
Definition AMReX_ParticleCommunication.H:19