Block-Structured AMR Software Framework
AMReX_ParticleTransformation.H
#ifndef AMREX_PARTICLETRANSFORMATION_H_
#define AMREX_PARTICLETRANSFORMATION_H_
#include <AMReX_Config.H>

#include <AMReX_IntVect.H>
#include <AMReX_Box.H>
#include <AMReX_Gpu.H>
#include <AMReX_Print.H>
#include <AMReX_ParticleTile.H>
#include <AMReX_ParticleUtil.H>

namespace amrex
{

/**
 * \brief A general single particle copying routine that can run on the GPU.
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void copyParticle (const ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ConstParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    if constexpr(!T_ParticleType::is_soa_particle) {
        dst.m_aos[dst_i] = src.m_aos[src_i];
    } else {
        dst.m_idcpu[dst_i] = src.m_idcpu[src_i];
    }
    if constexpr(NAR > 0) {
        for (int j = 0; j < NAR; ++j) {
            dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i];
        }
    }
    for (int j = 0; j < dst.m_num_runtime_real; ++j) {
        dst.m_runtime_rdata[j][dst_i] = src.m_runtime_rdata[j][src_i];
    }
    if constexpr(NAI > 0) {
        for (int j = 0; j < NAI; ++j) {
            dst.m_idata[j][dst_i] = src.m_idata[j][src_i];
        }
    }
    for (int j = 0; j < dst.m_num_runtime_int; ++j) {
        dst.m_runtime_idata[j][dst_i] = src.m_runtime_idata[j][src_i];
    }
}

/**
 * \brief A general single particle copying routine that can run on the GPU.
 *        This version copies between two ParticleTileData objects.
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void copyParticle (const ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    if constexpr(T_ParticleType::is_soa_particle) {
        dst.m_idcpu[dst_i] = src.m_idcpu[src_i];
    } else {
        dst.m_aos[dst_i] = src.m_aos[src_i];
    }
    for (int j = 0; j < NAR; ++j) {
        dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i];
    }
    for (int j = 0; j < dst.m_num_runtime_real; ++j) {
        dst.m_runtime_rdata[j][dst_i] = src.m_runtime_rdata[j][src_i];
    }
    for (int j = 0; j < NAI; ++j) {
        dst.m_idata[j][dst_i] = src.m_idata[j][src_i];
    }
    for (int j = 0; j < dst.m_num_runtime_int; ++j) {
        dst.m_runtime_idata[j][dst_i] = src.m_runtime_idata[j][src_i];
    }
}

/**
 * \brief A general single particle swapping routine that can run on the GPU.
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void swapParticle (const ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    if constexpr(T_ParticleType::is_soa_particle) {
        amrex::Swap(src.m_idcpu[src_i], dst.m_idcpu[dst_i]);
    } else {
        amrex::Swap(src.m_aos[src_i], dst.m_aos[dst_i]);
    }
    for (int j = 0; j < NAR; ++j) {
        amrex::Swap(dst.m_rdata[j][dst_i], src.m_rdata[j][src_i]);
    }
    for (int j = 0; j < dst.m_num_runtime_real; ++j) {
        amrex::Swap(dst.m_runtime_rdata[j][dst_i], src.m_runtime_rdata[j][src_i]);
    }
    for (int j = 0; j < NAI; ++j) {
        amrex::Swap(dst.m_idata[j][dst_i], src.m_idata[j][src_i]);
    }
    for (int j = 0; j < dst.m_num_runtime_int; ++j) {
        amrex::Swap(dst.m_runtime_idata[j][dst_i], src.m_runtime_idata[j][src_i]);
    }
}

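// Usage sketch: reverse the particle order within a single tile by swapping
// pairs from the two ends. `ptile` is an illustrative ParticleTile; the
// captured ParticleTileData is trivially copyable and safe to use in a
// device lambda.
//
//     auto ptd = ptile.getParticleTileData();
//     const int np = ptile.numParticles();
//     amrex::ParallelFor(np/2, [=] AMREX_GPU_DEVICE (int i) noexcept
//     {
//         swapParticle(ptd, ptd, i, np-1-i);
//     });
//     amrex::Gpu::streamSynchronize();
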
/**
 * \brief Copy particles from src to dst. This version copies all the
 *        particles, writing them to the beginning of dst.
 */
template <typename DstTile, typename SrcTile>
void copyParticles (DstTile& dst, const SrcTile& src) noexcept
{
    auto np = src.numParticles();
    copyParticles(dst, src, 0, 0, np);
}

/**
 * \brief Copy n particles from src to dst, reading from src_start and
 *        writing starting at dst_start.
 */
template <typename DstTile, typename SrcTile, typename Index, typename N,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
void copyParticles (DstTile& dst, const SrcTile& src,
                    Index src_start, Index dst_start, N n) noexcept
{
    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( n, i,
    {
        copyParticle(dst_data, src_data, src_start+i, dst_start+i);
    });

    Gpu::streamSynchronize();
}

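// Usage sketch: append all particles of one tile to the end of another.
// `src_tile` and `dst_tile` are illustrative ParticleTile objects of the
// same container type; dst must be large enough before the copy.
//
//     const int np_src = src_tile.numParticles();
//     const int np_dst = dst_tile.numParticles();
//     dst_tile.resize(np_dst + np_src);
//     amrex::copyParticles(dst_tile, src_tile, 0, np_dst, np_src);
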
/**
 * \brief Apply the function f to all the particles in src, writing the
 *        result to dst.
 */
template <typename DstTile, typename SrcTile, typename F>
void transformParticles (DstTile& dst, const SrcTile& src, F&& f) noexcept
{
    auto np = src.numParticles();
    transformParticles(dst, src, 0, 0, np, std::forward<F>(f));
}

/**
 * \brief Apply the function f to n particles in src, reading from src_start
 *        and writing the result to dst starting at dst_start.
 */
template <typename DstTile, typename SrcTile, typename Index, typename N, typename F,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
void transformParticles (DstTile& dst, const SrcTile& src,
                         Index src_start, Index dst_start, N n, F const& f) noexcept
{
    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( n, i,
    {
        f(dst_data, src_data, src_start+i, dst_start+i);
    });

    Gpu::streamSynchronize();
}

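// Usage sketch: copy each particle and rescale its first compile-time real
// component. `RescaleWeight`, `src_tile`, and `dst_tile` are illustrative
// names, and the tile type is assumed to have at least one compile-time
// real component (NAR > 0).
//
//     struct RescaleWeight {
//         template <typename DstData, typename SrcData>
//         AMREX_GPU_HOST_DEVICE
//         void operator() (const DstData& dst, const SrcData& src,
//                          int src_i, int dst_i) const noexcept
//         {
//             copyParticle(dst, src, src_i, dst_i);  // copy all components first
//             dst.m_rdata[0][dst_i] *= 0.5;          // then modify one of them
//         }
//     };
//
//     dst_tile.resize(src_tile.numParticles());
//     amrex::transformParticles(dst_tile, src_tile, RescaleWeight{});
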
/**
 * \brief Apply the function f to all the particles in src, writing the
 *        results to dst1 and dst2.
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, typename F>
void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src, F&& f) noexcept
{
    auto np = src.numParticles();
    transformParticles(dst1, dst2, src, 0, 0, 0, np, std::forward<F>(f));
}

/**
 * \brief Apply the function f to n particles in src, reading from src_start
 *        and writing the results to dst1 and dst2 starting at dst1_start
 *        and dst2_start.
 */
template <typename DstTile1, typename DstTile2, typename SrcTile,
          typename Index, typename N, typename F,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,
                         Index src_start, Index dst1_start, Index dst2_start, N n, F const& f) noexcept
{
    const auto src_data = src.getConstParticleTileData();
    auto dst1_data = dst1.getParticleTileData();
    auto dst2_data = dst2.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( n, i,
    {
        f(dst1_data, dst2_data, src_data, src_start+i, dst1_start+i, dst2_start+i);
    });

    Gpu::streamSynchronize();
}

/**
 * \brief Conditionally copy particles from src to dst based on the value
 *        of mask.
 */
template <typename DstTile, typename SrcTile, typename Index, typename N,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask) noexcept
{
    return filterParticles(dst, src, mask, 0, 0, src.numParticles());
}

/**
 * \brief Conditionally copy n particles from src to dst based on the value
 *        of mask, reading from src_start and writing starting at dst_start.
 *        Returns the number of particles copied.
 */
template <typename DstTile, typename SrcTile, typename Index, typename N,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask,
                       Index src_start, Index dst_start, N n) noexcept
{
    Gpu::DeviceVector<Index> offsets(n);
    Gpu::exclusive_scan(mask, mask+n, offsets.begin());

    Index last_mask=0, last_offset=0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+n-1, mask + n, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+n-1, offsets.data()+n, &last_offset);

    auto* p_offsets = offsets.dataPtr();

    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( n, i,
    {
        if (mask[i]) { copyParticle(dst_data, src_data, src_start+i, dst_start+p_offsets[i]); }
    });

    Gpu::streamSynchronize();
    return last_mask + last_offset;
}

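// Usage sketch: filter with an explicitly built mask (1 = keep, 0 = drop).
// `src_tile` and `dst_tile` are illustrative, and the tile type is assumed
// to have at least one compile-time real component.
//
//     const int np = src_tile.numParticles();
//     amrex::Gpu::DeviceVector<int> mask(np);
//     auto* p_mask = mask.dataPtr();
//     auto src_data = src_tile.getConstParticleTileData();
//     amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE (int i) noexcept
//     {
//         p_mask[i] = (src_data.m_rdata[0][i] > 0.0) ? 1 : 0;
//     });
//
//     dst_tile.resize(np);  // upper bound on the number kept
//     int n_kept = amrex::filterParticles(dst_tile, src_tile, mask.dataPtr(), 0, 0, np);
//     dst_tile.resize(n_kept);
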
/**
 * \brief Conditionally copy particles from src to dst based on a predicate
 *        that is evaluated on each particle in src.
 */
template <typename DstTile, typename SrcTile, typename Pred,
          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,int> foo = 0>
int filterParticles (DstTile& dst, const SrcTile& src, Pred&& p) noexcept
{
    return filterParticles(dst, src, std::forward<Pred>(p), 0, 0, src.numParticles());
}

/**
 * \brief Conditionally copy n particles from src to dst based on a predicate,
 *        reading from src_start and writing starting at dst_start. The
 *        predicate may optionally take a RandomEngine as its last argument.
 *        Returns the number of particles copied.
 */
template <typename DstTile, typename SrcTile, typename Pred, typename Index, typename N,
          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
Index filterParticles (DstTile& dst, const SrcTile& src, Pred const& p,
                       Index src_start, Index dst_start, N n) noexcept
{
    Gpu::DeviceVector<Index> mask(n);

    auto* p_mask = mask.dataPtr();
    const auto src_data = src.getConstParticleTileData();

    ParallelForRNG(n,
    [p, p_mask, src_data, src_start] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, p_mask, src_data, src_start, engine);
        if constexpr (IsCallable<Pred,decltype(src_data),Index,RandomEngine>::value) {
            p_mask[i] = p(src_data, src_start+i, engine);
        } else {
            p_mask[i] = p(src_data, src_start+i);
        }
    });
    return filterParticles(dst, src, mask.dataPtr(), src_start, dst_start, n);
}

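// Usage sketch: keep only particles whose first compile-time real component
// is positive. `IsPositive`, `src_tile`, and `dst_tile` are illustrative
// names; the return value is the number of particles that survived.
//
//     struct IsPositive {
//         template <typename SrcData>
//         AMREX_GPU_HOST_DEVICE
//         int operator() (const SrcData& src, int i) const noexcept
//         {
//             return src.m_rdata[0][i] > 0.0 ? 1 : 0;
//         }
//     };
//
//     dst_tile.resize(src_tile.numParticles());
//     int n_kept = amrex::filterParticles(dst_tile, src_tile, IsPositive{});
//     dst_tile.resize(n_kept);
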
/**
 * \brief Conditionally copy particles from src to dst based on the value of
 *        mask, applying the transformation f to the particles that are
 *        copied. Returns the number of particles copied.
 */
template <typename DstTile, typename SrcTile, typename Index, typename F,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F const& f,
                                   Index src_start, Index dst_start) noexcept
{
    auto np = src.numParticles();
    Gpu::DeviceVector<Index> offsets(np);
    Gpu::exclusive_scan(mask, mask+np, offsets.begin());

    Index last_mask=0, last_offset=0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+np-1, mask + np, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+np-1, offsets.data()+np, &last_offset);

    auto const* p_offsets = offsets.dataPtr();

    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        if (mask[i]) {
            f(dst_data, src_data, src_start+i,
              dst_start+p_offsets[src_start+i]);
        }
    });

    Gpu::streamSynchronize();
    return last_mask + last_offset;
}

/**
 * \brief Conditionally copy particles from src to dst based on the value of
 *        mask, applying the transformation f to the particles that are copied.
 */
template <typename DstTile, typename SrcTile, typename Index, typename F,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F&& f) noexcept
{
    return filterAndTransformParticles(dst, src, mask, std::forward<F>(f), 0, 0);
}

/**
 * \brief Conditionally copy particles from src to dst based on a predicate,
 *        applying the transformation f to the particles that are copied.
 */
template <typename DstTile, typename SrcTile, typename Pred, typename F,
          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,int> foo = 0>
int filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f) noexcept
{
    return filterAndTransformParticles(dst, src, std::forward<Pred>(p), std::forward<F>(f), 0, 0);
}

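// Usage sketch: in a single pass, select particles whose first compile-time
// integer component equals 1 and flip the sign of their first real component
// on copy. `IsFlagged`, `FlipSign`, `src_tile`, and `dst_tile` are
// illustrative; the tile type is assumed to have at least one compile-time
// int and one compile-time real component.
//
//     struct IsFlagged {
//         template <typename SrcData>
//         AMREX_GPU_HOST_DEVICE
//         int operator() (const SrcData& src, int i) const noexcept
//         { return src.m_idata[0][i] == 1 ? 1 : 0; }
//     };
//
//     struct FlipSign {
//         template <typename DstData, typename SrcData>
//         AMREX_GPU_HOST_DEVICE
//         void operator() (const DstData& dst, const SrcData& src,
//                          int src_i, int dst_i) const noexcept
//         {
//             copyParticle(dst, src, src_i, dst_i);
//             dst.m_rdata[0][dst_i] = -src.m_rdata[0][src_i];
//         }
//     };
//
//     dst_tile.resize(src_tile.numParticles());
//     int n_kept = amrex::filterAndTransformParticles(dst_tile, src_tile,
//                                                     IsFlagged{}, FlipSign{});
//     dst_tile.resize(n_kept);
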
/**
 * \brief Conditionally copy particles from src to dst1 and dst2 based on the
 *        value of mask, applying the transformation f to the particles that
 *        are copied. Returns the number of particles copied.
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, typename Index, typename F,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
Index filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2,
                                   const SrcTile& src, Index* mask, F const& f) noexcept
{
    auto np = src.numParticles();
    Gpu::DeviceVector<Index> offsets(np);
    Gpu::exclusive_scan(mask, mask+np, offsets.begin());

    Index last_mask=0, last_offset=0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+np-1, mask + np, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+np-1, offsets.data()+np, &last_offset);

    auto* p_offsets = offsets.dataPtr();

    const auto src_data = src.getConstParticleTileData();
    auto dst_data1 = dst1.getParticleTileData();
    auto dst_data2 = dst2.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        if (mask[i]) { f(dst_data1, dst_data2, src_data, i, p_offsets[i], p_offsets[i]); }
    });

    Gpu::streamSynchronize();
    return last_mask + last_offset;
}

/**
 * \brief Conditionally copy particles from src to dst1 and dst2 based on a
 *        predicate, applying the transformation f to the particles that are
 *        copied. The predicate may optionally take a RandomEngine as its
 *        last argument. Returns the number of particles copied.
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, typename Pred, typename F,
          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>, int> foo = 0>
int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,
                                 Pred const& p, F&& f) noexcept
{
    auto np = src.numParticles();
    Gpu::DeviceVector<int> mask(np);

    auto* p_mask = mask.dataPtr();
    const auto src_data = src.getConstParticleTileData();

    ParallelForRNG(np,
    [p, p_mask, src_data] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, p_mask, src_data, engine);
        if constexpr (IsCallable<Pred,decltype(src_data),int,RandomEngine>::value) {
            p_mask[i] = p(src_data, i, engine);
        } else {
            p_mask[i] = p(src_data, i);
        }
    });
    return filterAndTransformParticles(dst1, dst2, src, mask.dataPtr(), std::forward<F>(f));
}

/**
 * \brief Conditionally copy particles from src to dst based on a predicate,
 *        applying the transformation f to the particles that are copied,
 *        reading from src_start and writing starting at dst_start. The
 *        predicate may optionally take a RandomEngine as its last argument.
 *        Returns the number of particles copied.
 */
template <typename DstTile, typename SrcTile, typename Pred, typename F, typename Index,
          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred const& p, F&& f,
                                   Index src_start, Index dst_start) noexcept
{
    auto np = src.numParticles();
    Gpu::DeviceVector<Index> mask(np);

    auto* p_mask = mask.dataPtr();
    const auto src_data = src.getConstParticleTileData();

    ParallelForRNG(np,
    [p, p_mask, src_data, src_start] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, p_mask, src_data, src_start, engine);
        if constexpr (IsCallable<Pred,decltype(src_data),Index,RandomEngine>::value) {
            p_mask[i] = p(src_data, src_start+i, engine);
        } else {
            p_mask[i] = p(src_data, src_start+i);
        }
    });
    return filterAndTransformParticles(dst, src, mask.dataPtr(), std::forward<F>(f), src_start, dst_start);
}

/**
 * \brief Gather particles copies particles into contiguous order from an
 *        arbitrary order. Specifically, dst[i] is set from src[inds[i]] for
 *        each i in [0, np).
 */
template <typename PTile, typename N, typename Index,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
void gatherParticles (PTile& dst, const PTile& src, N np, const Index* inds)
{
    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        copyParticle(dst_data, src_data, inds[i], i);
    });

    Gpu::streamSynchronize();
}

/**
 * \brief Scatter particles copies particles from contiguous order into an
 *        arbitrary order. Specifically, dst[inds[i]] is set from src[i] for
 *        each i in [0, np).
 */
template <typename PTile, typename N, typename Index,
          std::enable_if_t<std::is_integral_v<Index>, int> foo = 0>
void scatterParticles (PTile& dst, const PTile& src, N np, const Index* inds)
{
    const auto src_data = src.getConstParticleTileData();
    auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        copyParticle(dst_data, src_data, i, inds[i]);
    });

    Gpu::streamSynchronize();
}

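// Usage sketch: reorder a tile according to a device-side permutation and
// then scatter it back. `ptile`, `tmp`, and `perm` are illustrative; both
// tiles must be of the same ParticleTile type with matching runtime
// components.
//
//     const int np = ptile.numParticles();
//     amrex::Gpu::DeviceVector<int> perm(np);
//     // ... fill perm on the device, e.g. with a sorting routine ...
//
//     auto tmp = ptile;  // same tile type; provides space for np particles
//     amrex::gatherParticles(tmp, ptile, np, perm.dataPtr());   // tmp[i] = ptile[perm[i]]
//     amrex::scatterParticles(ptile, tmp, np, perm.dataPtr());  // ptile[perm[i]] = tmp[i]
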
}

#endif // include guard