AMReX_ParticleReduce.H

#ifndef AMREX_PARTICLEREDUCE_H_
#define AMREX_PARTICLEREDUCE_H_
#include <AMReX_Config.H>

#include <AMReX_IntVect.H>
#include <AMReX_Box.H>
#include <AMReX_Gpu.H>
#include <AMReX_Print.H>
#include <AMReX_GpuUtility.H>
#include <AMReX_TypeTraits.H>
#include <AMReX_ParticleUtil.H>
#include <AMReX_Vector.H>

#include <limits>
#include <type_traits>

namespace amrex {

namespace particle_detail {

// Dispatch helper: invokes the user's callable with whichever particle view
// it accepts (legacy AoS particle, SuperParticle, or tile data plus index).
template <typename F, typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
auto call_f (F const& f,
             const ConstParticleTileData<T_ParticleType, NAR, NAI>& p,
             const int i) noexcept
{
    if constexpr ( ! T_ParticleType::is_soa_particle &&
                   IsCallable<F, decltype(p.m_aos[i])>::value) {
        return f(p.m_aos[i]);
    } else if constexpr (IsCallable<F, decltype(p.getSuperParticle(i))>::value) {
        return f(p.getSuperParticle(i));
    } else {
        return f(p, i);
    }
}
}

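/* Sketch of the three callable forms call_f accepts (illustrative comment,
 * not part of the original header; `MyPC` is an assumed placeholder for a
 * concrete ParticleContainer type):
 *
 *     // 1) Legacy AoS form, used when the particle type is not pure SoA:
 *     //    [=] AMREX_GPU_HOST_DEVICE (const typename MyPC::ParticleType& p)
 *     // 2) SuperParticle form (AoS and SoA components gathered into one struct):
 *     //    [=] AMREX_GPU_HOST_DEVICE (const typename MyPC::SuperParticleType& p)
 *     // 3) Tile-data form (access components directly by particle index i):
 *     //    [=] AMREX_GPU_HOST_DEVICE (const typename MyPC::ParticleTileType::ConstParticleTileDataType& ptd, int i)
 */
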
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceSum (PC const& pc, F&& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    return ReduceSum(pc, 0, pc.finestLevel(), std::forward<F>(f));
}

84 
125 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
126 auto
127 ReduceSum (PC const& pc, int lev, F&& f)
128  -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
129 {
130  return ReduceSum(pc, lev, lev, std::forward<F>(f));
131 }
132 
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceSum (PC const& pc, int lev_min, int lev_max, F const& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
    value_type sm = 0;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        ReduceOps<ReduceOpSum> reduce_op;
        ReduceData<value_type> reduce_data(reduce_op);
        using ReduceTuple = typename decltype(reduce_data)::Type;

        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            for (const auto& kv : plev)
            {
                const auto& tile = plev.at(kv.first);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                reduce_op.eval(np, reduce_data,
                [=] AMREX_GPU_DEVICE (const int i) -> ReduceTuple {
                    return particle_detail::call_f(f, ptd, i);
                });
            }
        }

        ReduceTuple hv = reduce_data.value(reduce_op);
        sm = amrex::get<0>(hv);
    }
    else
#endif
    {
        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            Vector<std::pair<int, int> > grid_tile_ids;
            Vector<const typename PC::ParticleTileType*> ptile_ptrs;
            for (auto& kv : plev)
            {
                grid_tile_ids.push_back(kv.first);
                ptile_ptrs.push_back(&(kv.second));
            }
#ifdef AMREX_USE_OMP
#pragma omp parallel for if (!system::regtest_reduction) reduction(+:sm)
#endif
            for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
            {
                const auto& tile = plev.at(grid_tile_ids[pmap_it]);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                for (int i = 0; i < np; ++i) {
                    sm += particle_detail::call_f(f, ptd, i);
                }
            }
        }
    }

    return sm;
}

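/* Usage sketch (illustrative comment, not part of the original header):
 * sum the first real component over all particles on all levels, assuming
 * the particle type carries at least one real component. `MyPC` and `pc`
 * are assumed placeholders for a concrete ParticleContainer type and
 * instance. The AMREX_GPU_HOST_DEVICE annotation keeps the callable usable
 * in both the GPU and host code paths above.
 *
 *     using MyParticle = typename MyPC::ParticleType;
 *     amrex::Real total = amrex::ReduceSum(pc,
 *         [=] AMREX_GPU_HOST_DEVICE (const MyParticle& p) -> amrex::Real {
 *             return p.rdata(0);
 *         });
 */
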
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMax (PC const& pc, F&& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    return ReduceMax(pc, 0, pc.finestLevel(), std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMax (PC const& pc, int lev, F&& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    return ReduceMax(pc, lev, lev, std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMax (PC const& pc, int lev_min, int lev_max, F const& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
    constexpr value_type value_lowest = std::numeric_limits<value_type>::lowest();
    value_type r = value_lowest;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        ReduceOps<ReduceOpMax> reduce_op;
        ReduceData<value_type> reduce_data(reduce_op);
        using ReduceTuple = typename decltype(reduce_data)::Type;

        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            for (const auto& kv : plev)
            {
                const auto& tile = plev.at(kv.first);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                reduce_op.eval(np, reduce_data,
                [=] AMREX_GPU_DEVICE (const int i) -> ReduceTuple {
                    return particle_detail::call_f(f, ptd, i);
                });
            }
        }

        ReduceTuple hv = reduce_data.value(reduce_op);
        r = amrex::get<0>(hv);
    }
    else
#endif
    {
        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            Vector<std::pair<int, int> > grid_tile_ids;
            Vector<const typename PC::ParticleTileType*> ptile_ptrs;
            for (auto& kv : plev)
            {
                grid_tile_ids.push_back(kv.first);
                ptile_ptrs.push_back(&(kv.second));
            }
#ifdef AMREX_USE_OMP
#pragma omp parallel for if (!system::regtest_reduction) reduction(max:r)
#endif
            for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
            {
                const auto& tile = plev.at(grid_tile_ids[pmap_it]);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                for (int i = 0; i < np; ++i) {
                    r = std::max(r, particle_detail::call_f(f, ptd, i));
                }
            }
        }
    }

    return r;
}

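/* Usage sketch (illustrative comment, not part of the original header):
 * find the largest x position over all particles. `MyPC` and `pc` are
 * assumed placeholders for a concrete ParticleContainer type and instance.
 *
 *     using MyParticle = typename MyPC::ParticleType;
 *     amrex::Real xmax = amrex::ReduceMax(pc,
 *         [=] AMREX_GPU_HOST_DEVICE (const MyParticle& p) -> amrex::Real {
 *             return p.pos(0);
 *         });
 */
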
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMin (PC const& pc, F&& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    return ReduceMin(pc, 0, pc.finestLevel(), std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMin (PC const& pc, int lev, F&& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    return ReduceMin(pc, lev, lev, std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
auto
ReduceMin (PC const& pc, int lev_min, int lev_max, F const& f)
    -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
{
    using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
    constexpr value_type value_max = std::numeric_limits<value_type>::max();
    value_type r = value_max;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        ReduceOps<ReduceOpMin> reduce_op;
        ReduceData<value_type> reduce_data(reduce_op);
        using ReduceTuple = typename decltype(reduce_data)::Type;

        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            for (const auto& kv : plev)
            {
                const auto& tile = plev.at(kv.first);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                reduce_op.eval(np, reduce_data,
                [=] AMREX_GPU_DEVICE (const int i) -> ReduceTuple {
                    return particle_detail::call_f(f, ptd, i);
                });
            }
        }

        ReduceTuple hv = reduce_data.value(reduce_op);
        r = amrex::get<0>(hv);
    }
    else
#endif
    {
        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            Vector<std::pair<int, int> > grid_tile_ids;
            Vector<const typename PC::ParticleTileType*> ptile_ptrs;
            for (auto& kv : plev)
            {
                grid_tile_ids.push_back(kv.first);
                ptile_ptrs.push_back(&(kv.second));
            }
#ifdef AMREX_USE_OMP
#pragma omp parallel for if (!system::regtest_reduction) reduction(min:r)
#endif
            for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
            {
                const auto& tile = plev.at(grid_tile_ids[pmap_it]);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                for (int i = 0; i < np; ++i) {
                    r = std::min(r, particle_detail::call_f(f, ptd, i));
                }
            }
        }
    }

    return r;
}

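/* Usage sketch (illustrative comment, not part of the original header):
 * find the smallest x position over all particles. `MyPC` and `pc` are
 * assumed placeholders for a concrete ParticleContainer type and instance.
 *
 *     using MyParticle = typename MyPC::ParticleType;
 *     amrex::Real xmin = amrex::ReduceMin(pc,
 *         [=] AMREX_GPU_HOST_DEVICE (const MyParticle& p) -> amrex::Real {
 *             return p.pos(0);
 *         });
 */
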
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalAnd (PC const& pc, F&& f)
{
    return ReduceLogicalAnd(pc, 0, pc.finestLevel(), std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalAnd (PC const& pc, int lev, F&& f)
{
    return ReduceLogicalAnd(pc, lev, lev, std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalAnd (PC const& pc, int lev_min, int lev_max, F const& f)
{
    int r = true;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        ReduceOps<ReduceOpLogicalAnd> reduce_op;
        ReduceData<int> reduce_data(reduce_op);
        using ReduceTuple = typename decltype(reduce_data)::Type;

        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            for (const auto& kv : plev)
            {
                const auto& tile = plev.at(kv.first);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                reduce_op.eval(np, reduce_data,
                [=] AMREX_GPU_DEVICE (const int i) -> ReduceTuple {
                    return particle_detail::call_f(f, ptd, i);
                });
            }
        }

        ReduceTuple hv = reduce_data.value(reduce_op);
        r = amrex::get<0>(hv);
    }
    else
#endif
    {
        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            Vector<std::pair<int, int> > grid_tile_ids;
            Vector<const typename PC::ParticleTileType*> ptile_ptrs;
            for (auto& kv : plev)
            {
                grid_tile_ids.push_back(kv.first);
                ptile_ptrs.push_back(&(kv.second));
            }
#ifdef AMREX_USE_OMP
#pragma omp parallel for if (!system::regtest_reduction) reduction(&&:r)
#endif
            for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
            {
                const auto& tile = plev.at(grid_tile_ids[pmap_it]);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                for (int i = 0; i < np; ++i) {
                    r = r && particle_detail::call_f(f, ptd, i);
                }
            }
        }
    }

    return r;
}

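/* Usage sketch (illustrative comment, not part of the original header):
 * check that every particle has a valid (positive) id. `MyPC` and `pc`
 * are assumed placeholders; the callable returns int, matching the int
 * accumulator used by the reduction above.
 *
 *     using MyParticle = typename MyPC::ParticleType;
 *     bool all_valid = amrex::ReduceLogicalAnd(pc,
 *         [=] AMREX_GPU_HOST_DEVICE (const MyParticle& p) -> int {
 *             return p.id() > 0;
 *         });
 */
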
template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalOr (PC const& pc, F&& f)
{
    return ReduceLogicalOr(pc, 0, pc.finestLevel(), std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalOr (PC const& pc, int lev, F&& f)
{
    return ReduceLogicalOr(pc, lev, lev, std::forward<F>(f));
}

template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
bool
ReduceLogicalOr (PC const& pc, int lev_min, int lev_max, F const& f)
{
    int r = false;

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion())
    {
        ReduceOps<ReduceOpLogicalOr> reduce_op;
        ReduceData<int> reduce_data(reduce_op);
        using ReduceTuple = typename decltype(reduce_data)::Type;

        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            for (const auto& kv : plev)
            {
                const auto& tile = plev.at(kv.first);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                reduce_op.eval(np, reduce_data,
                [=] AMREX_GPU_DEVICE (const int i) -> ReduceTuple
                {
                    return particle_detail::call_f(f, ptd, i);
                });
            }
        }

        ReduceTuple hv = reduce_data.value(reduce_op);
        r = amrex::get<0>(hv);
    }
    else
#endif
    {
        for (int lev = lev_min; lev <= lev_max; ++lev)
        {
            const auto& plev = pc.GetParticles(lev);
            Vector<std::pair<int, int> > grid_tile_ids;
            Vector<const typename PC::ParticleTileType*> ptile_ptrs;
            for (auto& kv : plev)
            {
                grid_tile_ids.push_back(kv.first);
                ptile_ptrs.push_back(&(kv.second));
            }
#ifdef AMREX_USE_OMP
#pragma omp parallel for if (!system::regtest_reduction) reduction(||:r)
#endif
            for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
            {
                const auto& tile = plev.at(grid_tile_ids[pmap_it]);
                const auto np = tile.numParticles();
                const auto& ptd = tile.getConstParticleTileData();
                for (int i = 0; i < np; ++i) {
                    r = r || particle_detail::call_f(f, ptd, i);
                }
            }
        }
    }

    return r;
}

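/* Usage sketch (illustrative comment, not part of the original header):
 * test whether any particle lies below an assumed cutoff `x_cut` in x.
 * `MyPC` and `pc` are assumed placeholders.
 *
 *     using MyParticle = typename MyPC::ParticleType;
 *     amrex::Real x_cut = 0.5;
 *     bool any_below = amrex::ReduceLogicalOr(pc,
 *         [=] AMREX_GPU_HOST_DEVICE (const MyParticle& p) -> int {
 *             return p.pos(0) < x_cut;
 *         });
 */
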
template <class RD, class PC, class F, class ReduceOps,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
typename RD::Type
ParticleReduce (PC const& pc, F&& f, ReduceOps& reduce_ops)
{
    return ParticleReduce<RD>(pc, 0, pc.finestLevel(), std::forward<F>(f), reduce_ops);
}

template <class RD, class PC, class F, class ReduceOps,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
typename RD::Type
ParticleReduce (PC const& pc, int lev, F&& f, ReduceOps& reduce_ops)
{
    return ParticleReduce<RD>(pc, lev, lev, std::forward<F>(f), reduce_ops);
}

template <class RD, class PC, class F, class ReduceOps,
          std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
typename RD::Type
ParticleReduce (PC const& pc, int lev_min, int lev_max, F const& f, ReduceOps& reduce_ops)
{
    RD reduce_data(reduce_ops);
    for (int lev = lev_min; lev <= lev_max; ++lev) {
        const auto& plev = pc.GetParticles(lev);
        Vector<std::pair<int, int> > grid_tile_ids;
        Vector<const typename PC::ParticleTileType*> ptile_ptrs;
        for (auto& kv : plev)
        {
            grid_tile_ids.push_back(kv.first);
            ptile_ptrs.push_back(&(kv.second));
        }
#if !defined(AMREX_USE_GPU) && defined(AMREX_USE_OMP)
#pragma omp parallel for
#endif
        for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
        {
            const auto& tile = plev.at(grid_tile_ids[pmap_it]);
            const auto np = tile.numParticles();
            const auto& ptd = tile.getConstParticleTileData();
            reduce_ops.eval(np, reduce_data,
            [=] AMREX_GPU_DEVICE (const int i) noexcept
            {
                return particle_detail::call_f(f, ptd, i);
            });
        }
    }
    return reduce_data.value(reduce_ops);
}
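
/* Usage sketch (illustrative comment, not part of the original header):
 * compute the sum, min, and max of the x position in a single pass by
 * pairing one ReduceData slot with each ReduceOp. `MyPC` and `pc` are
 * assumed placeholders for a concrete ParticleContainer type and instance.
 *
 *     using PType = typename MyPC::SuperParticleType;
 *     amrex::ReduceOps<amrex::ReduceOpSum, amrex::ReduceOpMin, amrex::ReduceOpMax> reduce_ops;
 *     auto r = amrex::ParticleReduce<
 *                  amrex::ReduceData<amrex::Real, amrex::Real, amrex::Real>>(
 *         pc,
 *         [=] AMREX_GPU_DEVICE (const PType& p) noexcept
 *             -> amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real>
 *         {
 *             const amrex::Real x = p.pos(0);
 *             return {x, x, x};
 *         },
 *         reduce_ops);
 *     amrex::Real xsum = amrex::get<0>(r);
 *     amrex::Real xmin = amrex::get<1>(r);
 *     amrex::Real xmax = amrex::get<2>(r);
 */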
}
#endif