Block-Structured AMR Software Framework
AMReX_DenseBins.H
Go to the documentation of this file.
1 #ifndef AMREX_DENSEBINS_H_
2 #define AMREX_DENSEBINS_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_Gpu.H>
6 #include <AMReX_Scan.H>
7 #include <AMReX_IntVect.H>
8 #include <AMReX_BLProfiler.H>
9 #include <AMReX_BinIterator.H>
10 
11 namespace amrex
12 {
13 namespace BinPolicy
14 {
15  struct GPUBinPolicy {};
16  struct OpenMPBinPolicy {};
17  struct SerialBinPolicy {};
18 
19  static constexpr GPUBinPolicy GPU{};
20  static constexpr OpenMPBinPolicy OpenMP{};
21  static constexpr SerialBinPolicy Serial{};
22 
23 #ifdef AMREX_USE_GPU
24  static constexpr GPUBinPolicy Default{};
25 #else
26  static constexpr OpenMPBinPolicy Default{};
27 #endif
28 }
29 
30 template <typename T>
32 {
33  using index_type = int;
34 
35  using const_pointer_type = std::conditional_t<IsParticleTileData<T>(),
36  T,
37  const T*
38  >;
39 
41  const Gpu::DeviceVector<index_type>& permutation,
42  const T* items)
43  : m_offsets_ptr(offsets.dataPtr()),
44  m_permutation_ptr(permutation.dataPtr()),
45  m_items(items)
46  {}
47 
48  [[nodiscard]] AMREX_GPU_HOST_DEVICE
49  BinIterator<T> getBinIterator(const int bin_number) const noexcept
50  {
52  }
53 
57 };
58 
59 
75 template <typename T>
76 class DenseBins
77 {
78 public:
79 
81  using index_type = int;
82 
83  using const_pointer_type = std::conditional_t<IsParticleTileData<T>(),
84  T,
85  const T*
86  >;
87 
88  using const_pointer_input_type = std::conditional_t<IsParticleTileData<T>(),
89  const T&,
90  const T*
91  >;
92 
93 private:
94 
95  template <typename F, typename I>
97  static auto call_f (F const& f, const_pointer_input_type v, I& index) {
98  if constexpr (IsCallable<F, decltype(v), I>::value) {
99  return f(v, index);
100  } else {
101  return f(v[index]);
102  }
103  }
104 
105 public:
106 
129  template <typename N, typename F>
130  void build (N nitems, const_pointer_input_type v, const Box& bx, F&& f)
131  {
132  build(BinPolicy::Default, nitems, v, bx, std::forward<F>(f));
133  }
134 
156  template <typename N, typename F>
157  void build (N nitems, const_pointer_input_type v, int nbins, F&& f)
158  {
159  build(BinPolicy::Default, nitems, v, nbins, std::forward<F>(f));
160  }
161 
184  template <typename N, typename F>
185  void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
186  {
187  const auto lo = lbound(bx);
188  const auto hi = ubound(bx);
189  build(BinPolicy::GPU, nitems, v, bx.numPts(),
191  {
192  auto iv = call_f(f,t,i);
193  auto iv3 = iv.dim3();
194  int nx = hi.x-lo.x+1;
195  int ny = hi.y-lo.y+1;
196  int nz = hi.z-lo.z+1;
197  index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
198  index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
199  index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
200  return (uix * ny + uiy) * nz + uiz;
201  });
202  }
203 
225  template <typename N, typename F>
226  void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
227  {
228  BL_PROFILE("DenseBins<T>::buildGPU");
229 
230  m_items = v;
231 
232  m_bins.resize(nitems);
233  m_perm.resize(nitems);
234  m_local_offsets.resize(nitems);
235 
236  m_counts.resize(0);
237  m_counts.resize(nbins+1, 0);
238 
239  m_offsets.resize(0);
240  m_offsets.resize(nbins+1);
241 
242  index_type* pbins = m_bins.dataPtr();
243  index_type* pcount = m_counts.dataPtr();
244  index_type* plocal_offsets = m_local_offsets.dataPtr();
245  amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept
246  {
247  pbins[i] = call_f(f,v,i);
248  index_type off = Gpu::Atomic::Add(&pcount[pbins[i]], index_type{ 1 });
249  plocal_offsets[i] = off;
250  });
251 
252  Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin());
253 
254  index_type* pperm = m_perm.dataPtr();
255  index_type* poffsets = m_offsets.dataPtr();
256  amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept
257  {
258  index_type index = poffsets[pbins[i]] + plocal_offsets[i];
259  pperm[index] = i;
260  });
261 
263  }
264 
288  template <typename N, typename F>
289  void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
290  {
291  const auto lo = lbound(bx);
292  const auto hi = ubound(bx);
293  build(BinPolicy::OpenMP, nitems, v, bx.numPts(),
294  [=] (const_pointer_type t, index_type i) noexcept
295  {
296  auto iv = call_f(f,t,i);
297  auto iv3 = iv.dim3();
298  int nx = hi.x-lo.x+1;
299  int ny = hi.y-lo.y+1;
300  int nz = hi.z-lo.z+1;
301  index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
302  index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
303  index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
304  return (uix * ny + uiy) * nz + uiz;
305  });
306  }
307 
330  template <typename N, typename F>
331  void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
332  {
333  BL_PROFILE("DenseBins<T>::buildOpenMP");
334 
335  m_items = v;
336 
337  m_bins.resize(nitems);
338  m_perm.resize(nitems);
339 
340  int nchunks = OpenMP::get_max_threads();
341  int chunksize = nitems / nchunks;
342  auto* counts = (index_type*)(The_Arena()->alloc(nchunks*nbins*sizeof(index_type)));
343  for (int i = 0; i < nbins*nchunks; ++i) { counts[i] = 0;}
344 
345  m_counts.resize(0);
346  m_counts.resize(nbins+1, 0);
347 
348  m_offsets.resize(0);
349  m_offsets.resize(nbins+1);
350 
351 #ifdef AMREX_USE_OMP
352 #pragma omp parallel for
353 #endif
354  for (int j = 0; j < nchunks; ++j) {
355  int istart = j*chunksize;
356  int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
357  for (int i = istart; i < istop; ++i) {
358  m_bins[i] = call_f(f,v,i);
359  ++counts[nbins*j+m_bins[i]];
360  }
361  }
362 
363 #ifdef AMREX_USE_OMP
364 #pragma omp parallel for
365 #endif
366  for (int i = 0; i < nbins; ++i) {
367  index_type total = 0;
368  for (int j = 0; j < nchunks; ++j) {
369  auto tmp = counts[nbins*j+i];
370  counts[nbins*j+i] = total;
371  total += tmp;
372  }
373  m_counts[i] = total;
374  }
375 
376  // note - this part has to be serial
377  m_offsets[0] = 0;
378  for (int i = 0; i < nbins; ++i) {m_offsets[i+1] = m_offsets[i] + m_counts[i];}
379 
380 #ifdef AMREX_USE_OMP
381 #pragma omp parallel for
382 #endif
383  for (int i = 0; i < nbins; ++i) {
384  for (int j = 0; j < nchunks; ++j) {
385  counts[nbins*j+i] += m_offsets[i];
386  }
387  }
388 
389 #ifdef AMREX_USE_OMP
390 #pragma omp parallel for
391 #endif
392  for (int j = 0; j < nchunks; ++j) {
393  int istart = j*chunksize;
394  int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
395  for (int i = istart; i < istop; ++i) {
396  auto bid = m_bins[i];
397  m_perm[counts[nbins*j+bid]++] = i;
398  }
399  }
400 
401  The_Arena()->free(counts);
402  }
403 
427  template <typename N, typename F>
428  void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
429  {
430  const auto lo = lbound(bx);
431  const auto hi = ubound(bx);
432  build(BinPolicy::Serial, nitems, v, bx.numPts(),
433  [=] (const_pointer_type t, index_type i) noexcept
434  {
435  auto iv = call_f(f,t,i);
436  auto iv3 = iv.dim3();
437  int nx = hi.x-lo.x+1;
438  int ny = hi.y-lo.y+1;
439  int nz = hi.z-lo.z+1;
440  index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
441  index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
442  index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
443  return (uix * ny + uiy) * nz + uiz;
444  });
445  }
446 
469  template <typename N, typename F>
470  void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
471  {
472  BL_PROFILE("DenseBins<T>::buildSerial");
473 
474  m_items = v;
475 
476  m_bins.resize(nitems);
477  m_perm.resize(nitems);
478 
479  m_counts.resize(0);
480  m_counts.resize(nbins+1, 0);
481 
482  m_offsets.resize(0);
483  m_offsets.resize(nbins+1);
484 
485  for (N i = 0; i < nitems; ++i) {
486  m_bins[i] = call_f(f,v,i);
487  ++m_counts[m_bins[i]];
488  }
489 
490  Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin());
491 
492  Gpu::copy(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin());
493 
494  for (N i = 0; i < nitems; ++i) {
495  index_type index = m_counts[m_bins[i]]++;
496  m_perm[index] = i;
497  }
498  }
499 
501  [[nodiscard]] Long numItems () const noexcept { return m_perm.size(); }
502 
504  [[nodiscard]] Long numBins () const noexcept { return m_offsets.size()-1; }
505 
507  [[nodiscard]] index_type* permutationPtr () noexcept { return m_perm.dataPtr(); }
508 
510  [[nodiscard]] index_type* offsetsPtr () noexcept { return m_offsets.dataPtr(); }
511 
513  [[nodiscard]] index_type* binsPtr () noexcept { return m_bins.dataPtr(); }
514 
516  [[nodiscard]] const index_type* permutationPtr () const noexcept { return m_perm.dataPtr(); }
517 
519  [[nodiscard]] const index_type* offsetsPtr () const noexcept { return m_offsets.dataPtr(); }
520 
522  [[nodiscard]] const index_type* binsPtr () const noexcept { return m_bins.dataPtr(); }
523 
525  [[nodiscard]] DenseBinIteratorFactory<T> getBinIteratorFactory() const noexcept
526  {
528  }
529 
530 private:
531 
533 
539 };
540 
541 }
542 
543 #endif
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition: AMReX_Box.H:346
A container for storing items in a set of bins.
Definition: AMReX_DenseBins.H:77
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition: AMReX_DenseBins.H:86
index_type * binsPtr() noexcept
returns the pointer to the bins array
Definition: AMReX_DenseBins.H:513
const_pointer_type m_items
Definition: AMReX_DenseBins.H:532
index_type * offsetsPtr() noexcept
returns the pointer to the offsets array
Definition: AMReX_DenseBins.H:510
std::conditional_t< IsParticleTileData< T >(), const T &, const T * > const_pointer_input_type
Definition: AMReX_DenseBins.H:91
Gpu::DeviceVector< index_type > m_local_offsets
Definition: AMReX_DenseBins.H:536
static AMREX_GPU_HOST_DEVICE auto call_f(F const &f, const_pointer_input_type v, I &index)
Definition: AMReX_DenseBins.H:97
const index_type * offsetsPtr() const noexcept
returns const pointer to the offsets array
Definition: AMReX_DenseBins.H:519
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:289
DenseBinIteratorFactory< T > getBinIteratorFactory() const noexcept
returns a GPU-capable object that can create iterators over the items in a bin.
Definition: AMReX_DenseBins.H:525
const index_type * permutationPtr() const noexcept
returns const pointer to the permutation array
Definition: AMReX_DenseBins.H:516
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:185
void build(N nitems, const_pointer_input_type v, const Box &bx, F &&f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:130
int index_type
Definition: AMReX_DenseBins.H:81
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:428
index_type * permutationPtr() noexcept
returns the pointer to the permutation array
Definition: AMReX_DenseBins.H:507
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:226
const index_type * binsPtr() const noexcept
returns the const pointer to the bins array
Definition: AMReX_DenseBins.H:522
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:331
void build(N nitems, const_pointer_input_type v, int nbins, F &&f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:157
Gpu::DeviceVector< index_type > m_perm
Definition: AMReX_DenseBins.H:538
Gpu::DeviceVector< index_type > m_offsets
Definition: AMReX_DenseBins.H:537
Long numItems() const noexcept
the number of items in the container
Definition: AMReX_DenseBins.H:501
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition: AMReX_DenseBins.H:470
Gpu::DeviceVector< index_type > m_counts
Definition: AMReX_DenseBins.H:535
Gpu::DeviceVector< index_type > m_bins
Definition: AMReX_DenseBins.H:534
Long numBins() const noexcept
the number of bins in the container
Definition: AMReX_DenseBins.H:504
static void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.cpp:641
Definition: AMReX_PODVector.H:246
static constexpr OpenMPBinPolicy OpenMP
Definition: AMReX_DenseBins.H:20
static constexpr GPUBinPolicy Default
Definition: AMReX_DenseBins.H:24
static constexpr SerialBinPolicy Serial
Definition: AMReX_DenseBins.H:21
static constexpr GPUBinPolicy GPU
Definition: AMReX_DenseBins.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition: AMReX_GpuAtomic.H:198
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:121
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition: AMReX_Scan.H:1377
static constexpr DeviceToDevice deviceToDevice
Definition: AMReX_GpuContainers.H:100
constexpr int get_max_threads()
Definition: AMReX_OpenMP.H:36
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition: AMReX_Array4.H:308
const int[]
Definition: AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition: AMReX_Arena.cpp:594
Definition: AMReX_BinIterator.H:24
Definition: AMReX_DenseBins.H:15
Definition: AMReX_DenseBins.H:16
Definition: AMReX_DenseBins.H:17
Definition: AMReX_DenseBins.H:32
DenseBinIteratorFactory(const Gpu::DeviceVector< index_type > &offsets, const Gpu::DeviceVector< index_type > &permutation, const T *items)
Definition: AMReX_DenseBins.H:40
const index_type * m_permutation_ptr
Definition: AMReX_DenseBins.H:55
int index_type
Definition: AMReX_DenseBins.H:33
AMREX_GPU_HOST_DEVICE BinIterator< T > getBinIterator(const int bin_number) const noexcept
Definition: AMReX_DenseBins.H:49
const_pointer_type m_items
Definition: AMReX_DenseBins.H:56
const index_type * m_offsets_ptr
Definition: AMReX_DenseBins.H:54
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition: AMReX_DenseBins.H:38
Test if a given type T is callable with arguments of type Args...
Definition: AMReX_TypeTraits.H:201