Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_DenseBins.H
Go to the documentation of this file.
1#ifndef AMREX_DENSEBINS_H_
2#define AMREX_DENSEBINS_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Gpu.H>
6#include <AMReX_Scan.H>
7#include <AMReX_IntVect.H>
8#include <AMReX_BLProfiler.H>
9#include <AMReX_BinIterator.H>
10
11namespace amrex
12{
13namespace BinPolicy
14{
15 struct GPUBinPolicy {};
16 struct OpenMPBinPolicy {};
17 struct SerialBinPolicy {};
18
19 static constexpr GPUBinPolicy GPU{};
20 static constexpr OpenMPBinPolicy OpenMP{};
21 static constexpr SerialBinPolicy Serial{};
22
23#ifdef AMREX_USE_GPU
24 static constexpr GPUBinPolicy Default{};
25#else
26 static constexpr OpenMPBinPolicy Default{};
27#endif
28}
29
30template <typename T>
32{
33 using index_type = int;
34
35 using const_pointer_type = std::conditional_t<IsParticleTileData<T>(),
36 T,
37 const T*
38 >;
39
41 const Gpu::DeviceVector<index_type>& permutation,
42 const T* items)
43 : m_offsets_ptr(offsets.dataPtr()),
44 m_permutation_ptr(permutation.dataPtr()),
45 m_items(items)
46 {}
47
48 [[nodiscard]] AMREX_GPU_HOST_DEVICE
49 BinIterator<T> getBinIterator(const int bin_number) const noexcept
50 {
52 }
53
57};
58
59
75template <typename T>
77{
78public:
79
81 using index_type = int;
82
83 using const_pointer_type = std::conditional_t<IsParticleTileData<T>(),
84 T,
85 const T*
86 >;
87
88 using const_pointer_input_type = std::conditional_t<IsParticleTileData<T>(),
89 const T&,
90 const T*
91 >;
92
93private:
94
95 template <typename F, typename I>
97 static auto call_f (F const& f, const_pointer_input_type v, I& index) {
98 if constexpr (IsCallable<F, decltype(v), I>::value) {
99 return f(v, index);
100 } else {
101 return f(v[index]);
102 }
103 }
104
105public:
106
129 template <typename N, typename F>
130 void build (N nitems, const_pointer_input_type v, const Box& bx, F&& f)
131 {
132 build(BinPolicy::Default, nitems, v, bx, std::forward<F>(f));
133 }
134
156 template <typename N, typename F>
157 void build (N nitems, const_pointer_input_type v, int nbins, F&& f)
158 {
159 build(BinPolicy::Default, nitems, v, nbins, std::forward<F>(f));
160 }
161
184 template <typename N, typename F>
185 void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
186 {
187 const auto lo = lbound(bx);
188 const auto hi = ubound(bx);
189 build(BinPolicy::GPU, nitems, v, bx.numPts(),
191 {
192 auto iv = call_f(f,t,i);
193 auto iv3 = iv.dim3();
194 int nx = hi.x-lo.x+1;
195 int ny = hi.y-lo.y+1;
196 int nz = hi.z-lo.z+1;
197 index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
198 index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
199 index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
200 return (uix * ny + uiy) * nz + uiz;
201 });
202 }
203
225 template <typename N, typename F>
226 void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
227 {
228 BL_PROFILE("DenseBins<T>::buildGPU");
229
230 m_items = v;
231
232 m_bins.resize(nitems);
233 m_perm.resize(nitems);
234 m_local_offsets.resize(nitems);
235
236 m_counts.resize(0);
237 m_counts.resize(nbins+1, 0);
238
239 m_offsets.resize(0);
240 m_offsets.resize(nbins+1);
241
242 index_type* pbins = m_bins.dataPtr();
243 index_type* pcount = m_counts.dataPtr();
244 index_type* plocal_offsets = m_local_offsets.dataPtr();
245 amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept
246 {
247 pbins[i] = call_f(f,v,i);
248 index_type off = Gpu::Atomic::Add(&pcount[pbins[i]], index_type{ 1 });
249 plocal_offsets[i] = off;
250 });
251
252 Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin());
253
254 index_type* pperm = m_perm.dataPtr();
255 index_type* poffsets = m_offsets.dataPtr();
256 amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept
257 {
258 index_type index = poffsets[pbins[i]] + plocal_offsets[i];
259 pperm[index] = i;
260 });
261
263 }
264
288 template <typename N, typename F>
289 void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
290 {
291 const auto lo = lbound(bx);
292 const auto hi = ubound(bx);
293 build(BinPolicy::OpenMP, nitems, v, bx.numPts(),
294 [=] (const_pointer_type t, index_type i) noexcept
295 {
296 auto iv = call_f(f,t,i);
297 auto iv3 = iv.dim3();
298 int nx = hi.x-lo.x+1;
299 int ny = hi.y-lo.y+1;
300 int nz = hi.z-lo.z+1;
301 index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
302 index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
303 index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
304 return (uix * ny + uiy) * nz + uiz;
305 });
306 }
307
330 template <typename N, typename F>
331 void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
332 {
333 BL_PROFILE("DenseBins<T>::buildOpenMP");
334
335 if (nbins <= 0) { return; }
336
337 m_items = v;
338
339 m_bins.resize(nitems);
340 m_perm.resize(nitems);
341
342 int nchunks = OpenMP::get_max_threads();
343 int chunksize = nitems / nchunks;
344 auto* counts = (index_type*)(The_Arena()->alloc(nchunks*nbins*sizeof(index_type)));
345 for (int i = 0; i < nbins*nchunks; ++i) { counts[i] = 0;}
346
347 m_counts.resize(0);
348 m_counts.resize(nbins+1, 0);
349
350 m_offsets.resize(0);
351 m_offsets.resize(nbins+1);
352
353#ifdef AMREX_USE_OMP
354#pragma omp parallel for
355#endif
356 for (int j = 0; j < nchunks; ++j) {
357 int istart = j*chunksize;
358 int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
359 for (int i = istart; i < istop; ++i) {
360 m_bins[i] = call_f(f,v,i);
361 ++counts[nbins*j+m_bins[i]];
362 }
363 }
364
365#ifdef AMREX_USE_OMP
366#pragma omp parallel for
367#endif
368 for (int i = 0; i < nbins; ++i) {
369 index_type total = 0;
370 for (int j = 0; j < nchunks; ++j) {
371 auto tmp = counts[nbins*j+i];
372 counts[nbins*j+i] = total;
373 total += tmp;
374 }
375 m_counts[i] = total;
376 }
377
378 // note - this part has to be serial
379 m_offsets[0] = 0;
380 for (int i = 0; i < nbins; ++i) {m_offsets[i+1] = m_offsets[i] + m_counts[i];}
381
382#ifdef AMREX_USE_OMP
383#pragma omp parallel for
384#endif
385 for (int i = 0; i < nbins; ++i) {
386 for (int j = 0; j < nchunks; ++j) {
387 counts[nbins*j+i] += m_offsets[i];
388 }
389 }
390
391#ifdef AMREX_USE_OMP
392#pragma omp parallel for
393#endif
394 for (int j = 0; j < nchunks; ++j) {
395 int istart = j*chunksize;
396 int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
397 for (int i = istart; i < istop; ++i) {
398 auto bid = m_bins[i];
399 m_perm[counts[nbins*j+bid]++] = i;
400 }
401 }
402
403 The_Arena()->free(counts);
404 }
405
429 template <typename N, typename F>
430 void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
431 {
432 const auto lo = lbound(bx);
433 const auto hi = ubound(bx);
434 build(BinPolicy::Serial, nitems, v, bx.numPts(),
435 [=] (const_pointer_type t, index_type i) noexcept
436 {
437 auto iv = call_f(f,t,i);
438 auto iv3 = iv.dim3();
439 int nx = hi.x-lo.x+1;
440 int ny = hi.y-lo.y+1;
441 int nz = hi.z-lo.z+1;
442 index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
443 index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
444 index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
445 return (uix * ny + uiy) * nz + uiz;
446 });
447 }
448
471 template <typename N, typename F>
472 void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
473 {
474 BL_PROFILE("DenseBins<T>::buildSerial");
475
476 m_items = v;
477
478 m_bins.resize(nitems);
479 m_perm.resize(nitems);
480
481 m_counts.resize(0);
482 m_counts.resize(nbins+1, 0);
483
484 m_offsets.resize(0);
485 m_offsets.resize(nbins+1);
486
487 for (N i = 0; i < nitems; ++i) {
488 m_bins[i] = call_f(f,v,i);
489 ++m_counts[m_bins[i]];
490 }
491
492 Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin());
493
494 Gpu::copy(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin());
495
496 for (N i = 0; i < nitems; ++i) {
497 index_type index = m_counts[m_bins[i]]++;
498 m_perm[index] = i;
499 }
500 }
501
503 [[nodiscard]] Long numItems () const noexcept { return m_perm.size(); }
504
506 [[nodiscard]] Long numBins () const noexcept { return m_offsets.size()-1; }
507
509 [[nodiscard]] index_type* permutationPtr () noexcept { return m_perm.dataPtr(); }
510
512 [[nodiscard]] index_type* offsetsPtr () noexcept { return m_offsets.dataPtr(); }
513
515 [[nodiscard]] index_type* binsPtr () noexcept { return m_bins.dataPtr(); }
516
518 [[nodiscard]] const index_type* permutationPtr () const noexcept { return m_perm.dataPtr(); }
519
521 [[nodiscard]] const index_type* offsetsPtr () const noexcept { return m_offsets.dataPtr(); }
522
524 [[nodiscard]] const index_type* binsPtr () const noexcept { return m_bins.dataPtr(); }
525
531
532private:
533
535
541};
542
543}
544
545#endif
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
Returns the number of points contained in the BoxND.
Definition AMReX_Box.H:346
A container for storing items in a set of bins.
Definition AMReX_DenseBins.H:77
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition AMReX_DenseBins.H:86
const_pointer_type m_items
Definition AMReX_DenseBins.H:534
const index_type * permutationPtr() const noexcept
returns const pointer to the permutation array
Definition AMReX_DenseBins.H:518
std::conditional_t< IsParticleTileData< T >(), const T &, const T * > const_pointer_input_type
Definition AMReX_DenseBins.H:91
DenseBinIteratorFactory< T > getBinIteratorFactory() const noexcept
returns a GPU-capable object that can create iterators over the items in a bin.
Definition AMReX_DenseBins.H:527
const index_type * binsPtr() const noexcept
returns the const pointer to the bins array
Definition AMReX_DenseBins.H:524
Gpu::DeviceVector< index_type > m_local_offsets
Definition AMReX_DenseBins.H:538
static AMREX_GPU_HOST_DEVICE auto call_f(F const &f, const_pointer_input_type v, I &index)
Definition AMReX_DenseBins.H:97
index_type * offsetsPtr() noexcept
returns the pointer to the offsets array
Definition AMReX_DenseBins.H:512
index_type * permutationPtr() noexcept
returns the pointer to the permutation array
Definition AMReX_DenseBins.H:509
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:289
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:185
void build(N nitems, const_pointer_input_type v, const Box &bx, F &&f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:130
int index_type
Definition AMReX_DenseBins.H:81
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:430
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:226
const index_type * offsetsPtr() const noexcept
returns const pointer to the offsets array
Definition AMReX_DenseBins.H:521
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:331
void build(N nitems, const_pointer_input_type v, int nbins, F &&f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:157
Gpu::DeviceVector< index_type > m_perm
Definition AMReX_DenseBins.H:540
Gpu::DeviceVector< index_type > m_offsets
Definition AMReX_DenseBins.H:539
Long numItems() const noexcept
the number of items in the container
Definition AMReX_DenseBins.H:503
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:472
Gpu::DeviceVector< index_type > m_counts
Definition AMReX_DenseBins.H:537
Gpu::DeviceVector< index_type > m_bins
Definition AMReX_DenseBins.H:536
Long numBins() const noexcept
the number of bins in the container
Definition AMReX_DenseBins.H:506
index_type * binsPtr() noexcept
returns the pointer to the bins array
Definition AMReX_DenseBins.H:515
static void streamSynchronize() noexcept
Definition AMReX_GpuDevice.cpp:681
Definition AMReX_PODVector.H:262
static constexpr OpenMPBinPolicy OpenMP
Definition AMReX_DenseBins.H:20
static constexpr GPUBinPolicy Default
Definition AMReX_DenseBins.H:24
static constexpr SerialBinPolicy Serial
Definition AMReX_DenseBins.H:21
static constexpr GPUBinPolicy GPU
Definition AMReX_DenseBins.H:19
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition AMReX_GpuAtomic.H:198
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition AMReX_GpuContainers.H:121
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1377
static constexpr DeviceToDevice deviceToDevice
Definition AMReX_GpuContainers.H:100
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:315
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:308
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:35
const int[]
Definition AMReX_BLProfiler.cpp:1664
Arena * The_Arena()
Definition AMReX_Arena.cpp:616
Definition AMReX_BinIterator.H:24
Definition AMReX_DenseBins.H:15
Definition AMReX_DenseBins.H:16
Definition AMReX_DenseBins.H:17
Definition AMReX_DenseBins.H:32
DenseBinIteratorFactory(const Gpu::DeviceVector< index_type > &offsets, const Gpu::DeviceVector< index_type > &permutation, const T *items)
Definition AMReX_DenseBins.H:40
const index_type * m_permutation_ptr
Definition AMReX_DenseBins.H:55
int index_type
Definition AMReX_DenseBins.H:33
const_pointer_type m_items
Definition AMReX_DenseBins.H:56
AMREX_GPU_HOST_DEVICE BinIterator< T > getBinIterator(const int bin_number) const noexcept
Definition AMReX_DenseBins.H:49
const index_type * m_offsets_ptr
Definition AMReX_DenseBins.H:54
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition AMReX_DenseBins.H:38
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:201