1#ifndef AMREX_DENSEBINS_H_
2#define AMREX_DENSEBINS_H_
3#include <AMReX_Config.H>
26 static constexpr OpenMPBinPolicy
Default{};
95 template <
typename F,
typename I>
98 if constexpr (
IsCallable<
F,
decltype(v), I>::value) {
129 template <
typename N,
typename F>
156 template <
typename N,
typename F>
184 template <
typename N,
typename F>
187 const auto lo =
lbound(bx);
188 const auto hi =
ubound(bx);
192 auto iv = call_f(f,t,i);
193 auto iv3 = iv.dim3();
194 int nx = hi.x-lo.x+1;
195 int ny = hi.y-lo.y+1;
196 int nz = hi.z-lo.z+1;
200 return (uix * ny + uiy) * nz + uiz;
225 template <
typename N,
typename F>
232 m_bins.resize(nitems);
233 m_perm.resize(nitems);
234 m_local_offsets.resize(nitems);
237 m_counts.resize(nbins+1, 0);
240 m_offsets.resize(nbins+1);
244 index_type* plocal_offsets = m_local_offsets.dataPtr();
247 pbins[i] = call_f(f,v,i);
249 plocal_offsets[i] = off;
258 index_type index = poffsets[pbins[i]] + plocal_offsets[i];
288 template <
typename N,
typename F>
291 const auto lo =
lbound(bx);
292 const auto hi =
ubound(bx);
296 auto iv = call_f(f,t,i);
297 auto iv3 = iv.dim3();
298 int nx = hi.x-lo.x+1;
299 int ny = hi.y-lo.y+1;
300 int nz = hi.z-lo.z+1;
301 index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
302 index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
303 index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
304 return (uix * ny + uiy) * nz + uiz;
330 template <
typename N,
typename F>
335 if (nbins <= 0) {
return; }
339 m_bins.resize(nitems);
340 m_perm.resize(nitems);
343 int chunksize = nitems / nchunks;
345 for (
int i = 0; i < nbins*nchunks; ++i) { counts[i] = 0;}
348 m_counts.resize(nbins+1, 0);
351 m_offsets.resize(nbins+1);
354#pragma omp parallel for
356 for (
int j = 0; j < nchunks; ++j) {
357 int istart = j*chunksize;
358 int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
359 for (
int i = istart; i < istop; ++i) {
360 m_bins[i] = call_f(f,v,i);
361 ++counts[nbins*j+m_bins[i]];
366#pragma omp parallel for
368 for (
int i = 0; i < nbins; ++i) {
370 for (
int j = 0; j < nchunks; ++j) {
371 auto tmp = counts[nbins*j+i];
372 counts[nbins*j+i] = total;
380 for (
int i = 0; i < nbins; ++i) {m_offsets[i+1] = m_offsets[i] + m_counts[i];}
383#pragma omp parallel for
385 for (
int i = 0; i < nbins; ++i) {
386 for (
int j = 0; j < nchunks; ++j) {
387 counts[nbins*j+i] += m_offsets[i];
392#pragma omp parallel for
394 for (
int j = 0; j < nchunks; ++j) {
395 int istart = j*chunksize;
396 int istop = (j == nchunks-1) ? nitems : (j+1)*chunksize;
397 for (
int i = istart; i < istop; ++i) {
398 auto bid = m_bins[i];
399 m_perm[counts[nbins*j+bid]++] = i;
429 template <
typename N,
typename F>
432 const auto lo =
lbound(bx);
433 const auto hi =
ubound(bx);
437 auto iv = call_f(f,t,i);
438 auto iv3 = iv.dim3();
439 int nx = hi.x-lo.x+1;
440 int ny = hi.y-lo.y+1;
441 int nz = hi.z-lo.z+1;
442 index_type uix = amrex::min(nx-1,amrex::max(0,iv3.x));
443 index_type uiy = amrex::min(ny-1,amrex::max(0,iv3.y));
444 index_type uiz = amrex::min(nz-1,amrex::max(0,iv3.z));
445 return (uix * ny + uiy) * nz + uiz;
471 template <
typename N,
typename F>
478 m_bins.resize(nitems);
479 m_perm.resize(nitems);
482 m_counts.resize(nbins+1, 0);
485 m_offsets.resize(nbins+1);
487 for (N i = 0; i < nitems; ++i) {
488 m_bins[i] = call_f(f,v,i);
489 ++m_counts[m_bins[i]];
496 for (N i = 0; i < nitems; ++i) {
503 [[nodiscard]]
Long numItems () const noexcept {
return m_perm.size(); }
506 [[nodiscard]]
Long numBins () const noexcept {
return m_offsets.size()-1; }
#define BL_PROFILE(a)
Definition AMReX_BLProfiler.H:551
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
__host__ __device__ Long numPts() const noexcept
Return the number of points contained in the BoxND.
Definition AMReX_Box.H:356
A container for storing items in a set of bins.
Definition AMReX_DenseBins.H:77
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition AMReX_DenseBins.H:86
const index_type * permutationPtr() const noexcept
returns const pointer to the permutation array
Definition AMReX_DenseBins.H:518
std::conditional_t< IsParticleTileData< T >(), const T &, const T * > const_pointer_input_type
Definition AMReX_DenseBins.H:91
DenseBinIteratorFactory< T > getBinIteratorFactory() const noexcept
returns a GPU-capable object that can create iterators over the items in a bin.
Definition AMReX_DenseBins.H:527
const index_type * binsPtr() const noexcept
returns the const pointer to the bins array
Definition AMReX_DenseBins.H:524
index_type * offsetsPtr() noexcept
returns the pointer to the offsets array
Definition AMReX_DenseBins.H:512
index_type * permutationPtr() noexcept
returns the pointer to the permutation array
Definition AMReX_DenseBins.H:509
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:289
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:185
void build(N nitems, const_pointer_input_type v, const Box &bx, F &&f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:130
int index_type
Definition AMReX_DenseBins.H:81
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box &bx, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:430
void build(BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:226
const index_type * offsetsPtr() const noexcept
returns const pointer to the offsets array
Definition AMReX_DenseBins.H:521
void build(BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:331
void build(N nitems, const_pointer_input_type v, int nbins, F &&f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:157
Long numItems() const noexcept
the number of items in the container
Definition AMReX_DenseBins.H:503
void build(BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const &f)
Populate the bins with a set of items.
Definition AMReX_DenseBins.H:472
Long numBins() const noexcept
the number of bins in the container
Definition AMReX_DenseBins.H:506
index_type * binsPtr() noexcept
returns the pointer to the bins array
Definition AMReX_DenseBins.H:515
static void streamSynchronize() noexcept
Definition AMReX_GpuDevice.cpp:757
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
amrex_long Long
Definition AMReX_INT.H:30
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1440
Arena * The_Arena()
Definition AMReX_Arena.cpp:783
static constexpr OpenMPBinPolicy OpenMP
Definition AMReX_DenseBins.H:20
static constexpr GPUBinPolicy Default
Definition AMReX_DenseBins.H:24
static constexpr SerialBinPolicy Serial
Definition AMReX_DenseBins.H:21
static constexpr GPUBinPolicy GPU
Definition AMReX_DenseBins.H:19
__host__ __device__ AMREX_FORCE_INLINE T Add(T *sum, T value) noexcept
Definition AMReX_GpuAtomic.H:200
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous storage.
Definition AMReX_GpuContainers.H:128
static constexpr DeviceToDevice deviceToDevice
Definition AMReX_GpuContainers.H:107
constexpr int get_max_threads()
Definition AMReX_OpenMP.H:36
Definition AMReX_Amr.cpp:49
__host__ __device__ Dim3 ubound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:1005
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:24
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
const int[]
Definition AMReX_BLProfiler.cpp:1664
__host__ __device__ Dim3 lbound(Array4< T > const &a) noexcept
Definition AMReX_Array4.H:998
Definition AMReX_BinIterator.H:24
Definition AMReX_DenseBins.H:15
Definition AMReX_DenseBins.H:16
Definition AMReX_DenseBins.H:17
Definition AMReX_DenseBins.H:32
DenseBinIteratorFactory(const Gpu::DeviceVector< index_type > &offsets, const Gpu::DeviceVector< index_type > &permutation, const T *items)
Definition AMReX_DenseBins.H:40
const index_type * m_permutation_ptr
Definition AMReX_DenseBins.H:55
int index_type
Definition AMReX_DenseBins.H:33
__host__ __device__ BinIterator< T > getBinIterator(const int bin_number) const noexcept
Definition AMReX_DenseBins.H:49
const_pointer_type m_items
Definition AMReX_DenseBins.H:56
const index_type * m_offsets_ptr
Definition AMReX_DenseBins.H:54
std::conditional_t< IsParticleTileData< T >(), T, const T * > const_pointer_type
Definition AMReX_DenseBins.H:38
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:213