Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_Partition.H
Go to the documentation of this file.
1#ifndef AMREX_PARTITION_H_
2#define AMREX_PARTITION_H_
3#include <AMReX_Config.H>
4
5#include <AMReX_Gpu.H>
6#include <AMReX_Scan.H>
7#include <AMReX_Algorithm.H>
8
9#include <algorithm>
10
11namespace amrex {
12
13#ifdef AMREX_USE_GPU
14
16namespace detail
17{
18 template <typename T, typename F>
19 int amrex_partition_helper (T const* AMREX_RESTRICT pv, T* AMREX_RESTRICT pv2, int n, F && f)
20 {
21 return Scan::PrefixSum<int> (n,
22 [=] AMREX_GPU_DEVICE (int i) -> int
23 {
24 return f(pv[i]);
25 },
26 [=] AMREX_GPU_DEVICE (int i, int const& s)
27 {
28 // We store true elements from the beginning and false
29 // elements reversely from the end. If all elements
30 // before pv[i] are true, the exclusive sum so far would
31 // be i. But the actual value is s.
32 if (f(pv[i])) {
33 // For true element, s spots from the beginning have
34 // been taken.
35 pv2[s] = pv[i];
36 } else {
37 // There are i-s elements before this element that
38 // are false. From the end, i-s spots have been
39 // taken.
40 pv2[n-1-(i-s)] = pv[i];
41 }
42 },
44 }
45
46 template <typename T>
47 void amrex_stable_partition_helper (T* p, int n2)
48 {
49 if (n2 > 1) {
50 int npairs = n2/2;
51 amrex::ParallelFor(npairs, [=] AMREX_GPU_DEVICE (int i) noexcept
52 {
53 amrex::Swap(p[i], p[n2-1-i]);
54 });
55 if (!Gpu::inNoSyncRegion()) {
57 }
58 }
59 }
60}
62
83template <typename T, typename F>
84int Partition (T* data, int beg, int end, F && f)
85{
86 int n = end - beg;
88 int tot = detail::amrex_partition_helper(data + beg, v2.dataPtr(), n, std::forward<F>(f));
89 Gpu::copy(Gpu::deviceToDevice, v2.begin(), v2.end(), data + beg);
90 return tot;
91}
92
/**
 * \brief A GPU-capable partition function for contiguous data.
 *
 * Convenience overload that partitions the first n elements of data, i.e.
 * the index range [0, n).
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param n    number of elements to partition
 * \param f    predicate deciding which elements go to the front
 *
 * \return whatever the range overload of Partition returns for [0, n)
 */
template <typename T, typename F>
int Partition (T* data, int n, F && f)
{
    constexpr int first_index = 0;
    return Partition(data, first_index, n, std::forward<F>(f));
}
117
136template <typename T, typename F>
138{
139 int n = v.size();
141 int tot = detail::amrex_partition_helper(v.dataPtr(), v2.dataPtr(), n, std::forward<F>(f));
142 v.swap(v2);
143 return tot;
144}
145
168template <typename T, typename F>
169int StablePartition (T* data, int beg, int end, F && f)
170{
171 int n = Partition(data, beg, end, std::forward<F>(f));
172 int n2 = end - beg - n;
173 detail::amrex_stable_partition_helper(data + beg + n, n2);
174 return n;
175}
176
/**
 * \brief A GPU-capable stable partition function for contiguous data.
 *
 * Convenience overload that partitions the first n elements of data, i.e.
 * the index range [0, n).
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param n    number of elements to partition
 * \param f    predicate deciding which elements go to the front
 *
 * \return whatever the range overload of StablePartition returns for [0, n)
 */
template <typename T, typename F>
int StablePartition (T* data, int n, F && f)
{
    constexpr int first_index = 0;
    return StablePartition(data, first_index, n, std::forward<F>(f));
}
203
224template <typename T, typename F>
226{
227 int n = Partition(v, std::forward<F>(f));
228 int n2 = static_cast<int>(v.size()) - n;
229 detail::amrex_stable_partition_helper(v.dataPtr() + n, n2);
230 return n;
231}
232
233#else
234
/**
 * \brief Partition function for contiguous data (CPU build).
 *
 * Reorders the elements in the index range [beg, end) of data with
 * std::partition so that all elements for which f is true come before
 * those for which it is false.
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param beg  index of the first element to consider
 * \param end  one past the index of the last element to consider
 * \param f    predicate deciding which elements go to the front
 *
 * \return the number of elements for which f is true
 */
template <typename T, typename F>
int Partition (T* data, int beg, int end, F && f)
{
    T* const first = data + beg;
    T* const last = data + end;
    T* const boundary = std::partition(first, last, f);
    // The offset of the partition point equals the count of true elements.
    return static_cast<int>(boundary - first);
}
261
/**
 * \brief Partition function for contiguous data (CPU build).
 *
 * Convenience overload that partitions the first n elements of data, i.e.
 * the index range [0, n).
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param n    number of elements to partition
 * \param f    predicate deciding which elements go to the front
 *
 * \return whatever the range overload of Partition returns for [0, n)
 */
template <typename T, typename F>
int Partition (T* data, int n, F && f)
{
    constexpr int first_index = 0;
    return Partition(data, first_index, n, std::forward<F>(f));
}
286
305template <typename T, typename F>
306int Partition (Gpu::DeviceVector<T>& v, F && f)
307{
308 auto it = std::partition(v.begin(), v.end(), f);
309 return static_cast<int>(std::distance(v.begin(), it));
310}
311
/**
 * \brief Stable partition function for contiguous data (CPU build).
 *
 * Reorders the elements in the index range [beg, end) of data with
 * std::stable_partition so that all elements for which f is true come
 * before those for which it is false, keeping the relative order within
 * each group.
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param beg  index of the first element to consider
 * \param end  one past the index of the last element to consider
 * \param f    predicate deciding which elements go to the front
 *
 * \return the number of elements for which f is true
 */
template <typename T, typename F>
int StablePartition (T* data, int beg, int end, F && f)
{
    T* const first = data + beg;
    T* const last = data + end;
    T* const boundary = std::stable_partition(first, last, f);
    // The offset of the partition point equals the count of true elements.
    return static_cast<int>(boundary - first);
}
340
/**
 * \brief Stable partition function for contiguous data (CPU build).
 *
 * Convenience overload that partitions the first n elements of data, i.e.
 * the index range [0, n).
 *
 * \tparam T type of the data to be partitioned
 * \tparam F type of the predicate
 *
 * \param data pointer to the data
 * \param n    number of elements to partition
 * \param f    predicate deciding which elements go to the front
 *
 * \return whatever the range overload of StablePartition returns for [0, n)
 */
template <typename T, typename F>
int StablePartition (T* data, int n, F && f)
{
    constexpr int first_index = 0;
    return StablePartition(data, first_index, n, std::forward<F>(f));
}
367
388template <typename T, typename F>
389int StablePartition (Gpu::DeviceVector<T>& v, F && f)
390{
391 auto it = std::stable_partition(v.begin(), v.end(), f);
392 return static_cast<int>(std::distance(v.begin(), it));
393}
394
395#endif
396
397}
398
399#endif
#define AMREX_RESTRICT
Definition AMReX_Extension.H:32
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308
size_type size() const noexcept
Definition AMReX_PODVector.H:648
void swap(PODVector< T, Allocator > &a_vector) noexcept
Definition AMReX_PODVector.H:840
iterator begin() noexcept
Definition AMReX_PODVector.H:674
iterator end() noexcept
Definition AMReX_PODVector.H:678
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous storage.
Definition AMReX_GpuContainers.H:128
static constexpr DeviceToDevice deviceToDevice
Definition AMReX_GpuContainers.H:107
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310
bool inNoSyncRegion() noexcept
Definition AMReX_GpuControl.H:152
static constexpr struct amrex::Scan::Type::Exclusive exclusive
Definition AMReX_Amr.cpp:49
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:93
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
int Partition(T *data, int beg, int end, F &&f)
A GPU-capable partition function for contiguous data.
Definition AMReX_Partition.H:84
int StablePartition(T *data, int beg, int end, F &&f)
A GPU-capable stable partition function for contiguous data.
Definition AMReX_Partition.H:169
__host__ __device__ Dim3 end(BoxND< dim > const &box) noexcept
Definition AMReX_Box.H:2015