Block-Structured AMR Software Framework
AMReX_Partition.H
Go to the documentation of this file.
1 #ifndef AMREX_PARTITION_H_
2 #define AMREX_PARTITION_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_Gpu.H>
6 #include <AMReX_Scan.H>
7 #include <AMReX_Algorithm.H>
8 
9 #include <algorithm>
10 
11 namespace amrex {
12 
13 #ifdef AMREX_USE_GPU
14 
15 namespace detail
16 {
17  template <typename T, typename F>
18  int amrex_partition_helper (T const* AMREX_RESTRICT pv, T* AMREX_RESTRICT pv2, int n, F && f)
19  {
20  return Scan::PrefixSum<int> (n,
21  [=] AMREX_GPU_DEVICE (int i) -> int
22  {
23  return f(pv[i]);
24  },
25  [=] AMREX_GPU_DEVICE (int i, int const& s)
26  {
27  // We store true elements from the beginning and false
28  // elements reversely from the end. If all elements
29  // before pv[i] are true, the exclusive sum so far would
30  // be i. But the actual value is s.
31  if (f(pv[i])) {
32  // For true element, s spots from the beginning have
33  // been taken.
34  pv2[s] = pv[i];
35  } else {
36  // There are i-s elements before this element that
37  // are false. From the end, i-s spots have been
38  // taken.
39  pv2[n-1-(i-s)] = pv[i];
40  }
41  },
43  }
44 
45  template <typename T>
46  void amrex_stable_partition_helper (T* p, int n2)
47  {
48  if (n2 > 1) {
49  int npairs = n2/2;
50  amrex::ParallelFor(npairs, [=] AMREX_GPU_DEVICE (int i) noexcept
51  {
52  amrex::Swap(p[i], p[n2-1-i]);
53  });
55  }
56  }
57 }
58 
79 template <typename T, typename F>
80 int Partition (T* data, int beg, int end, F && f)
81 {
82  int n = end - beg;
84  int tot = detail::amrex_partition_helper(data + beg, v2.dataPtr(), n, std::forward<F>(f));
85  Gpu::copy(Gpu::deviceToDevice, v2.begin(), v2.end(), data + beg);
86  return tot;
87 }
88 
108 template <typename T, typename F>
109 int Partition (T* data, int n, F && f)
110 {
111  return Partition(data, 0, n, std::forward<F>(f));
112 }
113 
132 template <typename T, typename F>
134 {
135  int n = v.size();
136  Gpu::DeviceVector<T> v2(n);
137  int tot = detail::amrex_partition_helper(v.dataPtr(), v2.dataPtr(), n, std::forward<F>(f));
138  v.swap(v2);
139  return tot;
140 }
141 
164 template <typename T, typename F>
165 int StablePartition (T* data, int beg, int end, F && f)
166 {
167  int n = Partition(data, beg, end, std::forward<F>(f));
168  int n2 = end - beg - n;
169  detail::amrex_stable_partition_helper(data + beg + n, n2);
170  return n;
171 }
172 
194 template <typename T, typename F>
195 int StablePartition (T* data, int n, F && f)
196 {
197  return StablePartition(data, 0, n, std::forward<F>(f));
198 }
199 
220 template <typename T, typename F>
222 {
223  int n = Partition(v, std::forward<F>(f));
224  int n2 = static_cast<int>(v.size()) - n;
226  return n;
227 }
228 
229 #else
230 
251 template <typename T, typename F>
252 int Partition (T* data, int beg, int end, F && f)
253 {
254  auto it = std::partition(data + beg, data + end, f);
255  return static_cast<int>(std::distance(data + beg, it));
256 }
257 
277 template <typename T, typename F>
278 int Partition (T* data, int n, F && f)
279 {
280  return Partition(data, 0, n, std::forward<F>(f));
281 }
282 
301 template <typename T, typename F>
302 int Partition (Gpu::DeviceVector<T>& v, F && f)
303 {
304  auto it = std::partition(v.begin(), v.end(), f);
305  return static_cast<int>(std::distance(v.begin(), it));
306 }
307 
330 template <typename T, typename F>
331 int StablePartition (T* data, int beg, int end, F && f)
332 {
333  auto it = std::stable_partition(data + beg, data + end, f);
334  return static_cast<int>(std::distance(data + beg, it));
335 }
336 
358 template <typename T, typename F>
359 int StablePartition (T* data, int n, F && f)
360 {
361  return StablePartition(data, 0, n, std::forward<F>(f));
362 }
363 
384 template <typename T, typename F>
385 int StablePartition (Gpu::DeviceVector<T>& v, F && f)
386 {
387  auto it = std::stable_partition(v.begin(), v.end(), f);
388  return static_cast<int>(std::distance(v.begin(), it));
389 }
390 
391 #endif
392 
393 }
394 
395 #endif
#define AMREX_RESTRICT
Definition: AMReX_Extension.H:37
#define AMREX_GPU_DEVICE
Definition: AMReX_GpuQualifiers.H:18
Definition: AMReX_PODVector.H:246
size_type size() const noexcept
Definition: AMReX_PODVector.H:575
void swap(PODVector< T, Allocator > &a_vector) noexcept
Definition: AMReX_PODVector.H:677
iterator begin() noexcept
Definition: AMReX_PODVector.H:601
iterator end() noexcept
Definition: AMReX_PODVector.H:605
T * dataPtr() noexcept
Definition: AMReX_PODVector.H:597
void copy(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous st...
Definition: AMReX_GpuContainers.H:121
static constexpr DeviceToDevice deviceToDevice
Definition: AMReX_GpuContainers.H:100
void streamSynchronize() noexcept
Definition: AMReX_GpuDevice.H:237
static constexpr struct amrex::Scan::Type::Exclusive exclusive
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
void amrex_stable_partition_helper(T *p, int n2)
Definition: AMReX_Partition.H:46
int amrex_partition_helper(T const *AMREX_RESTRICT pv, T *AMREX_RESTRICT pv2, int n, F &&f)
Definition: AMReX_Partition.H:18
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
int Partition(T *data, int beg, int end, F &&f)
A GPU-capable partition function for contiguous data.
Definition: AMReX_Partition.H:80
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Swap(T &t1, T &t2) noexcept
Definition: AMReX_Algorithm.H:75
int StablePartition(T *data, int beg, int end, F &&f)
A GPU-capable partition function for contiguous data.
Definition: AMReX_Partition.H:165
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Definition: AMReX_Box.H:1890
Definition: AMReX_FabArrayCommI.H:841