Block-Structured AMR Software Framework
AMReX_Algorithm.H
Go to the documentation of this file.
1 #ifndef AMREX_ALGORITHM_H_
2 #define AMREX_ALGORITHM_H_
3 #include <AMReX_Config.H>
4 
5 #include <AMReX_GpuQualifiers.H>
6 #include <AMReX_Extension.H>
7 #include <AMReX_Dim3.H>
8 #include <AMReX_BLassert.H>
9 #include <AMReX_Math.H>
10 
11 #include <algorithm>
12 #include <limits>
13 #include <type_traits>
14 #include <cstdint>
15 #include <climits>
16 
17 namespace amrex
18 {
19  template <class T>
21  AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b) noexcept
22  {
23  return std::min(a,b);
24  }
25 
26  template <class T, class ... Ts>
28  AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b, const Ts& ... c) noexcept
29  {
30  return min(min(a,b),c...);
31  }
32 
33  template <class T>
35  AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b) noexcept
36  {
37  return std::max(a,b);
38  }
39 
40  template <class T, class ... Ts>
42  AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b, const Ts& ... c) noexcept
43  {
44  return max(max(a,b),c...);
45  }
46 
47  template <class T>
49  T elemwiseMin (T const& a, T const& b) noexcept {
50  return T{amrex::min(a.x,b.x),amrex::min(a.y,b.y),amrex::min(a.z,b.z)};
51  }
52 
53  template <class T, class ... Ts>
55  T elemwiseMin (const T& a, const T& b, const Ts& ... c) noexcept
56  {
57  return elemwiseMin(elemwiseMin(a,b),c...);
58  }
59 
60  template <class T>
62  T elemwiseMax (T const& a, T const& b) noexcept {
63  return T{amrex::max(a.x,b.x),amrex::max(a.y,b.y),amrex::max(a.z,b.z)};
64  }
65 
66  template <class T, class ... Ts>
68  T elemwiseMax (const T& a, const T& b, const Ts& ... c) noexcept
69  {
70  return elemwiseMax(elemwiseMax(a,b),c...);
71  }
72 
73  template<typename T>
75  void Swap (T& t1, T& t2) noexcept
76  {
77  T temp = std::move(t1);
78  t1 = std::move(t2);
79  t2 = std::move(temp);
80  }
81 
82  template <typename T>
84  constexpr const T& Clamp (const T& v, const T& lo, const T& hi)
85  {
86  return (v < lo) ? lo : (hi < v) ? hi : v;
87  }
88 
89  // Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
90  template <typename T>
92  std::enable_if_t<std::is_floating_point_v<T>,bool>
93  almostEqual (T x, T y, int ulp = 2)
94  {
95  // the machine epsilon has to be scaled to the magnitude of the values used
96  // and multiplied by the desired precision in ULPs (units in the last place)
97  return std::abs(x-y) <= std::numeric_limits<T>::epsilon() * std::abs(x+y) * ulp
98  // unless the result is subnormal
100  }
101 
102  template <class T, class F,
103  std::enable_if_t<std::is_floating_point_v<T>,int>FOO = 0>
105  T bisect (T lo, T hi, F f, T tol=1e-12, int max_iter=100)
106  {
108  "Error - calling bisect but lo and hi don't describe a reasonable interval.");
109 
110  T flo = f(lo);
111  T fhi = f(hi);
112 
113  if (flo == T(0)) { return flo; }
114  if (fhi == T(0)) { return fhi; }
115 
116  AMREX_ASSERT_WITH_MESSAGE(flo * fhi <= T(0),
117  "Error - calling bisect but lo and hi don't bracket a root.");
118 
119  T mi = (lo + hi) / T(2);
120  T fmi = T(0);
121  int n = 1;
122  while (n <= max_iter)
123  {
124  if (hi - lo < tol || almostEqual(lo,hi)) { break; }
125  mi = (lo + hi) / T(2);
126  fmi = f(mi);
127  if (fmi == T(0)) { break; }
128  fmi*flo < T(0) ? hi = mi : lo = mi;
129  flo = f(lo);
130  fhi = f(hi);
131  ++n;
132  }
133 
134  AMREX_ASSERT_WITH_MESSAGE(n < max_iter,
135  "Error - maximum number of iterations reached in bisect.");
136 
137  return mi;
138  }
139 
140  // Find I in the range [lo,hi) that T[I] <= v < T[I+1].
141  // It is assumed that the input data are sorted and T[lo] <= v < T[hi].
142  // Note that this is different from std::lower_bound.
143  template <typename T, typename I,
144  std::enable_if_t<std::is_integral_v<I>,int> = 0>
146  I bisect (T const* d, I lo, I hi, T const& v) {
147  while (lo <= hi) {
148  int mid = lo + (hi-lo)/2;
149  if (v >= d[mid] && v < d[mid+1]) {
150  return mid;
151  } else if (v < d[mid]) {
152  hi = mid-1;
153  } else {
154  lo = mid+1;
155  }
156  };
157  return hi;
158  }
159 
160  template<typename ItType, typename ValType>
162  ItType upper_bound (ItType first, ItType last, const ValType& val)
163  {
165  std::ptrdiff_t count = last-first;
166  while(count>0){
167  auto it = first;
168  const auto step = count/2;
169  it += step;
170  if (!(val < *it)){
171  first = ++it;
172  count -= step + 1;
173  }
174  else{
175  count = step;
176  }
177  }
178  return first;
179  ))
181  return std::upper_bound(first, last, val);
182  ))
183  }
184 
185  template<typename ItType, typename ValType>
187  ItType lower_bound (ItType first, ItType last, const ValType& val)
188  {
190  std::ptrdiff_t count = last-first;
191  while(count>0)
192  {
193  auto it = first;
194  const auto step = count/2;
195  it += step;
196  if (*it < val){
197  first = ++it;
198  count -= step + 1;
199  }
200  else{
201  count = step;
202  }
203  }
204 
205  return first;
206  ))
208  return std::lower_bound(first, last, val);
209  ))
210  }
211 
212  template<typename ItType, typename ValType,
213  std::enable_if_t<
214  std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
215  std::is_floating_point_v<ValType>,
216  int> = 0>
218  void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop)
219  {
220  const std::ptrdiff_t count = last-first;
221  if (count >= 2){
222  const auto delta = (stop - start)/(count - 1);
223  for (std::ptrdiff_t i = 0; i < count-1; ++i){
224  *(first++) = start + i*delta;
225  }
226  *first = stop;
227  }
228  }
229 
230  template<typename ItType, typename ValType,
231  std::enable_if_t<
232  std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
233  std::is_floating_point_v<ValType>,
234  int> = 0>
236  void logspace (ItType first, const ItType& last,
237  const ValType& start, const ValType& stop, const ValType& base)
238  {
239  const std::ptrdiff_t count = last-first;
240  if (count >= 2){
241  const auto delta = (stop - start)/(count - 1);
242  for (std::ptrdiff_t i = 0; i < count-1; ++i){
243  *(first++) = std::pow(base, start + i*delta);
244  }
245  *first = std::pow(base, stop);
246  }
247  }
248 
249 namespace detail {
250 
251 struct clzll_tag {};
252 struct clzl_tag : clzll_tag {};
253 struct clz_tag : clzl_tag {};
254 
255 // in gcc and clang, there are three versions of __builtin_clz taking unsigned int,
256 // unsigned long, and unsigned long long inputs. Because the sizes of these data types
257 // vary on different platforms, we work with fixed-width integer types.
258 // these tags and overloads select the smallest version of __builtin_clz that will hold the input type
259 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned int)>>
260 AMREX_FORCE_INLINE
261 int builtin_clz_wrapper (clz_tag, T x) noexcept
262 {
263  return static_cast<int>(__builtin_clz(x) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
264 }
265 
266 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long)>>
267 AMREX_FORCE_INLINE
268 int builtin_clz_wrapper (clzl_tag, T x) noexcept
269 {
270  return static_cast<int>(__builtin_clzl(x) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
271 }
272 
273 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long long)>>
274 AMREX_FORCE_INLINE
275 int builtin_clz_wrapper (clzll_tag, T x) noexcept
276 {
277  return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
278 }
279 
280 }
281 
282 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
283  std::is_same_v<std::decay_t<T>,std::uint16_t> ||
284  std::is_same_v<std::decay_t<T>,std::uint32_t> ||
285  std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>
287 int clz (T x) noexcept;
288 
290 int clz_generic (std::uint8_t x) noexcept
291 {
292 #if !defined(__NVCOMPILER)
293  static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
294 #else
295  constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
296 #endif
297  auto upper = x >> 4;
298  auto lower = x & 0xF;
299  return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
300 }
301 
303 int clz_generic (std::uint16_t x) noexcept
304 {
305  auto upper = std::uint8_t(x >> 8);
306  auto lower = std::uint8_t(x & 0xFF);
307  return upper ? clz(upper) : 8 + clz(lower);
308 }
309 
311 int clz_generic (std::uint32_t x) noexcept
312 {
313  auto upper = std::uint16_t(x >> 16);
314  auto lower = std::uint16_t(x & 0xFFFF);
315  return upper ? clz(upper) : 16 + clz(lower);
316 }
317 
319 int clz_generic (std::uint64_t x) noexcept
320 {
321  auto upper = std::uint32_t(x >> 32);
322  auto lower = std::uint32_t(x & 0xFFFFFFFF);
323  return upper ? clz(upper) : 32 + clz(lower);
324 }
325 
326 #if defined AMREX_USE_CUDA
327 
328 namespace detail {
329  // likewise with CUDA, there are __clz functions that take (signed) int and long long int
330  template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(int)> >
331  AMREX_GPU_DEVICE AMREX_FORCE_INLINE
332  int clz_wrapper (clz_tag, T x) noexcept
333  {
334  return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
335  }
336 
337  template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(long long int)> >
338  AMREX_GPU_DEVICE AMREX_FORCE_INLINE
339  int clz_wrapper (clzll_tag, T x) noexcept
340  {
341  return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
342  }
343 }
344 
345 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
346  std::is_same_v<std::decay_t<T>,std::uint16_t> ||
347  std::is_same_v<std::decay_t<T>,std::uint32_t> ||
348  std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
350 int clz (T x) noexcept
351 {
352  AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
353 #if AMREX_HAS_BUILTIN_CLZ
354  AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
355 #else
356  AMREX_IF_ON_HOST((return clz_generic(x);))
357 #endif
358 }
359 
360 #else // !defined AMREX_USE_CUDA
361 
362 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
363  std::is_same_v<std::decay_t<T>,std::uint16_t> ||
364  std::is_same_v<std::decay_t<T>,std::uint32_t> ||
365  std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
367 int clz (T x) noexcept
368 {
369 #if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
370  return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
371 #else
372  return clz_generic(x);
373 #endif
374 }
375 
376 #endif // defined AMREX_USE_CUDA
377 
378 }
379 
380 #endif
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition: AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition: AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
Definition: AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Definition: AMReX_Algorithm.H:236
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic(std::uint8_t x) noexcept
Definition: AMReX_Algorithm.H:290
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T abs(const GpuComplex< T > &a_z) noexcept
Return the absolute value of a complex number.
Definition: AMReX_GpuComplex.H:356
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b, const Ts &... c) noexcept
Definition: AMReX_Algorithm.H:42
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Swap(T &t1, T &t2) noexcept
Definition: AMReX_Algorithm.H:75
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition: AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMax(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:62
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b, const Ts &... c) noexcept
Definition: AMReX_Algorithm.H:28
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition: AMReX_Algorithm.H:93
AMREX_GPU_HOST_DEVICE void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Definition: AMReX_Algorithm.H:218
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuComplex< T > pow(const GpuComplex< T > &a_z, const T &a_y) noexcept
Raise a complex number to a (real) power.
Definition: AMReX_GpuComplex.H:418
AMREX_GPU_HOST_DEVICE ItType upper_bound(ItType first, ItType last, const ValType &val)
Definition: AMReX_Algorithm.H:162
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & Clamp(const T &v, const T &lo, const T &hi)
Definition: AMReX_Algorithm.H:84
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMin(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz(T x) noexcept
Definition: AMReX_Algorithm.H:350
AMREX_GPU_HOST_DEVICE ItType lower_bound(ItType first, ItType last, const ValType &val)
Definition: AMReX_Algorithm.H:187
Definition: AMReX_FabArrayCommI.H:841
Definition: AMReX_Algorithm.H:253
Definition: AMReX_Algorithm.H:252
Definition: AMReX_Algorithm.H:251