Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_Algorithm.H
Go to the documentation of this file.
1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_Extension.H>
7#include <AMReX_Dim3.H>
8#include <AMReX_BLassert.H>
9#include <AMReX_Math.H>
10
11#include <algorithm>
12#include <limits>
13#include <type_traits>
14#include <cstdint>
15#include <climits>
16
17namespace amrex
18{
22 template <class T>
24 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b) noexcept
25 {
26 return std::min(a,b);
27 }
28
32 template <class T, class ... Ts>
34 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b, const Ts& ... c) noexcept
35 {
36 return min(min(a,b),c...);
37 }
38
42 template <class T>
44 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b) noexcept
45 {
46 return std::max(a,b);
47 }
48
52 template <class T, class ... Ts>
54 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b, const Ts& ... c) noexcept
55 {
56 return max(max(a,b),c...);
57 }
58
60 template <class T>
62 T elemwiseMin (T const& a, T const& b) noexcept {
63 return T{amrex::min(a.x,b.x),amrex::min(a.y,b.y),amrex::min(a.z,b.z)};
64 }
65
67 template <class T, class ... Ts>
69 T elemwiseMin (const T& a, const T& b, const Ts& ... c) noexcept
70 {
71 return elemwiseMin(elemwiseMin(a,b),c...);
72 }
73
75 template <class T>
77 T elemwiseMax (T const& a, T const& b) noexcept {
78 return T{amrex::max(a.x,b.x),amrex::max(a.y,b.y),amrex::max(a.z,b.z)};
79 }
80
82 template <class T, class ... Ts>
84 T elemwiseMax (const T& a, const T& b, const Ts& ... c) noexcept
85 {
86 return elemwiseMax(elemwiseMax(a,b),c...);
87 }
88
91 template<typename T>
93 void Swap (T& t1, T& t2) noexcept
94 {
95 T temp = std::move(t1);
96 t1 = std::move(t2);
97 t2 = std::move(temp);
98 }
99
104 template <typename T>
106 constexpr const T& Clamp (const T& v, const T& lo, const T& hi)
107 {
108 return (v < lo) ? lo : (hi < v) ? hi : v;
109 }
110
113 template <typename T>
115 std::enable_if_t<std::is_floating_point_v<T>,bool>
116 almostEqual (T x, T y, int ulp = 2)
117 {
118 // Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
119 // The machine epsilon has to be scaled to the magnitude of the values used
120 // and multiplied by the desired precision in ULPs (units in the last place)
121 return std::abs(x-y) <= std::numeric_limits<T>::epsilon() * std::abs(x+y) * ulp
122 // unless the result is subnormal
123 || std::abs(x-y) < std::numeric_limits<T>::min();
124 }
125
142 template <class T, class F,
143 std::enable_if_t<std::is_floating_point_v<T>,int>FOO = 0>
145 T bisect (T lo, T hi, F f, T tol=1e-12, int max_iter=100)
146 {
148 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
149
150 T flo = f(lo);
151 T fhi = f(hi);
152
153 if (flo == T(0)) { return flo; }
154 if (fhi == T(0)) { return fhi; }
155
156 AMREX_ASSERT_WITH_MESSAGE(flo * fhi <= T(0),
157 "Error - calling bisect but lo and hi don't bracket a root.");
158
159 T mi = (lo + hi) / T(2);
160 T fmi = T(0);
161 int n = 1;
162 while (n <= max_iter)
163 {
164 if (hi - lo < tol || almostEqual(lo,hi)) { break; }
165 mi = (lo + hi) / T(2);
166 fmi = f(mi);
167 if (fmi == T(0)) { break; }
168 fmi*flo < T(0) ? hi = mi : lo = mi;
169 flo = f(lo);
170 fhi = f(hi);
171 ++n;
172 }
173
174 AMREX_ASSERT_WITH_MESSAGE(n < max_iter,
175 "Error - maximum number of iterations reached in bisect.");
176
177 return mi;
178 }
179
197 template <typename T, typename I,
198 std::enable_if_t<std::is_integral_v<I>,int> = 0>
200 I bisect (T const* d, I lo, I hi, T const& v) {
201 while (lo <= hi) {
202 int mid = lo + (hi-lo)/2;
203 if (v >= d[mid] && v < d[mid+1]) {
204 return mid;
205 } else if (v < d[mid]) {
206 hi = mid-1;
207 } else {
208 lo = mid+1;
209 }
210 };
211 return hi;
212 }
213
228 template<typename ItType, typename ValType>
230 ItType upper_bound (ItType first, ItType last, const ValType& val)
231 {
233 std::ptrdiff_t count = last-first;
234 while(count>0){
235 auto it = first;
236 const auto step = count/2;
237 it += step;
238 if (!(val < *it)){
239 first = ++it;
240 count -= step + 1;
241 }
242 else{
243 count = step;
244 }
245 }
246 return first;
247 ))
249 return std::upper_bound(first, last, val);
250 ))
251 }
252
267 template<typename ItType, typename ValType>
269 ItType lower_bound (ItType first, ItType last, const ValType& val)
270 {
272 std::ptrdiff_t count = last-first;
273 while(count>0)
274 {
275 auto it = first;
276 const auto step = count/2;
277 it += step;
278 if (*it < val){
279 first = ++it;
280 count -= step + 1;
281 }
282 else{
283 count = step;
284 }
285 }
286
287 return first;
288 ))
290 return std::lower_bound(first, last, val);
291 ))
292 }
293
311 template<typename ItType, typename ValType,
312 std::enable_if_t<
313 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
314 std::is_floating_point_v<ValType>,
315 int> = 0>
317 void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop)
318 {
319 const std::ptrdiff_t count = last-first;
320 if (count >= 2){
321 const auto delta = (stop - start)/(count - 1);
322 for (std::ptrdiff_t i = 0; i < count-1; ++i){
323 *(first++) = start + i*delta;
324 }
325 *first = stop;
326 }
327 }
328
347 template<typename ItType, typename ValType,
348 std::enable_if_t<
349 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
350 std::is_floating_point_v<ValType>,
351 int> = 0>
353 void logspace (ItType first, const ItType& last,
354 const ValType& start, const ValType& stop, const ValType& base)
355 {
356 const std::ptrdiff_t count = last-first;
357 if (count >= 2){
358 const auto delta = (stop - start)/(count - 1);
359 for (std::ptrdiff_t i = 0; i < count-1; ++i){
360 *(first++) = std::pow(base, start + i*delta);
361 }
362 *first = std::pow(base, stop);
363 }
364 }
365
367namespace detail {
368
369struct clzll_tag {};
370struct clzl_tag : clzll_tag {};
371struct clz_tag : clzl_tag {};
372
373// in gcc and clang, there are three versions of __builtin_clz taking unsigned int,
374// unsigned long, and unsigned long long inputs. Because the sizes of these data types
375// vary on different platforms, we work with fixed-width integer types.
376// these tags and overloads select the smallest version of __builtin_clz that will hold the input type
377template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned int)>>
378AMREX_FORCE_INLINE
379int builtin_clz_wrapper (clz_tag, T x) noexcept
380{
381 return static_cast<int>(__builtin_clz(x) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
382}
383
384template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long)>>
385AMREX_FORCE_INLINE
386int builtin_clz_wrapper (clzl_tag, T x) noexcept
387{
388 return static_cast<int>(__builtin_clzl(x) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
389}
390
391template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long long)>>
392AMREX_FORCE_INLINE
393int builtin_clz_wrapper (clzll_tag, T x) noexcept
394{
395 return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
396}
397
398}
400
402template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
403 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
404 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
405 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>
407int clz (T x) noexcept;
408
410
412int clz_generic (std::uint8_t x) noexcept
413{
414#if !defined(__NVCOMPILER)
415 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
416#else
417 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
418#endif
419 auto upper = x >> 4;
420 auto lower = x & 0xF;
421 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
422}
423
425int clz_generic (std::uint16_t x) noexcept
426{
427 auto upper = std::uint8_t(x >> 8);
428 auto lower = std::uint8_t(x & 0xFF);
429 return upper ? clz(upper) : 8 + clz(lower);
430}
431
433int clz_generic (std::uint32_t x) noexcept
434{
435 auto upper = std::uint16_t(x >> 16);
436 auto lower = std::uint16_t(x & 0xFFFF);
437 return upper ? clz(upper) : 16 + clz(lower);
438}
439
441int clz_generic (std::uint64_t x) noexcept
442{
443 auto upper = std::uint32_t(x >> 32);
444 auto lower = std::uint32_t(x & 0xFFFFFFFF);
445 return upper ? clz(upper) : 32 + clz(lower);
446}
447
449
450#if defined AMREX_USE_CUDA
451
453namespace detail {
454 // likewise with CUDA, there are __clz functions that take (signed) int and long long int
455 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(int)> >
456 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
457 int clz_wrapper (clz_tag, T x) noexcept
458 {
459 return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
460 }
461
462 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(long long int)> >
463 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
464 int clz_wrapper (clzll_tag, T x) noexcept
465 {
466 return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
467 }
468}
470
471template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
472 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
473 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
474 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
476int clz (T x) noexcept
477{
478 AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
479#if AMREX_HAS_BUILTIN_CLZ
480 AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
481#else
482 AMREX_IF_ON_HOST((return clz_generic(x);))
483#endif
484}
485
486#else // !defined AMREX_USE_CUDA
487
488template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
489 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
490 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
491 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
493int clz (T x) noexcept
494{
495#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
496 return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
497#else
498 return clz_generic(x);
499#endif
500}
501
502#endif // defined AMREX_USE_CUDA
503
504}
505
506#endif
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:145
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:230
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:93
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:77
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:106
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:24
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:269
__host__ __device__ std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:116
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:353
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:317
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:476
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62