Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_Algorithm.H
Go to the documentation of this file.
1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_Extension.H>
7#include <AMReX_Dim3.H>
8#include <AMReX_BLassert.H>
9#include <AMReX_Math.H>
10
11#include <algorithm>
12#include <concepts>
13#include <limits>
14#include <type_traits>
15#include <cstdint>
16#include <climits>
17
18namespace amrex
19{
23 template <class T>
25 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b) noexcept
26 {
27 return std::min(a,b);
28 }
29
33 template <class T, class ... Ts>
35 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b, const Ts& ... c) noexcept
36 {
37 return min(min(a,b),c...);
38 }
39
43 template <class T>
45 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b) noexcept
46 {
47 return std::max(a,b);
48 }
49
53 template <class T, class ... Ts>
55 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b, const Ts& ... c) noexcept
56 {
57 return max(max(a,b),c...);
58 }
59
61 template <class T>
63 T elemwiseMin (T const& a, T const& b) noexcept {
64 return T{amrex::min(a.x,b.x),amrex::min(a.y,b.y),amrex::min(a.z,b.z)};
65 }
66
68 template <class T, class ... Ts>
70 T elemwiseMin (const T& a, const T& b, const Ts& ... c) noexcept
71 {
72 return elemwiseMin(elemwiseMin(a,b),c...);
73 }
74
76 template <class T>
78 T elemwiseMax (T const& a, T const& b) noexcept {
79 return T{amrex::max(a.x,b.x),amrex::max(a.y,b.y),amrex::max(a.z,b.z)};
80 }
81
83 template <class T, class ... Ts>
85 T elemwiseMax (const T& a, const T& b, const Ts& ... c) noexcept
86 {
87 return elemwiseMax(elemwiseMax(a,b),c...);
88 }
89
92 template<typename T>
94 void Swap (T& t1, T& t2) noexcept
95 {
96 T temp = std::move(t1);
97 t1 = std::move(t2);
98 t2 = std::move(temp);
99 }
100
105 template <typename T>
107 constexpr const T& Clamp (const T& v, const T& lo, const T& hi)
108 {
109 if (v < lo) {
110 return lo; // NOLINT(bugprone-return-const-ref-from-parameter)
111 } else if (hi < v) {
112 return hi; // NOLINT(bugprone-return-const-ref-from-parameter)
113 } else {
114 return v; // NOLINT(bugprone-return-const-ref-from-parameter)
115 }
116 }
117
120 template <std::floating_point T>
122 bool
123 almostEqual (T x, T y, int ulp = 2)
124 {
125 // Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
126 // The machine epsilon has to be scaled to the magnitude of the values used
127 // and multiplied by the desired precision in ULPs (units in the last place)
128 return std::abs(x-y) <= std::numeric_limits<T>::epsilon() * std::abs(x+y) * ulp
129 // unless the result is subnormal
130 || std::abs(x-y) < std::numeric_limits<T>::min();
131 }
132
149 template <std::floating_point T, class F>
151 T bisect (T lo, T hi, F f, T tol=1e-12, int max_iter=100)
152 {
154 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
155
156 T flo = f(lo);
157 T fhi = f(hi);
158
159 if (flo == T(0)) { return lo; }
160 if (fhi == T(0)) { return hi; }
161
162 AMREX_ASSERT_WITH_MESSAGE(flo * fhi <= T(0),
163 "Error - calling bisect but lo and hi don't bracket a root.");
164
165 T mi = (lo + hi) / T(2);
166 T fmi = T(0);
167 int n = 1;
168 while (n <= max_iter)
169 {
170 if (hi - lo < tol || almostEqual(lo,hi)) { break; }
171 mi = (lo + hi) / T(2);
172 fmi = f(mi);
173 if (fmi == T(0)) { break; }
174 fmi*flo < T(0) ? hi = mi : lo = mi;
175 flo = f(lo);
176 fhi = f(hi);
177 ++n;
178 }
179
180 AMREX_ASSERT_WITH_MESSAGE(n <= max_iter,
181 "Error - maximum number of iterations reached in bisect.");
182
183 return mi;
184 }
185
207 template <typename T, std::integral I>
209 I bisect (T const* d, I lo, I hi, T const& v)
210 {
211 while (hi - lo > 1) {
212 I mid = lo + (hi - lo) / 2;
213 if (v < d[mid]) {
214 hi = mid;
215 } else {
216 lo = mid;
217 }
218 };
219 return lo;
220 }
221
236 template<typename ItType, typename ValType>
238 ItType upper_bound (ItType first, ItType last, const ValType& val)
239 {
241 std::ptrdiff_t count = last-first;
242 while(count>0){
243 auto it = first;
244 const auto step = count/2;
245 it += step;
246 if (!(val < *it)){
247 first = ++it;
248 count -= step + 1;
249 }
250 else{
251 count = step;
252 }
253 }
254 return first;
255 ))
257 return std::upper_bound(first, last, val);
258 ))
259 }
260
275 template<typename ItType, typename ValType>
277 ItType lower_bound (ItType first, ItType last, const ValType& val)
278 {
280 std::ptrdiff_t count = last-first;
281 while(count>0)
282 {
283 auto it = first;
284 const auto step = count/2;
285 it += step;
286 if (*it < val){
287 first = ++it;
288 count -= step + 1;
289 }
290 else{
291 count = step;
292 }
293 }
294
295 return first;
296 ))
298 return std::lower_bound(first, last, val);
299 ))
300 }
301
319 template<typename ItType, std::floating_point ValType>
320 requires (std::floating_point<typename std::iterator_traits<ItType>::value_type>)
322 void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop)
323 {
324 const std::ptrdiff_t count = last-first;
325 if (count >= 2){
326 const auto delta = (stop - start)/(count - 1);
327 for (std::ptrdiff_t i = 0; i < count-1; ++i){
328 *(first++) = start + i*delta;
329 }
330 *first = stop;
331 }
332 }
333
352 template<typename ItType, std::floating_point ValType>
353 requires (std::floating_point<typename std::iterator_traits<ItType>::value_type>)
355 void logspace (ItType first, const ItType& last,
356 const ValType& start, const ValType& stop, const ValType& base)
357 {
358 const std::ptrdiff_t count = last-first;
359 if (count >= 2){
360 const auto delta = (stop - start)/(count - 1);
361 for (std::ptrdiff_t i = 0; i < count-1; ++i){
362 *(first++) = std::pow(base, start + i*delta);
363 }
364 *first = std::pow(base, stop);
365 }
366 }
367
369namespace detail {
370
371struct clzll_tag {};
372struct clzl_tag : clzll_tag {};
373struct clz_tag : clzl_tag {};
374
375// in gcc and clang, there are three versions of __builtin_clz taking unsigned int,
376// unsigned long, and unsigned long long inputs. Because the sizes of these data types
377// vary on different platforms, we work with fixed-width integer types.
378// these tags and overloads select the smallest version of __builtin_clz that will hold the input type
379template <typename T>
380requires (sizeof(T) <= sizeof(unsigned int))
382int builtin_clz_wrapper (clz_tag, T x) noexcept
383{
384 return static_cast<int>(__builtin_clz(x) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
385}
386
387template <typename T>
388requires (sizeof(T) <= sizeof(unsigned long))
390int builtin_clz_wrapper (clzl_tag, T x) noexcept
391{
392 return static_cast<int>(__builtin_clzl(x) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
393}
394
395template <typename T>
396requires (sizeof(T) <= sizeof(unsigned long long))
398int builtin_clz_wrapper (clzll_tag, T x) noexcept
399{
400 return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
401}
402
403}
405
407template <class T>
408requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
409 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
410 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
411 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
413int clz (T x) noexcept;
414
416
418int clz_generic (std::uint8_t x) noexcept
419{
420#if !defined(__NVCOMPILER)
421 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
422#else
423 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
424#endif
425 auto upper = x >> 4;
426 auto lower = x & 0xF;
427 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
428}
429
431int clz_generic (std::uint16_t x) noexcept
432{
433 auto upper = std::uint8_t(x >> 8);
434 auto lower = std::uint8_t(x & 0xFF);
435 return upper ? clz(upper) : 8 + clz(lower);
436}
437
439int clz_generic (std::uint32_t x) noexcept
440{
441 auto upper = std::uint16_t(x >> 16);
442 auto lower = std::uint16_t(x & 0xFFFF);
443 return upper ? clz(upper) : 16 + clz(lower);
444}
445
447int clz_generic (std::uint64_t x) noexcept
448{
449 auto upper = std::uint32_t(x >> 32);
450 auto lower = std::uint32_t(x & 0xFFFFFFFF);
451 return upper ? clz(upper) : 32 + clz(lower);
452}
453
455
456#if defined AMREX_USE_CUDA
457
459namespace detail {
460 // likewise with CUDA, there are __clz functions that take (signed) int and long long int
461 template <typename T>
462 requires (sizeof(T) <= sizeof(int))
464 int clz_wrapper (clz_tag, T x) noexcept
465 {
466 return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
467 }
468
469 template <typename T>
470 requires (sizeof(T) <= sizeof(long long int))
472 int clz_wrapper (clzll_tag, T x) noexcept
473 {
474 return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
475 }
476}
478
479template <class T>
480requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
481 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
482 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
483 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
485int clz (T x) noexcept
486{
487 AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
488#if AMREX_HAS_BUILTIN_CLZ
489 AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
490#else
491 AMREX_IF_ON_HOST((return clz_generic(x);))
492#endif
493}
494
495#else // !defined AMREX_USE_CUDA
496
497template <class T>
498requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
499 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
500 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
501 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
503int clz (T x) noexcept
504{
505#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
506 return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
507#else
508 return clz_generic(x);
509#endif
510}
511
512#endif // defined AMREX_USE_CUDA
513
514}
515
516#endif
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:50
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:238
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:355
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:94
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:151
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:78
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:107
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:25
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:485
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:277
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:322
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:45
__host__ __device__ bool almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:123
const int[]
Definition AMReX_BLProfiler.cpp:1664
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:63