Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_Algorithm.H
Go to the documentation of this file.
1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_Extension.H>
7#include <AMReX_Dim3.H>
8#include <AMReX_BLassert.H>
9#include <AMReX_Math.H>
10
11#include <algorithm>
12#include <limits>
13#include <type_traits>
14#include <cstdint>
15#include <climits>
16
17namespace amrex
18{
19 template <class T>
21 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b) noexcept
22 {
23 return std::min(a,b);
24 }
25
26 template <class T, class ... Ts>
28 AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b, const Ts& ... c) noexcept
29 {
30 return min(min(a,b),c...);
31 }
32
33 template <class T>
35 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b) noexcept
36 {
37 return std::max(a,b);
38 }
39
40 template <class T, class ... Ts>
42 AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b, const Ts& ... c) noexcept
43 {
44 return max(max(a,b),c...);
45 }
46
47 template <class T>
49 T elemwiseMin (T const& a, T const& b) noexcept {
50 return T{amrex::min(a.x,b.x),amrex::min(a.y,b.y),amrex::min(a.z,b.z)};
51 }
52
53 template <class T, class ... Ts>
55 T elemwiseMin (const T& a, const T& b, const Ts& ... c) noexcept
56 {
57 return elemwiseMin(elemwiseMin(a,b),c...);
58 }
59
60 template <class T>
62 T elemwiseMax (T const& a, T const& b) noexcept {
63 return T{amrex::max(a.x,b.x),amrex::max(a.y,b.y),amrex::max(a.z,b.z)};
64 }
65
66 template <class T, class ... Ts>
68 T elemwiseMax (const T& a, const T& b, const Ts& ... c) noexcept
69 {
70 return elemwiseMax(elemwiseMax(a,b),c...);
71 }
72
73 template<typename T>
75 void Swap (T& t1, T& t2) noexcept
76 {
77 T temp = std::move(t1);
78 t1 = std::move(t2);
79 t2 = std::move(temp);
80 }
81
82 template <typename T>
84 constexpr const T& Clamp (const T& v, const T& lo, const T& hi)
85 {
86 return (v < lo) ? lo : (hi < v) ? hi : v;
87 }
88
89 // Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
90 template <typename T>
92 std::enable_if_t<std::is_floating_point_v<T>,bool>
93 almostEqual (T x, T y, int ulp = 2)
94 {
95 // the machine epsilon has to be scaled to the magnitude of the values used
96 // and multiplied by the desired precision in ULPs (units in the last place)
97 return std::abs(x-y) <= std::numeric_limits<T>::epsilon() * std::abs(x+y) * ulp
98 // unless the result is subnormal
99 || std::abs(x-y) < std::numeric_limits<T>::min();
100 }
101
102 template <class T, class F,
103 std::enable_if_t<std::is_floating_point_v<T>,int>FOO = 0>
105 T bisect (T lo, T hi, F f, T tol=1e-12, int max_iter=100)
106 {
108 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
109
110 T flo = f(lo);
111 T fhi = f(hi);
112
113 if (flo == T(0)) { return flo; }
114 if (fhi == T(0)) { return fhi; }
115
116 AMREX_ASSERT_WITH_MESSAGE(flo * fhi <= T(0),
117 "Error - calling bisect but lo and hi don't bracket a root.");
118
119 T mi = (lo + hi) / T(2);
120 T fmi = T(0);
121 int n = 1;
122 while (n <= max_iter)
123 {
124 if (hi - lo < tol || almostEqual(lo,hi)) { break; }
125 mi = (lo + hi) / T(2);
126 fmi = f(mi);
127 if (fmi == T(0)) { break; }
128 fmi*flo < T(0) ? hi = mi : lo = mi;
129 flo = f(lo);
130 fhi = f(hi);
131 ++n;
132 }
133
134 AMREX_ASSERT_WITH_MESSAGE(n < max_iter,
135 "Error - maximum number of iterations reached in bisect.");
136
137 return mi;
138 }
139
140 // Find I in the range [lo,hi) that T[I] <= v < T[I+1].
141 // It is assumed that the input data are sorted and T[lo] <= v < T[hi].
142 // Note that this is different from std::lower_bound.
143 template <typename T, typename I,
144 std::enable_if_t<std::is_integral_v<I>,int> = 0>
146 I bisect (T const* d, I lo, I hi, T const& v) {
147 while (lo <= hi) {
148 int mid = lo + (hi-lo)/2;
149 if (v >= d[mid] && v < d[mid+1]) {
150 return mid;
151 } else if (v < d[mid]) {
152 hi = mid-1;
153 } else {
154 lo = mid+1;
155 }
156 };
157 return hi;
158 }
159
160 template<typename ItType, typename ValType>
162 ItType upper_bound (ItType first, ItType last, const ValType& val)
163 {
165 std::ptrdiff_t count = last-first;
166 while(count>0){
167 auto it = first;
168 const auto step = count/2;
169 it += step;
170 if (!(val < *it)){
171 first = ++it;
172 count -= step + 1;
173 }
174 else{
175 count = step;
176 }
177 }
178 return first;
179 ))
181 return std::upper_bound(first, last, val);
182 ))
183 }
184
185 template<typename ItType, typename ValType>
187 ItType lower_bound (ItType first, ItType last, const ValType& val)
188 {
190 std::ptrdiff_t count = last-first;
191 while(count>0)
192 {
193 auto it = first;
194 const auto step = count/2;
195 it += step;
196 if (*it < val){
197 first = ++it;
198 count -= step + 1;
199 }
200 else{
201 count = step;
202 }
203 }
204
205 return first;
206 ))
208 return std::lower_bound(first, last, val);
209 ))
210 }
211
212 template<typename ItType, typename ValType,
213 std::enable_if_t<
214 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
215 std::is_floating_point_v<ValType>,
216 int> = 0>
218 void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop)
219 {
220 const std::ptrdiff_t count = last-first;
221 if (count >= 2){
222 const auto delta = (stop - start)/(count - 1);
223 for (std::ptrdiff_t i = 0; i < count-1; ++i){
224 *(first++) = start + i*delta;
225 }
226 *first = stop;
227 }
228 }
229
230 template<typename ItType, typename ValType,
231 std::enable_if_t<
232 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
233 std::is_floating_point_v<ValType>,
234 int> = 0>
236 void logspace (ItType first, const ItType& last,
237 const ValType& start, const ValType& stop, const ValType& base)
238 {
239 const std::ptrdiff_t count = last-first;
240 if (count >= 2){
241 const auto delta = (stop - start)/(count - 1);
242 for (std::ptrdiff_t i = 0; i < count-1; ++i){
243 *(first++) = std::pow(base, start + i*delta);
244 }
245 *first = std::pow(base, stop);
246 }
247 }
248
249namespace detail {
250
251struct clzll_tag {};
252struct clzl_tag : clzll_tag {};
253struct clz_tag : clzl_tag {};
254
255// in gcc and clang, there are three versions of __builtin_clz taking unsigned int,
256// unsigned long, and unsigned long long inputs. Because the sizes of these data types
257// vary on different platforms, we work with fixed-width integer types.
258// these tags and overloads select the smallest version of __builtin_clz that will hold the input type
259template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned int)>>
260AMREX_FORCE_INLINE
261int builtin_clz_wrapper (clz_tag, T x) noexcept
262{
263 return static_cast<int>(__builtin_clz(x) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
264}
265
266template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long)>>
267AMREX_FORCE_INLINE
268int builtin_clz_wrapper (clzl_tag, T x) noexcept
269{
270 return static_cast<int>(__builtin_clzl(x) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
271}
272
273template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long long)>>
274AMREX_FORCE_INLINE
275int builtin_clz_wrapper (clzll_tag, T x) noexcept
276{
277 return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
278}
279
280}
281
282template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
283 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
284 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
285 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>
287int clz (T x) noexcept;
288
290int clz_generic (std::uint8_t x) noexcept
291{
292#if !defined(__NVCOMPILER)
293 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
294#else
295 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
296#endif
297 auto upper = x >> 4;
298 auto lower = x & 0xF;
299 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
300}
301
303int clz_generic (std::uint16_t x) noexcept
304{
305 auto upper = std::uint8_t(x >> 8);
306 auto lower = std::uint8_t(x & 0xFF);
307 return upper ? clz(upper) : 8 + clz(lower);
308}
309
311int clz_generic (std::uint32_t x) noexcept
312{
313 auto upper = std::uint16_t(x >> 16);
314 auto lower = std::uint16_t(x & 0xFFFF);
315 return upper ? clz(upper) : 16 + clz(lower);
316}
317
319int clz_generic (std::uint64_t x) noexcept
320{
321 auto upper = std::uint32_t(x >> 32);
322 auto lower = std::uint32_t(x & 0xFFFFFFFF);
323 return upper ? clz(upper) : 32 + clz(lower);
324}
325
326#if defined AMREX_USE_CUDA
327
328namespace detail {
329 // likewise with CUDA, there are __clz functions that take (signed) int and long long int
330 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(int)> >
331 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
332 int clz_wrapper (clz_tag, T x) noexcept
333 {
334 return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
335 }
336
337 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(long long int)> >
338 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
339 int clz_wrapper (clzll_tag, T x) noexcept
340 {
341 return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
342 }
343}
344
345template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
346 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
347 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
348 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
350int clz (T x) noexcept
351{
352 AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))
353#if AMREX_HAS_BUILTIN_CLZ
354 AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))
355#else
356 AMREX_IF_ON_HOST((return clz_generic(x);))
357#endif
358}
359
360#else // !defined AMREX_USE_CUDA
361
362template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t> ||
363 std::is_same_v<std::decay_t<T>,std::uint16_t> ||
364 std::is_same_v<std::decay_t<T>,std::uint32_t> ||
365 std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >
367int clz (T x) noexcept
368{
369#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
370 return detail::builtin_clz_wrapper(detail::clz_tag{}, x);
371#else
372 return clz_generic(x);
373#endif
374}
375
376#endif // defined AMREX_USE_CUDA
377
378}
379
380#endif
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Definition AMReX_Algorithm.H:236
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:84
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic(std::uint8_t x) noexcept
Definition AMReX_Algorithm.H:290
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T elemwiseMin(T const &a, T const &b) noexcept
Definition AMReX_Algorithm.H:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:75
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Definition AMReX_Algorithm.H:218
AMREX_GPU_HOST_DEVICE ItType upper_bound(ItType first, ItType last, const ValType &val)
Definition AMReX_Algorithm.H:162
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:93
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T elemwiseMax(T const &a, T const &b) noexcept
Definition AMReX_Algorithm.H:62
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz(T x) noexcept
Definition AMReX_Algorithm.H:350
AMREX_GPU_HOST_DEVICE ItType lower_bound(ItType first, ItType last, const ValType &val)
Definition AMReX_Algorithm.H:187
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_Algorithm.H:253
Definition AMReX_Algorithm.H:252
Definition AMReX_Algorithm.H:251