1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
26 template <
class T,
class ... Ts>
40 template <
class T,
class ... Ts>
53 template <
class T,
class ... Ts>
66 template <
class T,
class ... Ts>
75 void Swap (T& t1, T& t2)
noexcept
77 T temp = std::move(t1);
// Clamp: restrict v to the closed range [lo, hi].
// Returns a reference to one of the three arguments: lo when v < lo,
// hi when hi < v, otherwise v itself. Because a reference is returned,
// the result must not outlive the arguments it was computed from.
// NOTE(review): if lo > hi the outcome depends on which comparison fires
// first; no check for that precondition is visible in this excerpt.
84 constexpr const T&
Clamp (
const T& v,
const T& lo,
const T& hi)
// Only operator< is used, so T needs no other comparison operators.
86 return (v < lo) ? lo : (hi < v) ? hi : v;
// almostEqual: approximate equality test for floating-point values.
// The return type is bool and the overload is only enabled when T is a
// floating-point type.
92 std::enable_if_t<std::is_floating_point_v<T>,
bool>
// Relative test: |x-y| must not exceed machine epsilon scaled by |x+y| and
// by the caller-supplied tolerance `ulp`.
97 return std::abs(
x-y) <= std::numeric_limits<T>::epsilon() * std::abs(
x+y) * ulp
// Absolute fallback: a difference smaller than the smallest positive normal
// value of T also counts as equal.
99 || std::abs(
x-y) < std::numeric_limits<T>::min();
102 template <
class T,
class F,
103 std::enable_if_t<std::is_floating_point_v<T>,
int>FOO = 0>
105 T
bisect (T lo, T hi,
F f, T tol=1e-12,
int max_iter=100)
108 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
// If an endpoint already evaluates to zero, that endpoint is the root.
// Return the abscissa (lo / hi) rather than the function value, which on
// these branches is always T(0) and says nothing about where the root is.
// NOTE(review): assumes flo and fhi hold f(lo) and f(hi); their definitions
// fall outside this excerpt - confirm.
113 if (flo == T(0)) {
return lo; }
114 if (fhi == T(0)) {
return hi; }
117 "Error - calling bisect but lo and hi don't bracket a root.");
119 T mi = (lo + hi) / T(2);
122 while (n <= max_iter)
124 if (hi - lo < tol ||
almostEqual(lo,hi)) {
break; }
125 mi = (lo + hi) / T(2);
127 if (fmi == T(0)) {
break; }
128 fmi*flo < T(0) ? hi = mi : lo = mi;
135 "Error - maximum number of iterations reached in bisect.");
143 template <
typename T,
typename I,
144 std::enable_if_t<std::is_integral_v<I>,
int> = 0>
146 I
bisect (T
const* d, I lo, I hi, T
const& v) {
// Keep the midpoint in the index type I: storing it in a plain int would
// truncate the value when I is wider than int (e.g. long / std::int64_t).
// lo + (hi-lo)/2 is used instead of (lo+hi)/2 to avoid overflowing the sum.
148 I mid = lo + (hi-lo)/2;
149 if (v >= d[mid] && v < d[mid+1]) {
151 }
else if (v < d[mid]) {
// upper_bound: search the range [first, last) for val and return an iterator
// of type ItType.
// This excerpt shows two pieces of the body: the start of a hand-written
// halving search (count / step) and a direct call to std::upper_bound.
// NOTE(review): the lines selecting between them are not visible here -
// presumably a device/host split; confirm against the full header.
160 template<
typename ItType,
typename ValType>
162 ItType
upper_bound (ItType first, ItType last,
const ValType& val)
// Number of elements still under consideration.
165 std::ptrdiff_t count = last-first;
// Half of the remaining range; used to probe its middle.
168 const auto step = count/2;
// Delegation to the standard-library implementation.
181 return std::upper_bound(first, last, val);
// lower_bound: search the range [first, last) for val and return an iterator
// of type ItType.
// This excerpt shows two pieces of the body: the start of a hand-written
// halving search (count / step) and a direct call to std::lower_bound.
// NOTE(review): the lines selecting between them are not visible here -
// presumably a device/host split; confirm against the full header.
185 template<
typename ItType,
typename ValType>
187 ItType
lower_bound (ItType first, ItType last,
const ValType& val)
// Number of elements still under consideration.
190 std::ptrdiff_t count = last-first;
// Half of the remaining range; used to probe its middle.
194 const auto step = count/2;
// Delegation to the standard-library implementation.
208 return std::lower_bound(first, last, val);
// linspace: write evenly spaced values, beginning at `start`, into the
// output range [first, last).
// The visible constraint requires both the iterator's value_type and ValType
// to be floating-point types.
212 template<
typename ItType,
typename ValType,
214 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
215 std::is_floating_point_v<ValType>,
218 void linspace (ItType first,
const ItType& last,
const ValType& start,
const ValType& stop)
// Number of output slots.
220 const std::ptrdiff_t count = last-first;
// Spacing between consecutive samples. This divides by (count - 1).
// NOTE(review): any guard against count < 2 lies outside this excerpt -
// confirm one exists, since count == 1 would divide by zero.
222 const auto delta = (stop - start)/(count - 1);
// The loop fills the first count-1 slots; the last slot is not written here.
223 for (std::ptrdiff_t i = 0; i < count-1; ++i){
224 *(first++) = start + i*delta;
// logspace: write values of the form base^e into the output range, where
// the exponents e are evenly spaced from `start` to `stop`.
// The visible constraint requires both the iterator's value_type and ValType
// to be floating-point types.
230 template<
typename ItType,
typename ValType,
232 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
233 std::is_floating_point_v<ValType>,
237 const ValType& start,
const ValType& stop,
const ValType& base)
// Number of output slots.
239 const std::ptrdiff_t count = last-first;
// Spacing between consecutive exponents. This divides by (count - 1).
// NOTE(review): any guard against count < 2 lies outside this excerpt -
// confirm one exists, since count == 1 would divide by zero.
241 const auto delta = (stop - start)/(count - 1);
// Fill the first count-1 slots from the running exponent.
242 for (std::ptrdiff_t i = 0; i < count-1; ++i){
243 *(first++) = std::pow(base, start + i*delta);
// The final slot is computed directly from `stop` rather than from
// start + i*delta.
245 *first = std::pow(base, stop);
// builtin_clz_wrapper (clz_tag overload): count leading zero bits of x using
// the compiler builtin __builtin_clz.
// Enabled only when T is no wider than unsigned int.
259template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned int)>>
261int builtin_clz_wrapper (clz_tag, T x) noexcept
// The builtin counts in the width of unsigned int, so the extra high bits
// that T does not have are subtracted to get the count in T's own width.
// NOTE(review): the GCC documentation states __builtin_clz is undefined for
// an argument of 0; no zero check is visible in this excerpt.
263 return static_cast<
int>(__builtin_clz(x) - (sizeof(
unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
// builtin_clz_wrapper (clzl_tag overload): count leading zero bits of x using
// the compiler builtin __builtin_clzl.
// Enabled only when T is no wider than unsigned long.
266template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long)>>
268int builtin_clz_wrapper (clzl_tag, T x) noexcept
// The builtin counts in the width of unsigned long, so the extra high bits
// that T does not have are subtracted to get the count in T's own width.
// NOTE(review): the GCC documentation states __builtin_clzl is undefined for
// an argument of 0; no zero check is visible in this excerpt.
270 return static_cast<
int>(__builtin_clzl(x) - (sizeof(
unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
// builtin_clz_wrapper (clzll_tag overload): count leading zero bits of x
// using the compiler builtin __builtin_clzll.
// Enabled only when T is no wider than unsigned long long.
273template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long long)>>
275int builtin_clz_wrapper (clzll_tag, T x) noexcept
// The builtin counts in the width of unsigned long long, so the extra high
// bits that T does not have are subtracted to get the count in T's own width.
// NOTE(review): the GCC documentation states __builtin_clzll is undefined for
// an argument of 0; no zero check is visible in this excerpt.
277 return static_cast<
int>(__builtin_clzll(x) - (sizeof(
unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
282template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
283 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
284 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
285 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> = 0>
287int clz (T
x)
noexcept;
// Portable leading-zero count using a 16-entry table: clz_lookup[n] is the
// number of leading zero bits of the 4-bit value n (4 for 0, 3 for 1, 2 for
// 2-3, 1 for 4-7, 0 for 8-15).
// The table is declared twice, once `static constexpr` and once plain
// `constexpr`, selected by whether __NVCOMPILER is defined.
292#if !defined(__NVCOMPILER)
293 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
295 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
// Low nibble of x.
// NOTE(review): `upper` is defined on a line outside this excerpt -
// presumably the high nibble; confirm.
298 auto lower =
x & 0xF;
// A non-zero upper part decides the answer on its own; otherwise the upper
// part contributes 4 zero bits and the low nibble supplies the rest.
299 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
// Split x into its bits above position 8 and its low 8 bits, each narrowed
// to std::uint8_t.
// NOTE(review): the signature is outside this excerpt; the shifts suggest a
// 16-bit argument - confirm.
305 auto upper = std::uint8_t(
x >> 8);
306 auto lower = std::uint8_t(
x & 0xFF);
// If the high byte is non-zero its own count is the answer; otherwise the
// high byte accounts for 8 zero bits plus the count of the low byte.
307 return upper ?
clz(upper) : 8 +
clz(lower);
// Split x into its bits above position 16 and its low 16 bits, each narrowed
// to std::uint16_t.
// NOTE(review): the signature is outside this excerpt; the shifts suggest a
// 32-bit argument - confirm.
313 auto upper = std::uint16_t(
x >> 16);
314 auto lower = std::uint16_t(
x & 0xFFFF);
// If the high half is non-zero its own count is the answer; otherwise the
// high half accounts for 16 zero bits plus the count of the low half.
315 return upper ?
clz(upper) : 16 +
clz(lower);
// Split x into its bits above position 32 and its low 32 bits, each narrowed
// to std::uint32_t.
// NOTE(review): the signature is outside this excerpt; the shifts suggest a
// 64-bit argument - confirm.
321 auto upper = std::uint32_t(
x >> 32);
322 auto lower = std::uint32_t(
x & 0xFFFFFFFF);
// If the high half is non-zero its own count is the answer; otherwise the
// high half accounts for 32 zero bits plus the count of the low half.
323 return upper ?
clz(upper) : 32 +
clz(lower);
326#if defined AMREX_USE_CUDA
// clz_wrapper (clz_tag overload): device-side leading-zero count built on
// the __clz intrinsic. This code sits under `#if defined AMREX_USE_CUDA`.
// Enabled only when T is no wider than int.
330 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
int)> >
331 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
332 int clz_wrapper (clz_tag, T x) noexcept
// x is converted to int for the intrinsic; the bits that int has beyond T's
// width are then subtracted to express the count in T's own width.
334 return __clz((
int) x) - (sizeof(
int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
// clz_wrapper (clzll_tag overload): device-side leading-zero count built on
// the __clzll intrinsic.
// Enabled only when T is no wider than long long int.
337 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
long long int)> >
338 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
339 int clz_wrapper (clzll_tag, T x) noexcept
// x is converted to long long int for the intrinsic; the bits that type has
// beyond T's width are then subtracted to express the count in T's own width.
341 return __clzll((
long long int) x) - (sizeof(
long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
345template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
346 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
347 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
348 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
353#if AMREX_HAS_BUILTIN_CLZ
362template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
363 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
364 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
365 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
367int clz (T x)
noexcept
369#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
370 return detail::builtin_clz_wrapper(detail::clz_tag{},
x);
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
AMREX_GPU_HOST_DEVICE void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Definition AMReX_Algorithm.H:236
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:84
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic(std::uint8_t x) noexcept
Definition AMReX_Algorithm.H:290
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:21
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T elemwiseMin(T const &a, T const &b) noexcept
Definition AMReX_Algorithm.H:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:75
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition AMReX_Algorithm.H:105
AMREX_GPU_HOST_DEVICE void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Definition AMReX_Algorithm.H:218
AMREX_GPU_HOST_DEVICE ItType upper_bound(ItType first, ItType last, const ValType &val)
Definition AMReX_Algorithm.H:162
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:93
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:35
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T elemwiseMax(T const &a, T const &b) noexcept
Definition AMReX_Algorithm.H:62
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz(T x) noexcept
Definition AMReX_Algorithm.H:350
AMREX_GPU_HOST_DEVICE ItType lower_bound(ItType first, ItType last, const ValType &val)
Definition AMReX_Algorithm.H:187
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_Algorithm.H:253
Definition AMReX_Algorithm.H:252
Definition AMReX_Algorithm.H:251