1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
32 template <
class T,
class ... Ts>
52 template <
class T,
class ... Ts>
67 template <
class T,
class ... Ts>
69 T
elemwiseMin (
const T& a,
const T& b,
const Ts& ... c)
noexcept
82 template <
class T,
class ... Ts>
84 T
elemwiseMax (
const T& a,
const T& b,
const Ts& ... c)
noexcept
93 void Swap (T& t1, T& t2)
noexcept
95 T temp = std::move(t1);
104 template <
typename T>
106 constexpr const T&
Clamp (
const T& v,
const T& lo,
const T& hi)
119 template <
typename T>
121 std::enable_if_t<std::is_floating_point_v<T>,
bool>
127 return std::abs(
x-
y) <= std::numeric_limits<T>::epsilon() * std::abs(
x+
y) * ulp
129 || std::abs(
x-
y) < std::numeric_limits<T>::min();
148 template <
class T,
class F,
149 std::enable_if_t<std::is_floating_point_v<T>,
int>FOO = 0>
151 T
bisect (T lo, T hi,
F f, T tol=1e-12,
int max_iter=100)
154 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
159 if (flo == T(0)) {
return flo; }
160 if (fhi == T(0)) {
return fhi; }
163 "Error - calling bisect but lo and hi don't bracket a root.");
165 T mi = (lo + hi) / T(2);
168 while (n <= max_iter)
170 if (hi - lo < tol ||
almostEqual(lo,hi)) {
break; }
171 mi = (lo + hi) / T(2);
173 if (fmi == T(0)) {
break; }
174 fmi*flo < T(0) ? hi = mi : lo = mi;
181 "Error - maximum number of iterations reached in bisect.");
203 template <
typename T,
typename I,
204 std::enable_if_t<std::is_integral_v<I>,
int> = 0>
206 I
bisect (T
const* d, I lo, I hi, T
const& v) {
208 int mid = lo + (hi-lo)/2;
209 if (v >= d[mid] && v < d[mid+1]) {
211 }
else if (v < d[mid]) {
234 template<
typename ItType,
typename ValType>
236 ItType
upper_bound (ItType first, ItType last,
const ValType& val)
239 std::ptrdiff_t count = last-first;
242 const auto step = count/2;
255 return std::upper_bound(first, last, val);
273 template<
typename ItType,
typename ValType>
275 ItType
lower_bound (ItType first, ItType last,
const ValType& val)
278 std::ptrdiff_t count = last-first;
282 const auto step = count/2;
296 return std::lower_bound(first, last, val);
317 template<
typename ItType,
typename ValType,
319 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
320 std::is_floating_point_v<ValType>,
323 void linspace (ItType first,
const ItType& last,
const ValType& start,
const ValType& stop)
325 const std::ptrdiff_t count = last-first;
327 const auto delta = (stop - start)/(count - 1);
328 for (std::ptrdiff_t i = 0; i < count-1; ++i){
329 *(first++) = start + i*delta;
353 template<
typename ItType,
typename ValType,
355 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
356 std::is_floating_point_v<ValType>,
360 const ValType& start,
const ValType& stop,
const ValType& base)
362 const std::ptrdiff_t count = last-first;
364 const auto delta = (stop - start)/(count - 1);
365 for (std::ptrdiff_t i = 0; i < count-1; ++i){
366 *(first++) = std::pow(base, start + i*delta);
368 *first = std::pow(base, stop);
376struct clzl_tag : clzll_tag {};
377struct clz_tag : clzl_tag {};
383template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned int)>>
385int builtin_clz_wrapper (clz_tag, T x) noexcept
387 return static_cast<
int>(__builtin_clz(x) - (sizeof(
unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
390template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long)>>
392int builtin_clz_wrapper (clzl_tag, T x) noexcept
394 return static_cast<
int>(__builtin_clzl(x) - (sizeof(
unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
397template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long long)>>
399int builtin_clz_wrapper (clzll_tag, T x) noexcept
401 return static_cast<
int>(__builtin_clzll(x) - (sizeof(
unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
408template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
409 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
410 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
411 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> = 0>
413int clz (T
x)
noexcept;
418int clz_generic (std::uint8_t
x)
noexcept
420#if !defined(__NVCOMPILER)
421 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
423 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
426 auto lower =
x & 0xF;
427 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
431int clz_generic (std::uint16_t
x)
noexcept
433 auto upper = std::uint8_t(
x >> 8);
434 auto lower = std::uint8_t(
x & 0xFF);
435 return upper ?
clz(upper) : 8 +
clz(lower);
439int clz_generic (std::uint32_t
x)
noexcept
441 auto upper = std::uint16_t(
x >> 16);
442 auto lower = std::uint16_t(
x & 0xFFFF);
443 return upper ?
clz(upper) : 16 +
clz(lower);
447int clz_generic (std::uint64_t
x)
noexcept
449 auto upper = std::uint32_t(
x >> 32);
450 auto lower = std::uint32_t(
x & 0xFFFFFFFF);
451 return upper ?
clz(upper) : 32 +
clz(lower);
456#if defined AMREX_USE_CUDA
461 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
int)> >
462 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
463 int clz_wrapper (clz_tag, T x) noexcept
465 return __clz((
int) x) - (sizeof(
int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
468 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
long long int)> >
469 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
470 int clz_wrapper (clzll_tag, T x) noexcept
472 return __clzll((
long long int) x) - (sizeof(
long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
477template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
478 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
479 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
480 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
485#if AMREX_HAS_BUILTIN_CLZ
494template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
495 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
496 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
497 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
499int clz (T x)
noexcept
501#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
502 return detail::builtin_clz_wrapper(detail::clz_tag{},
x);
504 return clz_generic(x);
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:151
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:236
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:93
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:77
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:106
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:24
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:275
__host__ __device__ std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:122
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:359
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:323
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:482
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62