1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
32 template <
class T,
class ... Ts>
52 template <
class T,
class ... Ts>
67 template <
class T,
class ... Ts>
69 T
elemwiseMin (
const T& a,
const T& b,
const Ts& ... c)
noexcept
82 template <
class T,
class ... Ts>
84 T
elemwiseMax (
const T& a,
const T& b,
const Ts& ... c)
noexcept
93 void Swap (T& t1, T& t2)
noexcept
95 T temp = std::move(t1);
104 template <
typename T>
106 constexpr const T&
Clamp (
const T& v,
const T& lo,
const T& hi)
119 template <
typename T>
121 std::enable_if_t<std::is_floating_point_v<T>,
bool>
127 return std::abs(
x-
y) <= std::numeric_limits<T>::epsilon() * std::abs(
x+
y) * ulp
129 || std::abs(
x-
y) < std::numeric_limits<T>::min();
148 template <
class T,
class F,
149 std::enable_if_t<std::is_floating_point_v<T>,
int>FOO = 0>
151 T
bisect (T lo, T hi,
F f, T tol=1e-12,
int max_iter=100)
154 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
159 if (flo == T(0)) {
return lo; }
160 if (fhi == T(0)) {
return hi; }
163 "Error - calling bisect but lo and hi don't bracket a root.");
165 T mi = (lo + hi) / T(2);
168 while (n <= max_iter)
170 if (hi - lo < tol ||
almostEqual(lo,hi)) {
break; }
171 mi = (lo + hi) / T(2);
173 if (fmi == T(0)) {
break; }
174 fmi*flo < T(0) ? hi = mi : lo = mi;
181 "Error - maximum number of iterations reached in bisect.");
207 template <
typename T,
typename I,
208 std::enable_if_t<std::is_integral_v<I>,
int> = 0>
210 I
bisect (T
const* d, I lo, I hi, T
const& v) {
211 while (hi - lo > 1) {
212 I mid = lo + (hi - lo) / 2;
236 template<
typename ItType,
typename ValType>
238 ItType
upper_bound (ItType first, ItType last,
const ValType& val)
241 std::ptrdiff_t count = last-first;
244 const auto step = count/2;
257 return std::upper_bound(first, last, val);
275 template<
typename ItType,
typename ValType>
277 ItType
lower_bound (ItType first, ItType last,
const ValType& val)
280 std::ptrdiff_t count = last-first;
284 const auto step = count/2;
298 return std::lower_bound(first, last, val);
319 template<
typename ItType,
typename ValType,
321 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
322 std::is_floating_point_v<ValType>,
325 void linspace (ItType first,
const ItType& last,
const ValType& start,
const ValType& stop)
327 const std::ptrdiff_t count = last-first;
329 const auto delta = (stop - start)/(count - 1);
330 for (std::ptrdiff_t i = 0; i < count-1; ++i){
331 *(first++) = start + i*delta;
355 template<
typename ItType,
typename ValType,
357 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
358 std::is_floating_point_v<ValType>,
362 const ValType& start,
const ValType& stop,
const ValType& base)
364 const std::ptrdiff_t count = last-first;
366 const auto delta = (stop - start)/(count - 1);
367 for (std::ptrdiff_t i = 0; i < count-1; ++i){
368 *(first++) = std::pow(base, start + i*delta);
370 *first = std::pow(base, stop);
// Tag types used as the first parameter of the builtin_clz_wrapper overloads.
// clz_tag derives from clzl_tag, which derives from clzll_tag, so a clz_tag
// argument prefers the overload taking clz_tag and, when that overload is
// removed by its sizeof(T) constraint, converts to the next base tag instead.
378struct clzl_tag : clzll_tag {};
379struct clz_tag : clzl_tag {};
385template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned int)>>
387int builtin_clz_wrapper (clz_tag, T x) noexcept
389 return static_cast<
int>(__builtin_clz(x) - (sizeof(
unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
392template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long)>>
394int builtin_clz_wrapper (clzl_tag, T x) noexcept
396 return static_cast<
int>(__builtin_clzl(x) - (sizeof(
unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
399template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long long)>>
401int builtin_clz_wrapper (clzll_tag, T x) noexcept
403 return static_cast<
int>(__builtin_clzll(x) - (sizeof(
unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
410template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
411 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
412 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
413 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> = 0>
415int clz (T
x)
noexcept;
420int clz_generic (std::uint8_t
x)
noexcept
422#if !defined(__NVCOMPILER)
423 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
425 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
428 auto lower =
x & 0xF;
429 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
433int clz_generic (std::uint16_t
x)
noexcept
435 auto upper = std::uint8_t(
x >> 8);
436 auto lower = std::uint8_t(
x & 0xFF);
437 return upper ?
clz(upper) : 8 +
clz(lower);
441int clz_generic (std::uint32_t
x)
noexcept
443 auto upper = std::uint16_t(
x >> 16);
444 auto lower = std::uint16_t(
x & 0xFFFF);
445 return upper ?
clz(upper) : 16 +
clz(lower);
449int clz_generic (std::uint64_t
x)
noexcept
451 auto upper = std::uint32_t(
x >> 32);
452 auto lower = std::uint32_t(
x & 0xFFFFFFFF);
453 return upper ?
clz(upper) : 32 +
clz(lower);
458#if defined AMREX_USE_CUDA
463 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
int)> >
464 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
465 int clz_wrapper (clz_tag, T x) noexcept
467 return __clz((
int) x) - (sizeof(
int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
470 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
long long int)> >
471 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
472 int clz_wrapper (clzll_tag, T x) noexcept
474 return __clzll((
long long int) x) - (sizeof(
long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
479template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
480 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
481 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
482 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
487#if AMREX_HAS_BUILTIN_CLZ
496template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
497 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
498 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
499 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
501int clz (T x)
noexcept
503#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
504 return detail::builtin_clz_wrapper(detail::clz_tag{},
x);
506 return clz_generic(x);
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:151
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:238
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:93
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:77
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:106
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:24
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:277
__host__ __device__ std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:122
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:361
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:325
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:484
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62