1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
33 template <
class T,
class ... Ts>
53 template <
class T,
class ... Ts>
68 template <
class T,
class ... Ts>
70 T
elemwiseMin (
const T& a,
const T& b,
const Ts& ... c)
noexcept
83 template <
class T,
class ... Ts>
85 T
elemwiseMax (
const T& a,
const T& b,
const Ts& ... c)
noexcept
94 void Swap (T& t1, T& t2)
noexcept
96 T temp = std::move(t1);
105 template <
typename T>
107 constexpr const T&
Clamp (
const T& v,
const T& lo,
const T& hi)
120 template <std::floating_point T>
128 return std::abs(
x-
y) <= std::numeric_limits<T>::epsilon() * std::abs(
x+
y) * ulp
130 || std::abs(
x-
y) < std::numeric_limits<T>::min();
149 template <std::floating_point T,
class F>
151 T
bisect (T lo, T hi,
F f, T tol=1e-12,
int max_iter=100)
154 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
159 if (flo == T(0)) {
return lo; }
160 if (fhi == T(0)) {
return hi; }
163 "Error - calling bisect but lo and hi don't bracket a root.");
165 T mi = (lo + hi) / T(2);
168 while (n <= max_iter)
170 if (hi - lo < tol ||
almostEqual(lo,hi)) {
break; }
171 mi = (lo + hi) / T(2);
173 if (fmi == T(0)) {
break; }
174 fmi*flo < T(0) ? hi = mi : lo = mi;
181 "Error - maximum number of iterations reached in bisect.");
207 template <
typename T, std::
integral I>
209 I
bisect (T
const* d, I lo, I hi, T
const& v)
211 while (hi - lo > 1) {
212 I mid = lo + (hi - lo) / 2;
236 template<
typename ItType,
typename ValType>
238 ItType
upper_bound (ItType first, ItType last,
const ValType& val)
241 std::ptrdiff_t count = last-first;
244 const auto step = count/2;
257 return std::upper_bound(first, last, val);
275 template<
typename ItType,
typename ValType>
277 ItType
lower_bound (ItType first, ItType last,
const ValType& val)
280 std::ptrdiff_t count = last-first;
284 const auto step = count/2;
298 return std::lower_bound(first, last, val);
319 template<
typename ItType, std::floating_point ValType>
320 requires (std::floating_point<typename std::iterator_traits<ItType>::value_type>)
322 void linspace (ItType first,
const ItType& last,
const ValType& start,
const ValType& stop)
324 const std::ptrdiff_t count = last-first;
326 const auto delta = (stop - start)/(count - 1);
327 for (std::ptrdiff_t i = 0; i < count-1; ++i){
328 *(first++) = start + i*delta;
352 template<
typename ItType, std::floating_point ValType>
353 requires (std::floating_point<typename std::iterator_traits<ItType>::value_type>)
356 const ValType& start,
const ValType& stop,
const ValType& base)
358 const std::ptrdiff_t count = last-first;
360 const auto delta = (stop - start)/(count - 1);
361 for (std::ptrdiff_t i = 0; i < count-1; ++i){
362 *(first++) = std::pow(base, start + i*delta);
364 *first = std::pow(base, stop);
// Dispatch tag for the builtin_clz_wrapper overload that takes a clzl_tag and
// forwards to __builtin_clzl. Because it derives from clzll_tag, a clzl_tag
// argument can also bind (derived-to-base) to the overload taking clzll_tag
// when the clzl_tag overload's sizeof constraint is not satisfied.
// NOTE(review): clzll_tag itself is declared outside the visible lines.
372struct clzl_tag : clzll_tag {};
// Most-derived dispatch tag; clz() passes clz_tag{} to builtin_clz_wrapper.
// The overload taking clz_tag (__builtin_clz) is an exact match and is
// preferred; if its sizeof constraint fails, the argument converts to the
// clzl_tag overload and then to the clzll_tag overload, in that order.
373struct clz_tag : clzl_tag {};
380requires (
sizeof(T) <=
sizeof(
unsigned int))
382int builtin_clz_wrapper (clz_tag, T
x)
noexcept
384 return static_cast<int>(__builtin_clz(
x) - (
sizeof(
unsigned int) * CHAR_BIT -
sizeof(T) * CHAR_BIT));
388requires (
sizeof(T) <=
sizeof(
unsigned long))
390int builtin_clz_wrapper (clzl_tag, T
x)
noexcept
392 return static_cast<int>(__builtin_clzl(
x) - (
sizeof(
unsigned long) * CHAR_BIT -
sizeof(T) * CHAR_BIT));
396requires (
sizeof(T) <=
sizeof(
unsigned long long))
398int builtin_clz_wrapper (clzll_tag, T
x)
noexcept
400 return static_cast<int>(__builtin_clzll(
x) - (
sizeof(
unsigned long long) * CHAR_BIT -
sizeof(T) * CHAR_BIT));
408requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
409 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
410 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
411 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
413int clz (T
x)
noexcept;
418int clz_generic (std::uint8_t
x)
noexcept
420#if !defined(__NVCOMPILER)
421 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
423 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
426 auto lower =
x & 0xF;
427 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
431int clz_generic (std::uint16_t
x)
noexcept
433 auto upper = std::uint8_t(
x >> 8);
434 auto lower = std::uint8_t(
x & 0xFF);
435 return upper ?
clz(upper) : 8 +
clz(lower);
439int clz_generic (std::uint32_t
x)
noexcept
441 auto upper = std::uint16_t(
x >> 16);
442 auto lower = std::uint16_t(
x & 0xFFFF);
443 return upper ?
clz(upper) : 16 +
clz(lower);
447int clz_generic (std::uint64_t
x)
noexcept
449 auto upper = std::uint32_t(
x >> 32);
450 auto lower = std::uint32_t(
x & 0xFFFFFFFF);
451 return upper ?
clz(upper) : 32 +
clz(lower);
456#if defined AMREX_USE_CUDA
461 template <
typename T>
462 requires (
sizeof(T) <=
sizeof(
int))
464 int clz_wrapper (clz_tag, T
x)
noexcept
466 return __clz((
int)
x) - (
sizeof(
int) * CHAR_BIT -
sizeof(T) * CHAR_BIT);
469 template <
typename T>
470 requires (
sizeof(T) <=
sizeof(
long long int))
472 int clz_wrapper (clzll_tag, T
x)
noexcept
474 return __clzll((
long long int)
x) - (
sizeof(
long long int) * CHAR_BIT -
sizeof(T) * CHAR_BIT);
480requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
481 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
482 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
483 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
488#if AMREX_HAS_BUILTIN_CLZ
498requires (std::same_as<std::remove_cvref_t<T>,std::uint8_t> ||
499 std::same_as<std::remove_cvref_t<T>,std::uint16_t> ||
500 std::same_as<std::remove_cvref_t<T>,std::uint32_t> ||
501 std::same_as<std::remove_cvref_t<T>,std::uint64_t>)
503int clz (T
x)
noexcept
505#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
506 return detail::builtin_clz_wrapper(detail::clz_tag{},
x);
508 return clz_generic(
x);
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:50
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:238
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:355
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:94
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:151
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:78
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:107
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:25
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:485
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:277
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:322
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:45
__host__ __device__ bool almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:123
const int[]
Definition AMReX_BLProfiler.cpp:1664
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:63