1#ifndef AMREX_ALGORITHM_H_
2#define AMREX_ALGORITHM_H_
3#include <AMReX_Config.H>
32 template <
class T,
class ... Ts>
52 template <
class T,
class ... Ts>
67 template <
class T,
class ... Ts>
69 T
elemwiseMin (
const T& a,
const T& b,
const Ts& ... c)
noexcept
82 template <
class T,
class ... Ts>
84 T
elemwiseMax (
const T& a,
const T& b,
const Ts& ... c)
noexcept
93 void Swap (T& t1, T& t2)
noexcept
95 T temp = std::move(t1);
104 template <
typename T>
106 constexpr const T&
Clamp (
const T& v,
const T& lo,
const T& hi)
108 return (v < lo) ? lo : (hi < v) ? hi : v;
113 template <
typename T>
115 std::enable_if_t<std::is_floating_point_v<T>,
bool>
121 return std::abs(
x-
y) <= std::numeric_limits<T>::epsilon() * std::abs(
x+
y) * ulp
123 || std::abs(
x-
y) < std::numeric_limits<T>::min();
142 template <
class T,
class F,
143 std::enable_if_t<std::is_floating_point_v<T>,
int>FOO = 0>
145 T
bisect (T lo, T hi,
F f, T tol=1e-12,
int max_iter=100)
148 "Error - calling bisect but lo and hi don't describe a reasonable interval.");
153 if (flo == T(0)) {
return flo; }
154 if (fhi == T(0)) {
return fhi; }
157 "Error - calling bisect but lo and hi don't bracket a root.");
159 T mi = (lo + hi) / T(2);
162 while (n <= max_iter)
164 if (hi - lo < tol ||
almostEqual(lo,hi)) {
break; }
165 mi = (lo + hi) / T(2);
167 if (fmi == T(0)) {
break; }
168 fmi*flo < T(0) ? hi = mi : lo = mi;
175 "Error - maximum number of iterations reached in bisect.");
197 template <
typename T,
typename I,
198 std::enable_if_t<std::is_integral_v<I>,
int> = 0>
200 I
bisect (T
const* d, I lo, I hi, T
const& v) {
202 int mid = lo + (hi-lo)/2;
203 if (v >= d[mid] && v < d[mid+1]) {
205 }
else if (v < d[mid]) {
228 template<
typename ItType,
typename ValType>
230 ItType
upper_bound (ItType first, ItType last,
const ValType& val)
233 std::ptrdiff_t count = last-first;
236 const auto step = count/2;
249 return std::upper_bound(first, last, val);
267 template<
typename ItType,
typename ValType>
269 ItType
lower_bound (ItType first, ItType last,
const ValType& val)
272 std::ptrdiff_t count = last-first;
276 const auto step = count/2;
290 return std::lower_bound(first, last, val);
311 template<
typename ItType,
typename ValType,
313 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
314 std::is_floating_point_v<ValType>,
317 void linspace (ItType first,
const ItType& last,
const ValType& start,
const ValType& stop)
319 const std::ptrdiff_t count = last-first;
321 const auto delta = (stop - start)/(count - 1);
322 for (std::ptrdiff_t i = 0; i < count-1; ++i){
323 *(first++) = start + i*delta;
347 template<
typename ItType,
typename ValType,
349 std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&
350 std::is_floating_point_v<ValType>,
354 const ValType& start,
const ValType& stop,
const ValType& base)
356 const std::ptrdiff_t count = last-first;
358 const auto delta = (stop - start)/(count - 1);
359 for (std::ptrdiff_t i = 0; i < count-1; ++i){
360 *(first++) = std::pow(base, start + i*delta);
362 *first = std::pow(base, stop);
370struct clzl_tag : clzll_tag {};
371struct clz_tag : clzl_tag {};
377template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned int)>>
379int builtin_clz_wrapper (clz_tag, T x) noexcept
381 return static_cast<
int>(__builtin_clz(x) - (sizeof(
unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));
384template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long)>>
386int builtin_clz_wrapper (clzl_tag, T x) noexcept
388 return static_cast<
int>(__builtin_clzl(x) - (sizeof(
unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
391template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
unsigned long long)>>
393int builtin_clz_wrapper (clzll_tag, T x) noexcept
395 return static_cast<
int>(__builtin_clzll(x) - (sizeof(
unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));
402template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
403 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
404 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
405 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> = 0>
407int clz (T
x)
noexcept;
412int clz_generic (std::uint8_t
x)
noexcept
414#if !defined(__NVCOMPILER)
415 static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
417 constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
420 auto lower =
x & 0xF;
421 return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];
425int clz_generic (std::uint16_t
x)
noexcept
427 auto upper = std::uint8_t(
x >> 8);
428 auto lower = std::uint8_t(
x & 0xFF);
429 return upper ?
clz(upper) : 8 +
clz(lower);
433int clz_generic (std::uint32_t
x)
noexcept
435 auto upper = std::uint16_t(
x >> 16);
436 auto lower = std::uint16_t(
x & 0xFFFF);
437 return upper ?
clz(upper) : 16 +
clz(lower);
441int clz_generic (std::uint64_t
x)
noexcept
443 auto upper = std::uint32_t(
x >> 32);
444 auto lower = std::uint32_t(
x & 0xFFFFFFFF);
445 return upper ?
clz(upper) : 32 +
clz(lower);
450#if defined AMREX_USE_CUDA
455 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
int)> >
456 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
457 int clz_wrapper (clz_tag, T x) noexcept
459 return __clz((
int) x) - (sizeof(
int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
462 template <
typename T,
typename = std::enable_if_t<sizeof(T) <= sizeof(
long long int)> >
463 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
464 int clz_wrapper (clzll_tag, T x) noexcept
466 return __clzll((
long long int) x) - (sizeof(
long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);
471template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
472 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
473 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
474 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
479#if AMREX_HAS_BUILTIN_CLZ
488template <
class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::u
int8_t> ||
489 std::is_same_v<std::decay_t<T>,std::u
int16_t> ||
490 std::is_same_v<std::decay_t<T>,std::u
int32_t> ||
491 std::is_same_v<std::decay_t<T>,std::u
int64_t>,
int> >
493int clz (T x)
noexcept
495#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)
496 return detail::builtin_clz_wrapper(detail::clz_tag{},
x);
498 return clz_generic(x);
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition AMReX_BLassert.H:37
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Definition AMReX_Amr.cpp:49
__host__ __device__ T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Find a root of a scalar function on a bracketing interval using bisection.
Definition AMReX_Algorithm.H:145
__host__ __device__ ItType upper_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element greater than a given value.
Definition AMReX_Algorithm.H:230
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:93
__host__ __device__ constexpr T elemwiseMax(T const &a, T const &b) noexcept
Return the element-wise maximum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:77
__host__ __device__ constexpr const T & Clamp(const T &v, const T &lo, const T &hi)
Definition AMReX_Algorithm.H:106
__host__ __device__ constexpr const T & min(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:24
__host__ __device__ ItType lower_bound(ItType first, ItType last, const ValType &val)
Return an iterator to the first element not less than a given value.
Definition AMReX_Algorithm.H:269
__host__ __device__ std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition AMReX_Algorithm.H:116
__host__ __device__ constexpr const T & max(const T &a, const T &b) noexcept
Definition AMReX_Algorithm.H:44
__host__ __device__ void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Fill a range with logarithmically spaced values over a closed interval.
Definition AMReX_Algorithm.H:353
__host__ __device__ void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Fill a range with linearly spaced values over a closed interval.
Definition AMReX_Algorithm.H:317
__host__ __device__ int clz(T x) noexcept
Return the number of leading zeros of the given integer.
Definition AMReX_Algorithm.H:476
__host__ __device__ constexpr T elemwiseMin(T const &a, T const &b) noexcept
Return the element-wise minimum of the given values for types like XDim3.
Definition AMReX_Algorithm.H:62