amrex/doxygen/AMReX__Algorithm_8H_source.html

 #ifndef AMREX_ALGORITHM_H_

 #define AMREX_ALGORITHM_H_

 #include <AMReX_Config.H>


 #include <AMReX_GpuQualifiers.H>

 #include <AMReX_Extension.H>

 #include <AMReX_Dim3.H>

 #include <AMReX_BLassert.H>

 #include <AMReX_Math.H>


 #include <algorithm>

 #include <limits>

 #include <type_traits>

 #include <cstdint>

 #include <climits>


 namespace amrex

 {

     template <class T>

     AMREX_GPU_HOST_DEVICE

     AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b) noexcept

     {

         return std::min(a,b);

     }


     template <class T, class ... Ts>

     AMREX_GPU_HOST_DEVICE

     AMREX_FORCE_INLINE constexpr const T& min (const T& a, const T& b, const Ts& ... c) noexcept

     {

         return min(min(a,b),c...);

     }


     template <class T>

     AMREX_GPU_HOST_DEVICE

     AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b) noexcept

     {

         return std::max(a,b);

     }


     template <class T, class ... Ts>

     AMREX_GPU_HOST_DEVICE

     AMREX_FORCE_INLINE constexpr const T& max (const T& a, const T& b, const Ts& ... c) noexcept

     {

         return max(max(a,b),c...);

     }


     template <class T>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr

     T elemwiseMin (T const& a, T const& b) noexcept {

         return T{amrex::min(a.x,b.x),amrex::min(a.y,b.y),amrex::min(a.z,b.z)};

     }


     template <class T, class ... Ts>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr

     T elemwiseMin (const T& a, const T& b, const Ts& ... c) noexcept

     {

         return elemwiseMin(elemwiseMin(a,b),c...);

     }


     template <class T>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr

     T elemwiseMax (T const& a, T const& b) noexcept {

         return T{amrex::max(a.x,b.x),amrex::max(a.y,b.y),amrex::max(a.z,b.z)};

     }


     template <class T, class ... Ts>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr

     T elemwiseMax (const T& a, const T& b, const Ts& ... c) noexcept

     {

         return elemwiseMax(elemwiseMax(a,b),c...);

     }


     template<typename T>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

     void Swap (T& t1, T& t2) noexcept

     {

         T temp = std::move(t1);

         t1 = std::move(t2);

         t2 = std::move(temp);

     }


     template <typename T>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

     constexpr const T& Clamp (const T& v, const T& lo, const T& hi)

     {

         return (v < lo) ? lo : (hi < v) ? hi : v;

     }


     // Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon

     template <typename T>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

     std::enable_if_t<std::is_floating_point_v<T>,bool>

     almostEqual (T x, T y, int ulp = 2)

     {

         // the machine epsilon has to be scaled to the magnitude of the values used

         // and multiplied by the desired precision in ULPs (units in the last place)

         return std::abs(x-y) <= std::numeric_limits<T>::epsilon() * std::abs(x+y) * ulp

         // unless the result is subnormal

         || std::abs(x-y) < std::numeric_limits<T>::min();

     }


     template <class T, class F,

               std::enable_if_t<std::is_floating_point_v<T>,int>FOO = 0>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

     T bisect (T lo, T hi, F f, T tol=1e-12, int max_iter=100)

     {

         AMREX_ASSERT_WITH_MESSAGE(hi > lo,

             "Error - calling bisect but lo and hi don't describe a reasonable interval.");


         T flo = f(lo);

         T fhi = f(hi);


         if (flo == T(0)) { return flo; }

         if (fhi == T(0)) { return fhi; }


         AMREX_ASSERT_WITH_MESSAGE(flo * fhi <= T(0),

             "Error - calling bisect but lo and hi don't bracket a root.");


         T mi = (lo + hi) / T(2);

         T fmi = T(0);

         int n = 1;

         while (n <= max_iter)

         {

             if (hi - lo < tol || almostEqual(lo,hi)) { break; }

             mi = (lo + hi) / T(2);

             fmi = f(mi);

             if (fmi == T(0)) { break; }

             fmi*flo < T(0) ? hi = mi : lo = mi;

             flo = f(lo);

             fhi = f(hi);

             ++n;

         }


         AMREX_ASSERT_WITH_MESSAGE(n < max_iter,

             "Error - maximum number of iterations reached in bisect.");


         return mi;

     }


     // Find I in the range [lo,hi) that T[I] <= v < T[I+1].

     // It is assumed that the input data are sorted and T[lo] <= v < T[hi].

     // Note that this is different from std::lower_bound.

     template <typename T, typename I,

               std::enable_if_t<std::is_integral_v<I>,int> = 0>

     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

     I bisect (T const* d, I lo, I hi, T const& v) {

         while (lo <= hi) {

             int mid = lo + (hi-lo)/2;

             if (v >= d[mid] && v < d[mid+1]) {

                 return mid;

             } else if (v < d[mid]) {

                 hi = mid-1;

             } else {

                 lo = mid+1;

             }

         };

         return hi;

     }


     template<typename ItType, typename ValType>

     AMREX_GPU_HOST_DEVICE

     ItType upper_bound (ItType first, ItType last, const ValType& val)

     {

         AMREX_IF_ON_DEVICE((

             std::ptrdiff_t count = last-first;

             while(count>0){

                 auto it = first;

                 const auto step = count/2;

                 it += step;

                 if (!(val < *it)){

                     first = ++it;

                     count -= step + 1;

                 }

                 else{

                     count = step;

                 }

             }

             return first;

         ))

         AMREX_IF_ON_HOST((

             return std::upper_bound(first, last, val);

         ))

     }


     template<typename ItType, typename ValType>

     AMREX_GPU_HOST_DEVICE

     ItType lower_bound (ItType first, ItType last, const ValType& val)

     {

         AMREX_IF_ON_DEVICE((

             std::ptrdiff_t count = last-first;

             while(count>0)

             {

                 auto it = first;

                 const auto step = count/2;

                 it += step;

                 if (*it < val){

                     first = ++it;

                     count -= step + 1;

                 }

                 else{

                     count = step;

                 }

             }


             return first;

         ))

         AMREX_IF_ON_HOST((

             return std::lower_bound(first, last, val);

         ))

     }


     template<typename ItType, typename ValType,

         std::enable_if_t<

             std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&

             std::is_floating_point_v<ValType>,

         int> = 0>

     AMREX_GPU_HOST_DEVICE

     void linspace (ItType first, const ItType& last, const ValType& start, const ValType& stop)

     {

         const std::ptrdiff_t count = last-first;

         if (count >= 2){

             const auto delta = (stop - start)/(count - 1);

             for (std::ptrdiff_t i = 0; i < count-1; ++i){

                 *(first++) = start + i*delta;

             }

             *first = stop;

         }

     }


     template<typename ItType, typename ValType,

         std::enable_if_t<

             std::is_floating_point_v<typename std::iterator_traits<ItType>::value_type> &&

             std::is_floating_point_v<ValType>,

         int> = 0>

     AMREX_GPU_HOST_DEVICE

     void logspace (ItType first, const ItType& last,

         const ValType& start, const ValType& stop, const ValType& base)

     {

         const std::ptrdiff_t count = last-first;

         if (count >= 2){

             const auto delta = (stop - start)/(count - 1);

             for (std::ptrdiff_t i = 0; i < count-1; ++i){

                 *(first++) = std::pow(base, start + i*delta);

             }

             *first = std::pow(base, stop);

         }

     }


 namespace detail {


 struct clzll_tag {};

 struct clzl_tag : clzll_tag {};

 struct clz_tag : clzl_tag {};


 // in gcc and clang, there are three versions of __builtin_clz taking unsigned int,

 // unsigned long, and unsigned long long inputs. Because the sizes of these data types

 // vary on different platforms, we work with fixed-width integer types.

 // these tags and overloads select the smallest version of __builtin_clz that will hold the input type

 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned int)>>

 AMREX_FORCE_INLINE

 int builtin_clz_wrapper (clz_tag, T x) noexcept

 {

     return static_cast<int>(__builtin_clz(x) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT));

 }


 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long)>>

 AMREX_FORCE_INLINE

 int builtin_clz_wrapper (clzl_tag, T x) noexcept

 {

     return static_cast<int>(__builtin_clzl(x) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT));

 }


 template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(unsigned long long)>>

 AMREX_FORCE_INLINE

 int builtin_clz_wrapper (clzll_tag, T x) noexcept

 {

     return static_cast<int>(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT));

 }


 }


 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t>  ||

                                     std::is_same_v<std::decay_t<T>,std::uint16_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint32_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint64_t>, int> = 0>

 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz (T x) noexcept;


 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz_generic (std::uint8_t x) noexcept

 {

 #if !defined(__NVCOMPILER)

     static constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

 #else

     constexpr int clz_lookup[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

 #endif

     auto upper = x >> 4;

     auto lower = x & 0xF;

     return upper ? clz_lookup[upper] : 4 + clz_lookup[lower];

 }


 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz_generic (std::uint16_t x) noexcept

 {

     auto upper = std::uint8_t(x >> 8);

     auto lower = std::uint8_t(x & 0xFF);

     return upper ? clz(upper) : 8 + clz(lower);

 }


 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz_generic (std::uint32_t x) noexcept

 {

     auto upper = std::uint16_t(x >> 16);

     auto lower = std::uint16_t(x & 0xFFFF);

     return upper ? clz(upper) : 16 + clz(lower);

 }


 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz_generic (std::uint64_t x) noexcept

 {

     auto upper = std::uint32_t(x >> 32);

     auto lower = std::uint32_t(x & 0xFFFFFFFF);

     return upper ? clz(upper) : 32 + clz(lower);

 }


 #if defined AMREX_USE_CUDA


 namespace detail {

     // likewise with CUDA, there are __clz functions that take (signed) int and long long int

     template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(int)> >

     AMREX_GPU_DEVICE AMREX_FORCE_INLINE

     int clz_wrapper (clz_tag, T x) noexcept

     {

         return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT);

     }


     template <typename T, typename = std::enable_if_t<sizeof(T) <= sizeof(long long int)> >

     AMREX_GPU_DEVICE AMREX_FORCE_INLINE

     int clz_wrapper (clzll_tag, T x) noexcept

     {

         return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT);

     }

 }


 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t>  ||

                                     std::is_same_v<std::decay_t<T>,std::uint16_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint32_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >

 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz (T x) noexcept

 {

     AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);))

 #if AMREX_HAS_BUILTIN_CLZ

     AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);))

 #else

     AMREX_IF_ON_HOST((return clz_generic(x);))

 #endif

 }


 #else // !defined AMREX_USE_CUDA


 template <class T, std::enable_if_t<std::is_same_v<std::decay_t<T>,std::uint8_t>  ||

                                     std::is_same_v<std::decay_t<T>,std::uint16_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint32_t> ||

                                     std::is_same_v<std::decay_t<T>,std::uint64_t>, int> >

 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

 int clz (T x) noexcept

 {

 #if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ)

     return detail::builtin_clz_wrapper(detail::clz_tag{}, x);

 #else

     return clz_generic(x);

 #endif

 }


 #endif // defined AMREX_USE_CUDA


 }


 #endif

AMReX_BLassert.H

AMREX_ASSERT_WITH_MESSAGE
#define AMREX_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: AMReX_BLassert.H:37

AMReX_Dim3.H

AMReX_Extension.H

AMREX_FORCE_INLINE
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119

AMReX_GpuQualifiers.H

AMREX_IF_ON_DEVICE
#define AMREX_IF_ON_DEVICE(CODE)
Definition: AMReX_GpuQualifiers.H:56

AMREX_IF_ON_HOST
#define AMREX_IF_ON_HOST(CODE)
Definition: AMReX_GpuQualifiers.H:58

AMREX_GPU_HOST_DEVICE
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20

AMReX_Math.H

amrex::SundialsUserFun::f
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44

amrex
Definition: AMReX_Amr.cpp:49

amrex::logspace
AMREX_GPU_HOST_DEVICE void logspace(ItType first, const ItType &last, const ValType &start, const ValType &stop, const ValType &base)
Definition: AMReX_Algorithm.H:236

amrex::CurlCurlStateType::b
@ b

amrex::CurlCurlStateType::x
@ x

amrex::max
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:35

amrex::clz_generic
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic(std::uint8_t x) noexcept
Definition: AMReX_Algorithm.H:290

amrex::min
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b) noexcept
Definition: AMReX_Algorithm.H:21

amrex::abs
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T abs(const GpuComplex< T > &a_z) noexcept
Return the absolute value of a complex number.
Definition: AMReX_GpuComplex.H:356

amrex::max
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & max(const T &a, const T &b, const Ts &... c) noexcept
Definition: AMReX_Algorithm.H:42

amrex::Swap
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void Swap(T &t1, T &t2) noexcept
Definition: AMReX_Algorithm.H:75

amrex::bisect
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE T bisect(T lo, T hi, F f, T tol=1e-12, int max_iter=100)
Definition: AMReX_Algorithm.H:105

amrex::elemwiseMax
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMax(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:62

amrex::min
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & min(const T &a, const T &b, const Ts &... c) noexcept
Definition: AMReX_Algorithm.H:28

amrex::almostEqual
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::enable_if_t< std::is_floating_point_v< T >, bool > almostEqual(T x, T y, int ulp=2)
Definition: AMReX_Algorithm.H:93

amrex::linspace
AMREX_GPU_HOST_DEVICE void linspace(ItType first, const ItType &last, const ValType &start, const ValType &stop)
Definition: AMReX_Algorithm.H:218

amrex::pow
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE GpuComplex< T > pow(const GpuComplex< T > &a_z, const T &a_y) noexcept
Raise a complex number to a (real) power.
Definition: AMReX_GpuComplex.H:418

amrex::upper_bound
AMREX_GPU_HOST_DEVICE ItType upper_bound(ItType first, ItType last, const ValType &val)
Definition: AMReX_Algorithm.H:162

amrex::Clamp
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE const T & Clamp(const T &v, const T &lo, const T &hi)
Definition: AMReX_Algorithm.H:84

amrex::elemwiseMin
AMREX_GPU_HOST_DEVICE constexpr AMREX_FORCE_INLINE T elemwiseMin(T const &a, T const &b) noexcept
Definition: AMReX_Algorithm.H:49

amrex::clz
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz(T x) noexcept
Definition: AMReX_Algorithm.H:350

amrex::lower_bound
AMREX_GPU_HOST_DEVICE ItType lower_bound(ItType first, ItType last, const ValType &val)
Definition: AMReX_Algorithm.H:187

detail
Definition: AMReX_FabArrayCommI.H:896

amrex::detail::clz_tag
Definition: AMReX_Algorithm.H:253

amrex::detail::clzl_tag
Definition: AMReX_Algorithm.H:252

amrex::detail::clzll_tag
Definition: AMReX_Algorithm.H:251