Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_Math.H
Go to the documentation of this file.
1#ifndef AMREX_MATH_H_
2#define AMREX_MATH_H_
3#include <AMReX_Config.H>
4
6#include <AMReX_Extension.H>
7#include <AMReX_INT.H>
8#include <AMReX_SIMD.H>
9#include <AMReX_REAL.H>
10#include <concepts>
11#include <cmath>
12#include <cstdlib>
13#include <limits>
14#include <numbers>
15#include <type_traits>
16#include <utility>
17
18#ifdef AMREX_USE_SYCL
19# include <sycl/sycl.hpp>
20#endif
21
22namespace amrex { // NOLINT(modernize-concat-nested-namespaces)
24inline namespace disabled {
25 // If it is inside namespace amrex, or amrex namespace is imported with using namespace amrex or
26 // amrex::disabled, unqualified abs functions are disabled with a compile time error such as,
27 // call of overload abs(int&) is ambiguous, or a link time error such as, undefined reference to
28 // `amrex::disabled::abs(double)'. To fix it, one can use `std::abs` or `amrex::Math::abs`.
29 // We have amrex::Math::abs, because std::abs did not work with HIP and SYCL in the past.
30 AMREX_GPU_HOST_DEVICE double abs (double);
31 AMREX_GPU_HOST_DEVICE float abs (float);
32 AMREX_GPU_HOST_DEVICE long double abs (long double);
33 AMREX_GPU_HOST_DEVICE int abs (int);
34 AMREX_GPU_HOST_DEVICE long abs (long);
35 AMREX_GPU_HOST_DEVICE long long abs (long long);
36}
38}
39
40namespace amrex::Math {
41
42// Since Intel's SYCL compiler now supports the following std functions on device,
43// one no longer needs to use amrex::Math::abs, etc. They are kept here for
44// backward compatibility.
45
46using std::abs;
47using std::ceil;
48using std::copysign;
49using std::floor;
50using std::round;
51
52// However, since Intel's SYCL compiler is very aggressive with fast floating
53// point optimisations, the following must be kept, as using the std functions
54// always evaluates to false (even at -O1).
55
56#ifdef AMREX_USE_SYCL
57
58using sycl::isfinite;
59using sycl::isinf;
60
61#else
62
63using std::isfinite;
64using std::isinf;
65
66#endif
67
68template <std::floating_point T>
69constexpr T pi ()
70{
71 return std::numbers::pi_v<T>;
72}
73
76double cospi (double x)
77{
78#if defined(AMREX_USE_SYCL)
79 return sycl::cospi(x);
80#else
81 AMREX_IF_ON_DEVICE(( return ::cospi(x); ))
82 AMREX_IF_ON_HOST(( return std::cos(pi<double>()*x); ))
83#endif
84}
85
88float cospi (float x)
89{
90#if defined(AMREX_USE_SYCL)
91 return sycl::cospi(x);
92#else
93 AMREX_IF_ON_DEVICE(( return ::cospif(x); ))
94 AMREX_IF_ON_HOST(( return std::cos(pi<float>()*x); ))
95#endif
96}
97
100double sinpi (double x)
101{
102#if defined(AMREX_USE_SYCL)
103 return sycl::sinpi(x);
104#else
105 AMREX_IF_ON_DEVICE(( return ::sinpi(x); ))
106 AMREX_IF_ON_HOST(( return std::sin(pi<double>()*x); ))
107#endif
108}
109
112float sinpi (float x)
113{
114#if defined(AMREX_USE_SYCL)
115 return sycl::sinpi(x);
116#else
117 AMREX_IF_ON_DEVICE(( return ::sinpif(x); ))
118 AMREX_IF_ON_HOST(( return std::sin(pi<float>()*x); ))
119#endif
120}
121
123namespace detail {
124 AMREX_FORCE_INLINE void sincos (double x, double* sinx, double* cosx) {
125#if defined(_GNU_SOURCE) && !defined(__APPLE__)
126 ::sincos(x, sinx, cosx);
127#else
128 *sinx = std::sin(x);
129 *cosx = std::cos(x);
130#endif
131 }
132
133 AMREX_FORCE_INLINE void sincosf (float x, float* sinx, float* cosx) {
134#if defined(_GNU_SOURCE) && !defined(__APPLE__)
135 ::sincosf(x, sinx, cosx);
136#else
137 *sinx = std::sin(x);
138 *cosx = std::cos(x);
139#endif
140 }
141}
143
144#ifdef AMREX_USE_SIMD
146template<typename T_Real>
147requires (amrex::simd::stdx::is_simd_v<T_Real>)
149std::pair<T_Real,T_Real> sincos (T_Real x)
150{
151 using namespace amrex::simd::stdx;
152 std::pair<T_Real,T_Real> r;
153 r.first = sin(x);
154 r.second = cos(x);
155 return r;
156}
157#endif
158
161std::pair<double,double> sincos (double x)
162{
163 std::pair<double,double> r;
164#if defined(AMREX_USE_SYCL)
165 r.first = sycl::sincos(x, sycl::private_ptr<double>(&r.second));
166#else
167 AMREX_IF_ON_DEVICE(( ::sincos(x, &r.first, &r.second); ))
168 AMREX_IF_ON_HOST(( detail::sincos(x, &r.first, &r.second); ))
169#endif
170 return r;
171}
172
175std::pair<float,float> sincos (float x)
176{
177 std::pair<float,float> r;
178#if defined(AMREX_USE_SYCL)
179 r.first = sycl::sincos(x, sycl::private_ptr<float>(&r.second));
180#else
181 AMREX_IF_ON_DEVICE(( ::sincosf(x, &r.first, &r.second); ))
182 AMREX_IF_ON_HOST(( detail::sincosf(x, &r.first, &r.second); ))
183#endif
184 return r;
185}
186
187#ifdef AMREX_USE_SIMD
189template<typename T_Real>
190requires (amrex::simd::stdx::is_simd_v<T_Real>)
192std::pair<T_Real,T_Real> sincospi (T_Real x)
193{
194 using namespace amrex::simd::stdx;
195 T_Real const px = pi<typename T_Real::value_type>() * x;
196 std::pair<T_Real,T_Real> r;
197 r.first = sin(px);
198 r.second = cos(px);
199 return r;
200}
201#endif
202
205std::pair<double,double> sincospi (double x)
206{
207 std::pair<double,double> r;
208#if defined(AMREX_USE_SYCL)
209 r = sincos(pi<double>()*x);
210#else
211 AMREX_IF_ON_DEVICE(( ::sincospi(x, &r.first, &r.second); ))
212 AMREX_IF_ON_HOST(( r = sincos(pi<double>()*x); ))
213#endif
214 return r;
215}
216
219std::pair<float,float> sincospi (float x)
220{
221 std::pair<float,float> r;
222#if defined(AMREX_USE_SYCL)
223 r = sincos(pi<float>()*x);
224#else
225 AMREX_IF_ON_DEVICE(( ::sincospif(x, &r.first, &r.second); ))
226 AMREX_IF_ON_HOST(( r = sincos(pi<float>()*x); ))
227#endif
228 return r;
229}
230
232template <int Power, typename T>
233requires (!std::integral<T> || Power >= 0)
235constexpr T powi (T x) noexcept
236{
237 if constexpr (Power < 0) {
238 return T(1)/powi<-Power>(x);
239 } else if constexpr (Power == 0) {
240 //note: 0^0 is implementation-defined, but most compilers return 1
241 return T(1);
242 } else if constexpr (Power == 1) {
243 return x;
244 } else if constexpr (Power == 2) {
245 return x*x;
246 } else if constexpr (Power%2 == 0) {
247 return powi<2>(powi<Power/2>(x));
248 } else {
249 return x*powi<Power-1>(x);
250 }
251}
252
253} // namespace amrex::Math
254
255#if defined(AMREX_USE_CUDA)
256// Forward-declare libdevice integer-exponent intrinsics. They are part of
257// NVIDIA's libdevice and linked at PTX time but are not declared in standard
258// CUDA cmath headers outside __CUDACC_RTC__ / _LIBCPP_VERSION blocks.
259// See https://docs.nvidia.com/cuda/libdevice-users-guide/__nv_powif.html
260extern "C" __device__ float __nv_powif (float, int);
261extern "C" __device__ double __nv_powi (double, int);
262#endif
263
264namespace amrex::Math {
265
276float powi (float x, int n) noexcept
277{
278#if defined(AMREX_USE_SYCL)
279 return sycl::pown(x, n);
280#elif defined(AMREX_USE_HIP)
281 AMREX_IF_ON_DEVICE(( return ::powif(x, n); ))
282 AMREX_IF_ON_HOST(( return std::pow(x, static_cast<float>(n)); ))
283#elif defined(AMREX_USE_CUDA)
284 AMREX_IF_ON_DEVICE(( return __nv_powif(x, n); ))
285 AMREX_IF_ON_HOST(( return std::pow(x, static_cast<float>(n)); ))
286#else
287 return std::pow(x, static_cast<float>(n));
288#endif
289}
290
292double powi (double x, int n) noexcept
293{
294#if defined(AMREX_USE_SYCL)
295 return sycl::pown(x, n);
296#elif defined(AMREX_USE_HIP)
297 AMREX_IF_ON_DEVICE(( return ::powi(x, n); ))
298 AMREX_IF_ON_HOST(( return std::pow(x, n); ))
299#elif defined(AMREX_USE_CUDA)
300 AMREX_IF_ON_DEVICE(( return __nv_powi(x, n); ))
301 AMREX_IF_ON_HOST(( return std::pow(x, n); ))
302#else
303 return std::pow(x, n);
304#endif
305}
306
307#if defined(AMREX_INT128_SUPPORTED)
309std::uint64_t umulhi (std::uint64_t a, std::uint64_t b)
310{
311#if defined(AMREX_USE_SYCL)
312 return sycl::mul_hi(a,b);
313#else
314 AMREX_IF_ON_DEVICE(( return __umul64hi(a, b); ))
316 auto tmp = amrex::UInt128_t(a) * amrex::UInt128_t(b);
317 return std::uint64_t(tmp >> 64);
318 ))
319#endif
320}
321#endif
322
323template <typename T>
326{
327 // Computing K based on DLMF
328 // https://dlmf.nist.gov/19.8
329 T tol = std::numeric_limits<T>::epsilon();
330
331 T a0 = 1.0;
332 T g0 = std::sqrt(1.0 - k*k);
333 T a = a0;
334 T g = g0;
335
336 // Find Arithmetic Geometric mean
337 while(std::abs(a0 - g0) > tol) {
338 a = 0.5*(a0 + g0);
339 g = std::sqrt(a0 * g0);
340
341 a0 = a;
342 g0 = g;
343 }
344
345 return 0.5*pi<T>()/a;
346}
347
348template <typename T>
351{
352 // Computing E based on DLMF
353 // https://dlmf.nist.gov/19.8
354 T Kcomp = amrex::Math::comp_ellint_1<T>(k);
355 T tol = std::numeric_limits<T>::epsilon();
356
357 // Step Zero
358 T a0 = 1.0;
359 T g0 = std::sqrt(1.0 - k*k);
360 T cn = std::sqrt(a0*a0 - g0*g0);
361
362 // Step 1
363 int n = 1;
364 T a = 0.5 * (a0 + g0);
365 T g = std::sqrt(a0*g0);
366 cn = 0.25*cn*cn/a;
367
368 T sum_val = a*a;
369 a0 = a;
370 g0 = g;
371
372 while(std::abs(cn*cn) > tol) {
373 // Compute coefficients for this iteration
374 a = 0.5 * (a0 + g0);
375 g = std::sqrt(a0*g0);
376 cn = 0.25*cn*cn/a;
377
378 n++;
379 sum_val -= std::pow(2,n-1)*cn*cn;
380
381 // Save a and g for next iteration
382 a0 = a;
383 g0 = g;
384 }
385
386 return Kcomp*sum_val;
387}
388
391double rsqrt (double x)
392{
393 double r;
394#if defined(AMREX_USE_SYCL)
395 AMREX_IF_ON_DEVICE(( r = sycl::rsqrt(x); ))
396#else
397 AMREX_IF_ON_DEVICE(( r = ::rsqrt(x); ))
398#endif
399 AMREX_IF_ON_HOST(( r = 1. / std::sqrt(x); ))
400 return r;
401}
402
405float rsqrt (float x)
406{
407 float r;
408#if defined(AMREX_USE_SYCL)
409 AMREX_IF_ON_DEVICE(( r = sycl::rsqrt(x); ))
410#else
411 AMREX_IF_ON_DEVICE(( r = ::rsqrtf(x); ))
412#endif
413 AMREX_IF_ON_HOST(( r = 1.F / std::sqrt(x); ))
414 return r;
415}
416
419double exp10 (double x)
420{
421 double r;
422#if defined(AMREX_USE_SYCL)
423 AMREX_IF_ON_DEVICE(( r = sycl::exp10(x); ))
424#else
425 AMREX_IF_ON_DEVICE(( r = ::exp10(x); ))
426#endif
427 AMREX_IF_ON_HOST(( r = std::pow(10.0, x); ))
428 return r;
429}
430
433float exp10 (float x)
434{
435 float r;
436#if defined(AMREX_USE_SYCL)
437 AMREX_IF_ON_DEVICE(( r = sycl::exp10(x); ))
438#else
439 AMREX_IF_ON_DEVICE(( r = ::exp10f(x); ))
440#endif
441 AMREX_IF_ON_HOST(( r = std::pow(10.0F, x); ))
442 return r;
443}
444
445/***************************************************************************************************
446 * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
447 * SPDX-License-Identifier: BSD-3-Clause
448 *
449 * Redistribution and use in source and binary forms, with or without
450 * modification, are permitted provided that the following conditions are met:
451 *
452 * 1. Redistributions of source code must retain the above copyright notice, this
453 * list of conditions and the following disclaimer.
454 *
455 * 2. Redistributions in binary form must reproduce the above copyright notice,
456 * this list of conditions and the following disclaimer in the documentation
457 * and/or other materials provided with the distribution.
458 *
459 * 3. Neither the name of the copyright holder nor the names of its
460 * contributors may be used to endorse or promote products derived from
461 * this software without specific prior written permission.
462 *
463 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
464 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
465 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
466 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
467 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
468 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
469 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
470 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
471 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
472 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
473 *
474 **************************************************************************************************/
475
477
493{
494 std::uint64_t divisor = 0;
495
496#ifdef AMREX_INT128_SUPPORTED
497 std::uint64_t multiplier = 1U;
498 unsigned int shift_right = 0;
499 unsigned int round_up = 0;
500
501 //
502 // Static methods
503 //
504
506 static std::uint32_t integer_log2 (std::uint64_t x)
507 {
508 std::uint32_t n = 0;
509 while (x >>= 1) {
510 ++n;
511 }
512 return n;
513 }
514
518 FastDivmodU64 (std::uint64_t divisor_)
519 : divisor(divisor_)
520 {
521 if (divisor) {
522 shift_right = integer_log2(divisor);
523
524 if ((divisor & (divisor - 1)) == 0) {
525 multiplier = 0;
526 }
527 else {
528 std::uint64_t power_of_two = (std::uint64_t(1) << shift_right);
529 auto n = amrex::UInt128_t(power_of_two) << 64;
530 std::uint64_t multiplier_lo = n / divisor;
531 n += power_of_two;
532 multiplier = n / divisor;
533 round_up = (multiplier_lo == multiplier ? 1 : 0);
534 }
535 }
536 }
537
538#else
539
540 FastDivmodU64 (std::uint64_t divisor_) : divisor(divisor_) {}
541
542#endif
543
545 FastDivmodU64 () = default;
546
548 [[nodiscard]] AMREX_GPU_HOST_DEVICE
549 std::uint64_t divide (std::uint64_t dividend) const
550 {
551#if defined(AMREX_INT128_SUPPORTED)
552 auto x = dividend;
553 if (multiplier) {
554 x = amrex::Math::umulhi(dividend + round_up, multiplier);
555 }
556 return (x >> shift_right);
557#else
558 return dividend / divisor;
559#endif
560 }
561
563 [[nodiscard]] AMREX_GPU_HOST_DEVICE
564 std::uint64_t modulus (std::uint64_t quotient, std::uint64_t dividend) const
565 {
566 return dividend - quotient * divisor;
567 }
568
570 [[nodiscard]] AMREX_GPU_HOST_DEVICE
571 std::uint64_t divmod (std::uint64_t &remainder, std::uint64_t dividend) const
572 {
573 auto quotient = divide(dividend);
574 remainder = modulus(quotient, dividend);
575 return quotient;
576 }
577
581 void operator() (std::uint64_t &quotient, std::uint64_t &remainder, std::uint64_t dividend) const
582 {
583 quotient = divmod(remainder, dividend);
584 }
585};
586
587}
588
589#endif
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_IF_ON_DEVICE(CODE)
Definition AMReX_GpuQualifiers.H:56
#define AMREX_IF_ON_HOST(CODE)
Definition AMReX_GpuQualifiers.H:58
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
__device__ double __nv_powi(double, int)
__device__ float __nv_powif(float, int)
Definition AMReX_Math.H:40
constexpr T pi()
Definition AMReX_Math.H:69
constexpr T powi(T x) noexcept
Return pow(x, Power), where Power is an integer known at compile time.
Definition AMReX_Math.H:235
__host__ __device__ double sinpi(double x)
Return sin(x*pi) given x.
Definition AMReX_Math.H:100
__host__ __device__ std::pair< double, double > sincospi(double x)
Return sin(pi*x) and cos(pi*x) given x.
Definition AMReX_Math.H:205
__host__ __device__ double cospi(double x)
Return cos(x*pi) given x.
Definition AMReX_Math.H:76
__host__ __device__ double exp10(double x)
Return 10**x.
Definition AMReX_Math.H:419
__host__ __device__ T comp_ellint_1(T k)
Definition AMReX_Math.H:325
__host__ __device__ std::pair< double, double > sincos(double x)
Return sine and cosine of given number.
Definition AMReX_Math.H:161
__host__ __device__ T comp_ellint_2(T k)
Definition AMReX_Math.H:350
__host__ __device__ double rsqrt(double x)
Return inverse square root of x.
Definition AMReX_Math.H:391
Definition AMReX_SIMD.H:25
Definition AMReX_Amr.cpp:50
__host__ __device__ T abs(const GpuComplex< T > &a_z) noexcept
Return the absolute value of a complex number.
Definition AMReX_GpuComplex.H:361
Definition AMReX_Math.H:493
__host__ __device__ std::uint64_t divide(std::uint64_t dividend) const
Returns the quotient of floor(dividend / divisor)
Definition AMReX_Math.H:549
__host__ __device__ std::uint64_t divmod(std::uint64_t &remainder, std::uint64_t dividend) const
Returns the quotient of floor(dividend / divisor) and computes the remainder.
Definition AMReX_Math.H:571
__host__ __device__ void operator()(std::uint64_t &quotient, std::uint64_t &remainder, std::uint64_t dividend) const
Definition AMReX_Math.H:581
__host__ __device__ std::uint64_t modulus(std::uint64_t quotient, std::uint64_t dividend) const
Computes the remainder given a computed quotient and dividend.
Definition AMReX_Math.H:564
std::uint64_t divisor
Definition AMReX_Math.H:494
FastDivmodU64(std::uint64_t divisor_)
Definition AMReX_Math.H:540
FastDivmodU64()=default
Default construct an invalid FastDivmodU64.