9#include <kernel/util/type_traits.hpp>
10#include <kernel/util/half.hpp>
16#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
21# define CAT_(x,y) x##y
22# define CAT(x,y) CAT_(x,y)
23# define WRAP_QUAD_MATH1(func) \
24 inline __float128 func(__float128 x) {return ::CAT(func,q)(x);}
25# define WRAP_QUAD_MATH2(func) \
26 inline __float128 func(__float128 x, __float128 y) {return ::CAT(func,q)(x, y);}
27# define WRAP_QUAD_MATH2PTR(func) \
28 inline __float128 func(__float128 x, __float128* y) {return ::CAT(func,q)(x, y);}
30# define WRAP_QUAD_MATH1(func)
31# define WRAP_QUAD_MATH2(func)
32# define WRAP_QUAD_MATH2PTR(func)
36#define WRAP_STD_MATH1(func) \
37 inline float func(float x) {return std::func(x);} \
38 inline double func(double x) {return std::func(x);} \
39 inline long double func(long double x) {return std::func(x);}
42#define WRAP_STD_MATH2(func) \
43 inline float func(float x, float y) {return std::func(x,y);} \
44 inline double func(double x, double y) {return std::func(x,y);} \
45 inline long double func(long double x, long double y) {return std::func(x,y);}
48#define WRAP_STD_MATH2PTR(func) \
49 inline float func(float x, float* y) {return std::func(x,y);} \
50 inline double func(double x, double* y) {return std::func(x,y);} \
51 inline long double func(long double x, long double* y) {return std::func(x,y);}
53#define WRAP_STD_MATH2PTR_NO_CONSTEXPR(func) \
54 inline float func(float x, float* y) {return std::func(x,y);} \
55 inline double func(double x, double* y) {return std::func(x,y);} \
56 inline long double func(long double x, long double* y) {return std::func(x,y);}
59#define WRAP_STD_MATH1BRET(func) \
60 inline bool func(float x) {return std::func(x);} \
61 inline bool func(double x) {return std::func(x);} \
62 inline bool func(long double x) {return std::func(x);}
78 WRAP_STD_MATH2PTR(modf)
82 WRAP_QUAD_MATH1(floor)
84 WRAP_QUAD_MATH2PTR(modf)
108 template<
typename T_>
122 template<
typename T_>
125 return (a < b ? a : b);
136 template<
typename T_>
139 return (a < b ? b : a);
153 template<
typename T_>
154 inline void mini(T_& xmin, T_ x)
171 template<
typename T_>
172 inline void maxi(T_& xmax, T_ x)
194 template<
typename T_>
215 template<
typename T_>
230 template<
typename T_>
249 template<
typename T_>
252 return (x < T_(0) ? T_(-1) : (x > T_(0) ? T_(1) : T_(0)));
261 template<
typename T_>
274 template<
typename T_>
277 return (x < T_(0.) ? -x : x);
282#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
283 inline __float128
abs(__float128 x) {return ::fabsq(x);}
295#ifdef FEAT_COMPILER_MICROSOFT
297#pragma warning(disable:4723)
299 template<
typename T_>
310 const T_ z = T_(1) / x;
316 yn = T_(0.5)*y * (T_(3) - (y*y*z));
321#ifdef FEAT_COMPILER_MICROSOFT
325 #ifdef FEAT_HAVE_HALFMATH
328 return __float2half(
sqrt(__half2float(x)));
334 WRAP_QUAD_MATH1(
sqrt)
343 template<
typename T_>
352 T_ y(x), yl(x+T_(1)), z(x);
361 y += T_(1 -
int(n&2)) * (z *= x*x) * T_(fn);
367 #ifdef FEAT_HAVE_HALFMATH
370 return __float2half(
sin(__half2float(x)));
385 template<
typename T_>
394 T_ y(T_(1)), yl(T_(0)), z(T_(1));
403 y += T_(1 -
int(n&2)) * (z *= x*x) * T_(fn);
413 #ifdef FEAT_HAVE_HALFMATH
416 return __float2half(
cos(__half2float(x)));
427 template<
typename T_>
446 template<
typename T_>
455 T_ y(x), yl(x+T_(1)), z(x);
464 y += (z *= x*x) * T_(fn);
472 WRAP_QUAD_MATH1(
sinh)
474 #ifdef FEAT_HAVE_HALFMATH
477 return __float2half(
sinh(__half2float(x)));
488 template<
typename T_>
497 T_ y(T_(1)), yl(T_(0)), z(T_(1));
506 y += (z *= x*x) * T_(fn);
514 WRAP_QUAD_MATH1(
cosh)
516 #ifdef FEAT_HAVE_HALFMATH
519 return __float2half(
cosh(__half2float(x)));
530 template<
typename T_>
540 WRAP_QUAD_MATH1(
tanh)
549 template<
typename T_>
554 T_ y(T_(1)), yl(T_(0)), z(y);
559 y += ((z *= x) /= T_(++n));
564 }
while(x > T_(0) ? yl < y : n & 1 ? y < yl : yl < y);
579 template<
typename T_>
585 T_ y(T_(0)), yl(T_(0));
590 y += T_(2) * (x - ey) / (x + ey);
595 }
while(x < T_(1) ? y < yl : yl < y);
603 #ifdef FEAT_HAVE_HALFMATH
606 return __float2half(
exp(__half2float(x)));
611 return __float2half(
log(__half2float(x)));
622 template<
typename T_>
627 return log(x) /
log(T_(10));
631 WRAP_STD_MATH1(
log10)
632 WRAP_QUAD_MATH1(
log10)
642 template<
typename T_>
661 template<
typename T_>
676 T_ y(x), yl(x+T_(1)), z(x);
681 T_ t((z *= x*x) / T_(n += 2));
682 y += T_(1 -
int(n&2)) * t;
690 WRAP_QUAD_MATH1(
atan)
692 #ifdef FEAT_HAVE_HALFMATH
695 return __float2half(
atan(__half2float(x)));
707 template<
typename T_>
717 WRAP_STD_MATH2(
atan2)
718 WRAP_QUAD_MATH2(
atan2)
723 template<
typename T_>
732 T_ y(T_(0)), yl(T_(0));
734 const T_ z(T_(1) / T_(16));
739 y += t * (T_(4)/T_(8*k+1) - T_(2)/T_(8*k+4) - T_(1)/T_(8*k+5) - T_(1)/T_(8*k+6));
749 inline float pi<float>()
755 inline double pi<double>()
757 return 3.1415926535897932385;
761 inline long double pi<long double>()
763 return 3.141592653589793238462643383279502884197l;
766#ifdef FEAT_HAVE_QUADMATH
768 inline __float128 pi<__float128>()
770 return 3.141592653589793238462643383279502884197q;
774 #ifdef FEAT_HAVE_HALFMATH
776 inline Half pi<Half>()
778 return __float2half(3.141592654f);
786 template<
typename T_>
798 }
while(z < T_(z+y));
804 inline float eps<float>()
806 return std::numeric_limits<float>::epsilon();
809 inline double eps<double>()
811 return std::numeric_limits<double>::epsilon();
814 inline long double eps<long double>()
816 return std::numeric_limits<long double>::epsilon();
819#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
821 inline __float128 eps<__float128>()
823 return FLT128_EPSILON;
827 #ifdef FEAT_HAVE_HALFMATH
829 inline Half eps<Half>()
831 return CUDART_ONE_FP16 -
Half(0.99951171);
841 template<
typename T_>
844 return std::numeric_limits<T_>::max();
852 template<
typename T_>
855 return std::numeric_limits<T_>::min();
859#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
861 inline __float128 huge<__float128>()
867 inline __float128 tiny<__float128>()
873 #ifdef FEAT_HAVE_HALFMATH
875 inline Half huge<Half>()
877 return CUDART_MAX_NORMAL_FP16;
881 inline Half tiny<Half>()
883 return CUDART_MIN_DENORM_FP16;
896 template<
typename T_>
900 return T_(0) / T_(0);
905 inline float nan<float>()
907 return std::nanf(
"");
911 inline double nan<double>()
917 inline long double nan<long double>()
919 return std::nanl(
"");
922#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
924 inline __float128 nan<__float128>()
930#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
948 template<
typename T_>
957 WRAP_QUAD_MATH1(
asin)
966 template<
typename T_>
975 WRAP_QUAD_MATH1(
acos)
988 template<
typename T_>
993#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
997 return (::finiteq(x) != 0);
1001#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
1002 template<
int exp_bits_,
int sig_bits_,
typename Backend_>
1003 inline bool isfinite(
const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
1007 return isfinite(
static_cast<Backend_
>(x));
1011#ifdef FEAT_HAVE_HALFMATH
1014 return !(__hisinf(x) || __hisnan(x));
1027 template<
typename T_>
1030 WRAP_STD_MATH1BRET(
isinf)
1032#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
1033 inline bool isinf(__float128 x)
1036 return (::isinfq(x) != 0);
1040#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
1041 template<
int exp_bits_,
int sig_bits_,
typename Backend_>
1042 inline bool isinf(
const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
1046 return isinf(
static_cast<Backend_
>(x));
1050#ifdef FEAT_HAVE_HALFMATH
1066 template<
typename T_>
1069 WRAP_STD_MATH1BRET(
isnan)
1071#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
1072 inline bool isnan(__float128 x)
1075 return (::isnanq(x) != 0);
1079#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
1080 template<
int exp_bits_,
int sig_bits_,
typename Backend_>
1081 inline bool isnan(
const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
1085 return isnan(
static_cast<Backend_
>(x));
1089 #ifdef FEAT_HAVE_HALFMATH
1107 template<
typename T_>
1112#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
1116 if(::finiteq(x) == 0)
1119 return !(::fabsq(x) < FLT128_MIN);
1123#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
1124 template<
int exp_bits_,
int sig_bits_,
typename Backend_>
1125 inline bool isnormal(
const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
1129 return isnormal(
static_cast<Backend_
>(x));
1133#ifdef FEAT_HAVE_HALFMATH
1136 return ((__hisinf(x) || __hisnan(x)) && (__habs(x) >= CUDART_MIN_DENORM_FP16));
1163 template<
typename T_>
1170 for(m =
max(T_(1), m); m <= n; ++m)
1195 template<
typename T_>
1204 else if((k <= T_(0)) || (k == n))
1214 for(T_ i(1); i < k; ++i)
1291 template<
typename DT_,
typename IT_>
1295 if((n <= IT_(0)) || (stride < n) || (a ==
nullptr) || (p ==
nullptr))
1302 a[0] = DT_(1) / det;
1307 for(IT_ i(0); i < n; ++i)
1316 for(IT_ k(0); k < n; ++k)
1323 DT_ pivot =
Math::abs(a[p[k]*stride + p[k]]);
1327 for(IT_ j(k+1); j < n; ++j)
1330 DT_ abs_val =
Math::abs(a[p[j]*stride + p[j]]);
1349 const IT_ pk_off = p[k]*stride;
1354 det *= a[pk_off + p[k]];
1357 const DT_ pivot = DT_(1) / a[pk_off + p[k]];
1360 a[pk_off + p[k]] = DT_(1);
1363 for(IT_ j(0); j < n; ++j)
1365 a[pk_off+j] *= pivot;
1372 for(IT_ i(0); i < n; ++i)
1379 const IT_ row_off = i*stride;
1382 const DT_ factor = a[row_off + p[k]];
1385 a[row_off + p[k]] = DT_(0);
1388 for(IT_ j(0); j < n; ++j)
1390 a[row_off + j] -= a[pk_off + j] * factor;
1413 template<
typename DT_>
1423 theta = (dot_prod >= DT_(0)) ? theta : (Math::pi<DT_>() - theta);
1425 theta = (dot_prod >= DT_(0)) ? (DT_(2) * Math::pi<DT_>() + theta) : (Math::pi<DT_>() - theta);
1431 theta = (cross_prod >= DT_(0)) ? theta : (DT_(2) * Math::pi<DT_>() - theta);
1449 template<
typename DT_>
1459 DT_ cross = x1*y2 - y1*x2;
1460 DT_ dot = x1*y1 + x2*y2;
1472 template<
typename T_>
1474 public std::numeric_limits<T_>
1478#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
1483 static constexpr bool is_specialized =
true;
1484 static __float128
min() noexcept {
return FLT128_MIN; }
1485 static __float128
max() noexcept {
return FLT128_MAX; }
1486 static __float128 lowest() noexcept {
return -FLT128_MAX; }
1487 static constexpr int digits = FLT128_MANT_DIG;
1488 static constexpr int digits10 = FLT128_DIG;
1490 static constexpr int max_digits10 = (2 + FLT128_MANT_DIG * 301 / 1000);
1491 static constexpr bool is_signed =
true;
1492 static constexpr bool is_integer =
false;
1493 static constexpr bool is_exact =
false;
1494 static constexpr int radix = 2;
1495 static __float128 epsilon() noexcept {
return FLT128_EPSILON; }
1496 static constexpr __float128 round_error() noexcept {
return __float128(0.5); }
1497 static constexpr int min_exponent = FLT128_MIN_EXP;
1498 static constexpr int min_exponent10 = FLT128_MIN_10_EXP;
1499 static constexpr int max_exponent = FLT128_MAX_EXP;
1500 static constexpr int max_exponent10 = FLT128_MAX_10_EXP;
1501 static constexpr bool has_infinity =
true;
1502 static constexpr bool has_quiet_NaN =
true;
1503 static constexpr bool has_signaling_NaN =
false;
1504 static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
1505 static constexpr bool has_denorm_loss =
false;
1506 static __float128 infinity() noexcept {
return max()*
max(); }
1507 static __float128 quiet_NaN() noexcept { return ::nanq(
nullptr); }
1508 static __float128 signaling_NaN() noexcept { return ::nanq(
nullptr); }
1509 static __float128 denorm_min() noexcept {
return FLT128_DENORM_MIN; }
1510 static constexpr bool is_iec559 =
false;
1511 static constexpr bool is_bounded =
true;
1512 static constexpr bool is_modulo =
false;
1513 static constexpr bool traps =
true;
1514 static constexpr bool tinyness_before =
true;
1515 static constexpr std::float_round_style round_style = std::round_to_nearest;
Math Limits class template.
T_ sqrt(T_ x)
Returns the square-root of a value.
T_ sinh(T_ x)
Returns the hyperbolic sine of a value.
DT_ calc_opening_angle_intern(DT_ cross_prod, DT_ dot_prod)
Calculates the opening angle from the dot and cross product of two 2D vectors.
T_ tiny()
Returns the minimum positive finite (full-precision) value for a data type.
T_ factorial(T_ n, T_ m=T_(0))
Calculates the (partial) factorial.
T_ atan(T_ x)
Returns the arctangent of a value.
bool isinf(T_ x)
Checks whether a value is infinite.
T_ pi()
Returns the mathematical constant pi = 3.1415...
T_ atan2(T_ y, T_ x)
Returns the arctangent of y/x.
bool isnormal(T_ x)
Checks whether a value is normal.
T_ abs(T_ x)
Returns the absolute value.
T_ exp(T_ x)
Returns the exponential of a value.
T_ clamp(T_ x, T_ a, T_ b)
Clamps a value to a range.
void mini(T_ &xmin, T_ x)
Updates the minimum of a set of values.
T_ cosh(T_ x)
Returns the hyperbolic cosine of a value.
T_ tanh(T_ x)
Returns the hyperbolic tangent of a value.
T_ acos(T_ x)
Returns the arccosine of a value.
T_ nan()
Returns a quiet Not-A-Number (NaN)
DT_ calc_opening_angle(DT_ x1, DT_ x2, DT_ y1, DT_ y2)
Calculates the opening angle of two 2D vectors.
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
T_ asin(T_ x)
Returns the arcsine of a value.
T_ binomial(T_ n, T_ k)
Calculates the binomial coefficient.
void minimax(T_ x, T_ &a, T_ &b)
Updates the minimum and maximum.
T_ sin(T_ x)
Returns the sine of a value.
T_ sqr(T_ x)
Returns the square of a value.
void maxi(T_ &xmax, T_ x)
Updates the maximum of a set of values.
T_ min(T_ a, T_ b)
Returns the minimum of two values.
T_ cub(T_ x)
Returns the cube of a value.
T_ huge()
Returns the maximum positive finite (full-precision) value for a data type.
T_ tan(T_ x)
Returns the tangent of a value.
DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[])
Inverts a matrix and returns its determinant.
T_ signum(T_ x)
Returns the sign of a value.
T_ log10(T_ x)
Returns the logarithm to the base 10 of a value.
bool signbit(T_ x)
Returns the status of the sign bit.
T_ ilog10(T_ x)
Computes the integral base-10 logarithm of an integer, i.e. its number of non-zero decimal digits.
bool isfinite(T_ x)
Checks whether a value is finite.
bool isnan(T_ x)
Checks whether a value is Not-A-Number.
T_ cos(T_ x)
Returns the cosine of a value.
T_ log(T_ x)
Returns the natural logarithm of a value.
T_ max(T_ a, T_ b)
Returns the maximum of two values.
T_ eps()
Returns the machine precision constant for a floating-point data type.
__half Half
Half data type.