feat3/math_8hpp_source.html

// FEAT3: Finite Element Analysis Toolbox, Version 3

// Copyright (C) 2010 by Stefan Turek & the FEAT group

// FEAT3 is released under the GNU General Public License version 3,

// see the file 'copyright.txt' in the top level directory for details.


#pragma once


// includes, FEAT

#include <kernel/util/type_traits.hpp>

#include <kernel/util/half.hpp>


// includes, system

#include <cmath>

#include <limits>


#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
// quadmath.h is included with C linkage
extern "C"
{
#    include <quadmath.h>
}
// two-level token pasting, so that CAT(sin,q) yields the identifier sinq
#  define CAT_(x,y) x##y
#  define CAT(x,y) CAT_(x,y)
// NOTE: the continuation backslash has to be the very last character of its line and
// the macro body has to follow on the next line; a blank line in between ends the macro.
// __float128 overload of a unary function, forwarding to ::<func>q
#  define WRAP_QUAD_MATH1(func) \
    inline __float128 func(__float128 x) {return ::CAT(func,q)(x);}
// __float128 overload of a binary function, forwarding to ::<func>q
#  define WRAP_QUAD_MATH2(func) \
    inline __float128 func(__float128 x, __float128 y) {return ::CAT(func,q)(x, y);}
// __float128 overload of a binary function with pointer second argument, forwarding to ::<func>q
#  define WRAP_QUAD_MATH2PTR(func) \
    inline __float128 func(__float128 x, __float128* y) {return ::CAT(func,q)(x, y);}
#else
// without quadmath (or when compiling with nvcc) the wrappers expand to nothing
#  define WRAP_QUAD_MATH1(func)
#  define WRAP_QUAD_MATH2(func)
#  define WRAP_QUAD_MATH2PTR(func)
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


    // single argument function wrapper

// Each macro below emits float, double and long double overloads named <func> that
// forward to std::<func>.
// NOTE: the continuation backslash has to be the very last character of its line and
// the next body line has to follow immediately; a blank line in between ends the macro.
#define WRAP_STD_MATH1(func) \
    inline float func(float x) {return std::func(x);} \
    inline double func(double x) {return std::func(x);} \
    inline long double func(long double x) {return std::func(x);}

    // double argument function wrapper
#define WRAP_STD_MATH2(func) \
    inline float func(float x, float y) {return std::func(x,y);} \
    inline double func(double x, double y) {return std::func(x,y);} \
    inline long double func(long double x, long double y) {return std::func(x,y);}

    // double argument function wrapper, second argument is pointer
#define WRAP_STD_MATH2PTR(func) \
    inline float func(float x, float* y) {return std::func(x,y);} \
    inline double func(double x, double* y) {return std::func(x,y);} \
    inline long double func(long double x, long double* y) {return std::func(x,y);}

    // same expansion as WRAP_STD_MATH2PTR, kept as a separate name
#define WRAP_STD_MATH2PTR_NO_CONSTEXPR(func) \
    inline float func(float x, float* y) {return std::func(x,y);} \
    inline double func(double x, double* y) {return std::func(x,y);} \
    inline long double func(long double x, long double* y) {return std::func(x,y);}

    // single argument function wrapper, bool return type
#define WRAP_STD_MATH1BRET(func) \
    inline bool func(float x) {return std::func(x);} \
    inline bool func(double x) {return std::func(x);} \
    inline bool func(long double x) {return std::func(x);}


namespace FEAT

{

  namespace Math

  {

    // include C++ overloads of C89 math functions
    // (each line emits float, double and long double overloads forwarding to std::<name>)

    WRAP_STD_MATH1(ceil)

    WRAP_STD_MATH1(floor)

    WRAP_STD_MATH2(fmod)

    WRAP_STD_MATH2PTR(modf)


    // wrap quadmath functions
    // (__float128 overloads forwarding to ::<name>q; these expand to nothing unless
    // FEAT_HAVE_QUADMATH is defined and __CUDACC__ is not)

    WRAP_QUAD_MATH1(ceil)

    WRAP_QUAD_MATH1(floor)

    WRAP_QUAD_MATH2(fmod)

    WRAP_QUAD_MATH2PTR(modf)


    /**
     * \brief Returns the square of a value.
     *
     * \param[in] x The value to be squared.
     *
     * \returns The product x * x.
     */
    template<typename T_>
    inline T_ sqr(T_ x)
    {
      const auto squared = x * x;
      return squared;
    }


    /**
     * \brief Returns the cube of a value.
     *
     * \param[in] x The value to be cubed.
     *
     * \returns The product x * x * x.
     */
    template<typename T_>
    inline T_ cub(T_ x)
    {
      // evaluate left-to-right as (x*x)*x
      const auto squared = x * x;
      return squared * x;
    }


    /**
     * \brief Returns the minimum of two values.
     *
     * \param[in] a, b The two values to compare.
     *
     * \returns \p a if a < b holds, otherwise \p b.
     */
    template<typename T_>
    inline T_ min(T_ a, T_ b)
    {
      if(a < b)
        return a;
      return b;
    }


    /**
     * \brief Returns the maximum of two values.
     *
     * \param[in] a, b The two values to compare.
     *
     * \returns \p b if a < b holds, otherwise \p a.
     */
    template<typename T_>
    inline T_ max(T_ a, T_ b)
    {
      if(a < b)
        return b;
      return a;
    }


    /**
     * \brief Updates the minimum of a set of values.
     *
     * \param[in,out] xmin The running minimum; overwritten by \p x if x < xmin.
     * \param[in] x The new candidate value.
     */
    template<typename T_>
    inline void mini(T_& xmin, T_ x)
    {
      // nothing to do unless the candidate is strictly smaller
      if(!(x < xmin))
        return;
      xmin = x;
    }


    /**
     * \brief Updates the maximum of a set of values.
     *
     * \param[in,out] xmax The running maximum; overwritten by \p x if xmax < x.
     * \param[in] x The new candidate value.
     */
    template<typename T_>
    inline void maxi(T_& xmax, T_ x)
    {
      // nothing to do unless the candidate is strictly larger
      if(!(xmax < x))
        return;
      xmax = x;
    }


    /**
     * \brief Updates the minimum and maximum.
     *
     * \param[in] x The new candidate value.
     * \param[in,out] a The running minimum; overwritten by \p x if x < a.
     * \param[in,out] b The running maximum; overwritten by \p x if b < x.
     */
    template<typename T_>
    inline void minimax(T_ x, T_& a, T_& b)
    {
      // lower bound first ...
      const bool below_min = (x < a);
      if(below_min)
      {
        a = x;
      }

      // ... then the upper bound
      const bool above_max = (b < x);
      if(above_max)
      {
        b = x;
      }
    }


    /**
     * \brief Clamps a value to a range.
     *
     * \param[in] x The value to be clamped.
     * \param[in] a The lower bound.
     * \param[in] b The upper bound.
     *
     * \returns max(a, min(x, b))
     */
    template<typename T_>
    inline T_ clamp(T_ x, T_ a, T_ b)
    {
      // cap from above first, then lift to the lower bound
      const T_ capped = min(x, b);
      return max(a, capped);
    }


    /**
     * \brief Computes the number of decimal digits of an integer.
     *
     * \param[in] x The integer whose digits are to be counted.
     *
     * \returns The number of divisions by 10 needed to reduce \p x to zero;
     * this is 0 for x = 0.
     */
    template<typename T_>
    inline T_ ilog10(T_ x)
    {
      static_assert(Type::Traits<T_>::is_int, "ilog10 can only be applied to integral types");
      T_ digits(0);
      // strip one decimal digit per pass
      for(; x != T_(0); x /= T_(10))
      {
        ++digits;
      }
      return digits;
    }


    /**
     * \brief Returns the sign of a value.
     *
     * \param[in] x The value whose sign is to be determined.
     *
     * \returns T_(-1) if x < 0, T_(1) if x > 0 and T_(0) otherwise.
     */
    template<typename T_>
    inline T_ signum(T_ x)
    {
      if(x < T_(0))
        return T_(-1);
      if(x > T_(0))
        return T_(1);
      return T_(0);
    }


    /**
     * \brief Returns the status of the sign bit.
     *
     * \note This generic version only tests x < 0, so it reports \c false for a negative zero.
     *
     * \param[in] x The value to be tested.
     *
     * \returns \c true if x < 0, otherwise \c false.
     */
    template<typename T_>
    inline bool signbit(T_ x)
    {
      const T_ zero = T_(0);
      return x < zero;
    }


    /**
     * \brief Returns the absolute value.
     *
     * \param[in] x The value whose absolute value is to be returned.
     *
     * \returns -x if x < 0, otherwise x.
     */
    template<typename T_>
    inline T_ abs(T_ x)
    {
      if(x < T_(0.))
        return -x;
      return x;
    }


    // wrap std::abs
    // (float, double and long double overloads forwarding to std::abs)

    WRAP_STD_MATH1(abs)

#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)

    // written out by hand instead of WRAP_QUAD_MATH1(abs), because that macro would
    // paste the name absq, whereas this overload calls ::fabsq
    inline __float128 abs(__float128 x) {return ::fabsq(x);}

#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


#ifdef FEAT_COMPILER_MICROSOFT
// suppress MSVC warning 4723 for the generic sqrt implementation
#pragma warning(push)
#pragma warning(disable:4723)
#endif
    /**
     * \brief Returns the square-root of a value.
     *
     * Generic fallback; it computes the root by the iteration
     * y_{k+1} := y_k/2 * (3 - (y_k^2)/x), started at y_0 = min(1,x),
     * and stops as soon as the iterate does not increase any more.
     *
     * \param[in] x The value whose square-root is to be computed.
     *
     * \returns The computed square-root; T_(0) if x <= 0.
     */
    template<typename T_>
    inline T_ sqrt(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "sqrt can only be applied to floating point types");

      // non-positive arguments are mapped to zero
      if(x <= T_(0))
        return T_(0);

      // reciprocal of x, so that the loop needs no division
      const T_ inv_x = T_(1) / x;

      // y_0 = min(1,x) makes the sequence monotonically increasing
      T_ next(Math::min(T_(1), x));
      T_ cur(next);
      for(;;)
      {
        cur = next;
        next = T_(0.5)*cur * (T_(3) - (cur*cur*inv_x));
        // no further increase: cur is our result
        if(!(next > cur))
          break;
      }

      return cur;
    }
#ifdef FEAT_COMPILER_MICROSOFT
#pragma warning(pop)
#endif


    // wrap std::sqrt
    // NOTE: the wrappers are declared before the Half overload, so that the unqualified
    // sqrt(float) call inside it resolves to the std::sqrt wrapper rather than to the
    // generic sqrt template (the same layout is used for cos, sinh, cosh and atan).
    WRAP_STD_MATH1(sqrt)
    WRAP_QUAD_MATH1(sqrt)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the square-root of a half-precision value.
     *
     * The argument is converted via __half2float, the root is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The value whose square-root is to be computed.
     */
    inline Half sqrt(Half x)
    {
      return __float2half(sqrt(__half2float(x)));
    }
    #endif


    /**
     * \brief Returns the sine of a value.
     *
     * Generic fallback which sums the power series
     * sin(x) = sum_{n>=0} (-1)^n x^(2n+1) / (2n+1)!
     * until the partial sum does not change any more.
     *
     * \param[in] x The argument.
     *
     * \returns The computed partial sum.
     */
    template<typename T_>
    inline T_ sin(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "sin can only be applied to floating point types");

      // partial sum, previous partial sum and current odd power of x
      T_ sum(x), prev(x+T_(1)), power(x);
      // reciprocal factorial 1/n!
      T_ inv_fact(1.0);
      int n(1);
      for(;;)
      {
        // advance 1/n! by two steps to the next odd n
        inv_fact /= T_(++n);
        inv_fact /= T_(++n);
        prev = sum;
        power *= x*x;
        // (n & 2) is 2 for n = 3,7,11,... and 0 for n = 5,9,13,..., giving sign -1 / +1
        const T_ sign = T_(1 - int(n&2));
        sum += sign * power * T_(inv_fact);
        if(!(prev != sum))
          break;
      }

      return sum;
    }


    // wrap std::sin
    // NOTE: the wrappers are declared before the Half overload, so that the unqualified
    // sin(float) call inside it resolves to the std::sin wrapper rather than to the
    // generic sin template (the same layout is used for cos, sinh, cosh and atan).
    WRAP_STD_MATH1(sin)
    WRAP_QUAD_MATH1(sin)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the sine of a half-precision value.
     *
     * The argument is converted via __half2float, the sine is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The argument.
     */
    inline Half sin(Half x)
    {
      return __float2half(sin(__half2float(x)));
    }
    #endif


    /**
     * \brief Returns the cosine of a value.
     *
     * Generic fallback which sums the power series
     * cos(x) = sum_{n>=0} (-1)^n x^(2n) / (2n)!
     * until the partial sum does not change any more.
     *
     * \param[in] x The argument.
     *
     * \returns The computed partial sum.
     */
    template<typename T_>
    inline T_ cos(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "cos can only be applied to floating point types");

      // partial sum, previous partial sum and current even power of x
      T_ sum(T_(1)), prev(T_(0)), power(T_(1));
      // reciprocal factorial 1/n!
      T_ inv_fact(1.0);
      int n(0);
      for(;;)
      {
        // advance 1/n! by two steps to the next even n
        inv_fact /= T_(++n);
        inv_fact /= T_(++n);
        prev = sum;
        power *= x*x;
        // (n & 2) is 2 for n = 2,6,10,... and 0 for n = 4,8,12,..., giving sign -1 / +1
        const T_ sign = T_(1 - int(n&2));
        sum += sign * power * T_(inv_fact);
        if(!(prev != sum))
          break;
      }

      return sum;
    }


    // wrap std::cos
    WRAP_STD_MATH1(cos)
    WRAP_QUAD_MATH1(cos)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the cosine of a half-precision value.
     *
     * The argument is converted via __half2float, the cosine is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The argument.
     */
    inline Half cos(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(cos(x_single));
    }
    #endif


    /**
     * \brief Returns the tangent of a value.
     *
     * Generic fallback which evaluates sin(x) / cos(x).
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ tan(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "tan can only be applied to floating point types");

      const T_ sine = sin(x);
      const T_ cosine = cos(x);
      return sine / cosine;
    }

    // wrap std::tan
    WRAP_STD_MATH1(tan)
    WRAP_QUAD_MATH1(tan)


    /**
     * \brief Returns the hyperbolic sine of a value.
     *
     * Generic fallback which sums the power series
     * sinh(x) = sum_{n>=0} x^(2n+1) / (2n+1)!
     * until the partial sum does not change any more.
     *
     * \param[in] x The argument.
     *
     * \returns The computed partial sum.
     */
    template<typename T_>
    inline T_ sinh(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "sinh can only be applied to floating point types");

      // partial sum, previous partial sum and current odd power of x
      T_ sum(x), prev(x+T_(1)), power(x);
      // reciprocal factorial 1/n!
      T_ inv_fact(1.0);
      int n(1);
      for(;;)
      {
        // advance 1/n! by two steps to the next odd n
        inv_fact /= T_(++n);
        inv_fact /= T_(++n);
        prev = sum;
        power *= x*x;
        sum += power * T_(inv_fact);
        if(!(prev != sum))
          break;
      }

      return sum;
    }


    // wrap std::sinh
    WRAP_STD_MATH1(sinh)
    WRAP_QUAD_MATH1(sinh)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the hyperbolic sine of a half-precision value.
     *
     * The argument is converted via __half2float, sinh is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The argument.
     */
    inline Half sinh(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(sinh(x_single));
    }
    #endif


    /**
     * \brief Returns the hyperbolic cosine of a value.
     *
     * Generic fallback which sums the power series
     * cosh(x) = sum_{n>=0} x^(2n) / (2n)!
     * until the partial sum does not change any more.
     *
     * \param[in] x The argument.
     *
     * \returns The computed partial sum.
     */
    template<typename T_>
    inline T_ cosh(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "cosh can only be applied to floating point types");

      // partial sum, previous partial sum and current even power of x
      T_ sum(T_(1)), prev(T_(0)), power(T_(1));
      // reciprocal factorial 1/n!
      T_ inv_fact(1.0);
      int n(0);
      for(;;)
      {
        // advance 1/n! by two steps to the next even n
        inv_fact /= T_(++n);
        inv_fact /= T_(++n);
        prev = sum;
        power *= x*x;
        sum += power * T_(inv_fact);
        if(!(prev != sum))
          break;
      }

      return sum;
    }


    // wrap std::cosh
    WRAP_STD_MATH1(cosh)
    WRAP_QUAD_MATH1(cosh)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the hyperbolic cosine of a half-precision value.
     *
     * The argument is converted via __half2float, cosh is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The argument.
     */
    inline Half cosh(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(cosh(x_single));
    }
    #endif


    /**
     * \brief Returns the hyperbolic tangent of a value.
     *
     * Generic fallback which evaluates sinh(x) / cosh(x).
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ tanh(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "tanh can only be applied to floating point types");

      const T_ hyp_sine = sinh(x);
      const T_ hyp_cosine = cosh(x);
      return hyp_sine / hyp_cosine;
    }

    // wrap std::tanh
    WRAP_STD_MATH1(tanh)
    WRAP_QUAD_MATH1(tanh)


    /**
     * \brief Returns the exponential of a value.
     *
     * Generic fallback which sums the power series exp(x) = sum_{n>=0} x^n / n!.
     *
     * \param[in] x The argument.
     *
     * \returns The last partial sum before the stopping criterion was violated.
     */
    template<typename T_>
    inline T_ exp(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "exp can only be applied to floating point types");

      // partial sum, previous partial sum and current term x^n / n!
      T_ sum(T_(1)), prev(T_(0)), term(sum);
      int n(0);
      bool proceed(true);
      while(proceed)
      {
        prev = sum;
        // next term: x^n / n!
        term *= x;
        term /= T_(++n);
        sum += term;

        // Stopping criterion:
        // For x > 0, the partial sums must be strictly increasing.
        // Otherwise they must alternate: decrease after an odd term, increase after an even one.
        if(x > T_(0))
          proceed = (prev < sum);
        else if(n & 1)
          proceed = (sum < prev);
        else
          proceed = (prev < sum);
      }
      return prev;
    }

    // wrap std::exp
    WRAP_STD_MATH1(exp)
    WRAP_QUAD_MATH1(exp)


    /**
     * \brief Returns the natural logarithm of a value.
     *
     * Generic fallback which iterates
     * y_{k+1} = y_k + 2*(x - exp(y_k))/(x + exp(y_k)), started at y_0 = 0.
     *
     * \param[in] x The argument.
     *
     * \returns The last iterate before the stopping criterion was violated.
     */
    template<typename T_>
    inline T_ log(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "log can only be applied to floating point types");

      // Stopping criterion:
      // For x < 1, the iterates must be strictly decreasing,
      // otherwise they must be strictly increasing.
      const bool decreasing = (x < T_(1));

      T_ cur(T_(0)), prev(T_(0));
      do
      {
        prev = cur;
        const T_ exp_cur(Math::exp(cur));
        cur += T_(2) * (x - exp_cur) / (x + exp_cur);
      } while(decreasing ? (cur < prev) : (prev < cur));
      return prev;
    }

    // wrap std::log
    WRAP_STD_MATH1(log)
    WRAP_QUAD_MATH1(log)


    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the exponential of a half-precision value.
     *
     * The argument is converted via __half2float, exp is evaluated for
     * that float and the result is converted back via __float2half.
     */
    inline Half exp(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(exp(x_single));
    }

    /**
     * \brief Returns the natural logarithm of a half-precision value.
     *
     * The argument is converted via __half2float, log is evaluated for
     * that float and the result is converted back via __float2half.
     */
    inline Half log(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(log(x_single));
    }
    #endif


    /**
     * \brief Returns the logarithm to the base 10 of a value.
     *
     * Generic fallback which evaluates log(x) / log(10).
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ log10(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "log10 can only be applied to floating point types");

      const T_ ln_x = log(x);
      const T_ ln_10 = log(T_(10));
      return ln_x / ln_10;
    }

    // wrap std::log10
    WRAP_STD_MATH1(log10)
    WRAP_QUAD_MATH1(log10)


    /**
     * \brief Returns x raised to the power of y.
     *
     * Generic fallback which evaluates exp(y * log(x)).
     *
     * \param[in] x The base.
     * \param[in] y The exponent.
     */
    template<typename T_>
    inline T_ pow(T_ x, T_ y)
    {
      static_assert(Type::Traits<T_>::is_float, "pow can only be applied to floating point types");

      const T_ scaled_log = y * Math::log(x);
      return Math::exp(scaled_log);
    }

    // wrap std::pow
    WRAP_STD_MATH2(pow)
    WRAP_QUAD_MATH2(pow)


    /**
     * \brief Returns the arctangent of a value.
     *
     * Generic fallback: the argument is first reduced to |x| < 1 and then the power series
     * atan(x) = sum_{n>=0} (-1)^n x^(2n+1) / (2n+1)
     * is summed until the partial sum does not change any more.
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ atan(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "atan can only be applied to floating point types");

      // the series converges only for |x| < 1, but any |x| >= 1 can be reduced by
      // atan(x) = 2*atan( x / (1 + sqrt(1 + x^2)))
      int halvings(0);
      while(Math::abs(x) >= T_(1))
      {
        x /= (T_(1) + Math::sqrt(T_(1) + x*x));
        ++halvings;
      }

      // partial sum, previous partial sum and current odd power of x
      T_ sum(x), prev(x+T_(1)), power(x);
      int n(1);
      do
      {
        prev = sum;
        power *= x*x;
        n += 2;
        const T_ term(power / T_(n));
        // (n & 2) is 2 for n = 3,7,11,... and 0 for n = 5,9,13,..., giving sign -1 / +1
        sum += T_(1 - int(n&2)) * term;
      } while(prev != sum);

      // undo the argument reduction: one factor 2 per reduction step
      return T_(1<<halvings) * sum;
    }


    // wrap std::atan
    WRAP_STD_MATH1(atan)
    WRAP_QUAD_MATH1(atan)

    #ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Returns the arctangent of a half-precision value.
     *
     * The argument is converted via __half2float, atan is evaluated for
     * that float and the result is converted back via __float2half.
     *
     * \param[in] x The argument.
     */
    inline Half atan(Half x)
    {
      const float x_single = __half2float(x);
      return __float2half(atan(x_single));
    }
    #endif


    /**
     * \brief Returns the arctangent of y/x.
     *
     * Generic fallback based on the tangent half-angle formulas, which computes
     * the angle of the point (x,y) as 2*atan(tan(angle/2)).
     *
     * \param[in] y The ordinate.
     * \param[in] x The abscissa.
     *
     * \returns The angle of the point (x,y); 0 for x >= 0, y = 0 (including the origin)
     * and pi for x < 0, y = 0.
     */
    template<typename T_>
    inline T_ atan2(T_ y, T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "atan2 can only be applied to floating point types");

      // see http://en.wikipedia.org/wiki/Atan2#Variations_and_notation
      const T_ r = Math::sqrt(x*x + y*y);

      if(x > T_(0))
      {
        // right half-plane: tan(angle/2) = y / (r + x)
        // here r + x > 0, so this form has no cancellation and is well-defined for y = 0
        return T_(2) * atan(y / (r + x));
      }

      if(y != T_(0))
      {
        // left half-plane off the x-axis: tan(angle/2) = (r - x) / y, no cancellation as x <= 0
        return T_(2) * atan((r - x) / y);
      }

      // y = 0 and x <= 0: the quotient above would be x/0 or 0/0, and the generic atan
      // does not terminate for such arguments, so return the limit explicitly;
      // pi is obtained as 4*atan(1)
      return (x < T_(0)) ? T_(4) * atan(T_(1)) : T_(0);
    }

    // wrap std::atan2
    WRAP_STD_MATH2(atan2)
    WRAP_QUAD_MATH2(atan2)


    /**
     * \brief Returns the mathematical constant pi = 3.1415...
     *
     * Generic fallback which sums the Bailey-Borwein-Plouffe series
     * pi = sum_{k>=0} 16^{-k} * (4/(8k+1) - 2/(8k+4) - 1/(8k+5) - 1/(8k+6))
     * until the partial sum does not change any more.
     */
    template<typename T_>
    inline T_ pi()
    {
      static_assert(Type::Traits<T_>::is_float, "pi can only be applied to floating point types");

      T_ sum(T_(0)), prev(T_(0));
      const T_ sixteenth(T_(1) / T_(16));
      // scale = 16^{-k}
      T_ scale(T_(1));
      for(int k(0); ; ++k)
      {
        prev = sum;
        sum += scale * (T_(4)/T_(8*k+1) - T_(2)/T_(8*k+4) - T_(1)/T_(8*k+5) - T_(1)/T_(8*k+6));
        scale *= sixteenth;
        // converged: adding the last term did not change the sum
        if(!(prev != sum))
          break;
      }

      return sum;
    }


    /// pi for float, given as a literal
    template<>
    inline float pi<float>()
    {
      constexpr float pi_single = 3.141592654f;
      return pi_single;
    }

    /// pi for double, given as a literal
    template<>
    inline double pi<double>()
    {
      constexpr double pi_double = 3.1415926535897932385;
      return pi_double;
    }

    /// pi for long double, given as a literal
    template<>
    inline long double pi<long double>()
    {
      constexpr long double pi_extended = 3.141592653589793238462643383279502884197l;
      return pi_extended;
    }


// same guard as all other __float128 code in this file: the type is not used under nvcc
#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// pi for __float128, given as a literal with the 'q' suffix
    template<>
    inline __float128 pi<__float128>()
    {
      return 3.141592653589793238462643383279502884197q;
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


    #ifdef FEAT_HAVE_HALFMATH
    /// pi for Half, converted from the float literal via __float2half
    template<>
    inline Half pi<Half>()
    {
      const float pi_single = 3.141592654f;
      return __float2half(pi_single);
    }
    #endif


    /**
     * \brief Returns the machine precision constant for a floating-point data type.
     *
     * Generic fallback which halves a candidate, starting at 1/2, for as long as
     * 1 + candidate is still greater than 1.
     *
     * \returns The last candidate before the one that failed the test 1 < 1 + candidate.
     */
    template<typename T_>
    inline T_ eps()
    {
      static_assert(Type::Traits<T_>::is_float, "eps can only be applied to floating point types");

      const T_ one(T_(1));
      const T_ one_half(T_(0.5));
      T_ candidate(one_half), last(one_half);
      do
      {
        last = candidate;
        candidate *= one_half;
        // the sum is converted to T_ explicitly before it is compared
      } while(one < T_(one+candidate));
      return last;
    }


    /// machine precision for float, taken from std::numeric_limits
    template<>
    inline float eps<float>()
    {
      constexpr float value = std::numeric_limits<float>::epsilon();
      return value;
    }

    /// machine precision for double, taken from std::numeric_limits
    template<>
    inline double eps<double>()
    {
      constexpr double value = std::numeric_limits<double>::epsilon();
      return value;
    }

    /// machine precision for long double, taken from std::numeric_limits
    template<>
    inline long double eps<long double>()
    {
      constexpr long double value = std::numeric_limits<long double>::epsilon();
      return value;
    }


#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// machine precision for __float128, given by FLT128_EPSILON
    template<>
    inline __float128 eps<__float128>()
    {
      const __float128 value = FLT128_EPSILON;
      return value;
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


    #ifdef FEAT_HAVE_HALFMATH

    /// machine precision for Half, computed as the difference of 1 and Half(0.99951171)
    // NOTE(review): this difference is about 4.88e-4, whereas the generic eps() loop halves
    // down from 0.5 and would stop at a power of two; presumably 2^-11 vs 2^-10 for a
    // binary16 type -- confirm which of the two values is intended.
    template<>

    inline Half eps<Half>()

    {

      return CUDART_ONE_FP16 - Half(0.99951171);

    }

    #endif


    /**
     * \brief Returns the maximum positive finite value for a data type.
     *
     * \returns std::numeric_limits<T_>::max()
     */
    template<typename T_>
    inline T_ huge()
    {
      typedef std::numeric_limits<T_> LimitsType;
      return (LimitsType::max)();
    }


    /**
     * \brief Returns the minimum positive finite (full-precision) value for a data type.
     *
     * \returns std::numeric_limits<T_>::min()
     */
    template<typename T_>
    inline T_ tiny()
    {
      typedef std::numeric_limits<T_> LimitsType;
      return (LimitsType::min)();
    }


#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// maximum value for __float128, given by FLT128_MAX
    template<>
    inline __float128 huge<__float128>()
    {
      const __float128 value = FLT128_MAX;
      return value;
    }

    /// minimum value for __float128, given by FLT128_MIN
    template<>
    inline __float128 tiny<__float128>()
    {
      const __float128 value = FLT128_MIN;
      return value;
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


    #ifdef FEAT_HAVE_HALFMATH
    /// maximum value for Half, given by CUDART_MAX_NORMAL_FP16
    template<>
    inline Half huge<Half>()
    {
      return CUDART_MAX_NORMAL_FP16;
    }

    /// minimum positive full-precision (i.e. normalized) value for Half
    template<>
    inline Half tiny<Half>()
    {
      // 2^-14 = 6.103515625e-05 is the smallest normalized binary16 value; the denormal
      // minimum is not a full-precision value and does not match what tiny() returns
      // for the other types (std::numeric_limits<T_>::min(), FLT128_MIN)
      return __float2half(6.103515625e-05f);
    }
    #endif


    /**
     * \brief Returns a quiet Not-A-Number (NaN).
     *
     * Generic fallback which evaluates 0/0 in the type T_.
     */
    template<typename T_>
    inline T_ nan()
    {
      // divide 0 by 0, which hopefully yields NaN
      const T_ zero = T_(0);
      return zero / zero;
    }


    /// NaN for float, obtained from std::nanf
    template<>
    inline float nan<float>()
    {
      const float value = std::nanf("");
      return value;
    }

    /// NaN for double, obtained from std::nan
    template<>
    inline double nan<double>()
    {
      const double value = std::nan("");
      return value;
    }

    /// NaN for long double, obtained from std::nanl
    template<>
    inline long double nan<long double>()
    {
      const long double value = std::nanl("");
      return value;
    }


#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// NaN for __float128, obtained from ::nanq
    template<>
    inline __float128 nan<__float128>()
    {
      const __float128 value = ::nanq("");
      return value;
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__


#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)

    /*template<int exp_bits_, int sig_bits_, typename Backend_>

    inline flx::floatx<exp_bits_, sig_bits_, Backend_> nan<flx::floatx<exp_bits_, sig_bits_, Backend_>>()

    {

      // FloatX doesn't offer its own nan implementation,

      // so create a backend type NaN and convert it to FloatX

      return flx::floatx<exp_bits_, sig_bits_, Backend_>(Math::nan<Backend_>());

    }*/

#endif // FEAT_HAVE_FLOATX && !__CUDA_CC__


    /**
     * \brief Returns the arcsine of a value.
     *
     * Generic fallback which evaluates signum(x) * atan(sqrt(x^2 / (1 - x^2))).
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ asin(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "asin can only be applied to floating point types");

      // x^2 / (1 - x^2) is the squared tangent of the angle whose sine is x
      const T_ x_sqr = x*x;
      const T_ tan_sqr = x_sqr / (T_(1) - x_sqr);
      return signum(x) * Math::atan(Math::sqrt(tan_sqr));
    }

    WRAP_STD_MATH1(asin)
    WRAP_QUAD_MATH1(asin)


    /**
     * \brief Returns the arccosine of a value.
     *
     * Generic fallback which evaluates pi/2 - asin(x).
     *
     * \param[in] x The argument.
     */
    template<typename T_>
    inline T_ acos(T_ x)
    {
      static_assert(Type::Traits<T_>::is_float, "acos can only be applied to floating point types");

      const T_ half_pi = T_(0.5) * pi<T_>();
      return half_pi - Math::asin(x);
    }

    WRAP_STD_MATH1(acos)
    WRAP_QUAD_MATH1(acos)


    /**
     * \brief Checks whether a value is finite.
     *
     * Declaration only; the overloads below cover the individual types.
     *
     * \param[in] x The value to be checked.
     */
    template<typename T_>
    inline bool isfinite(T_ x);

    // float, double and long double forward to std::isfinite
    WRAP_STD_MATH1BRET(isfinite)

#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// __float128 overload based on ::finiteq
    inline bool isfinite(__float128 x)
    {
      // https://chromium.googlesource.com/native_client/nacl-gcc/+/ng/master/libquadmath/math/finiteq.c
      const auto finite_flag = ::finiteq(x);
      return (finite_flag != 0);
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__

#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
    /// FloatX overload
    template<int exp_bits_, int sig_bits_, typename Backend_>
    inline bool isfinite(const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
    {
      // FloatX doesn't offer its own isfinite implementation,
      // so test its backend implementation instead
      const Backend_ backend_value = static_cast<Backend_>(x);
      return isfinite(backend_value);
    }
#endif // FEAT_HAVE_FLOATX && !__CUDACC__

#ifdef FEAT_HAVE_HALFMATH
    /// Half overload: finite means neither infinite nor NaN
    inline bool isfinite(Half x)
    {
      return !__hisinf(x) && !__hisnan(x);
    }
#endif


    /**
     * \brief Checks whether a value is infinite.
     *
     * Declaration only; the overloads below cover the individual types.
     *
     * \param[in] x The value to be checked.
     */
    template<typename T_>
    inline bool isinf(T_ x);

    // float, double and long double forward to std::isinf
    WRAP_STD_MATH1BRET(isinf)

#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// __float128 overload based on ::isinfq
    inline bool isinf(__float128 x)
    {
      // https://chromium.googlesource.com/native_client/nacl-gcc/+/ng/master/libquadmath/math/isinfq.c
      const auto inf_flag = ::isinfq(x);
      return (inf_flag != 0);
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__

#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
    /// FloatX overload
    template<int exp_bits_, int sig_bits_, typename Backend_>
    inline bool isinf(const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
    {
      // FloatX doesn't offer its own isinf implementation,
      // so test its backend implementation instead
      const Backend_ backend_value = static_cast<Backend_>(x);
      return isinf(backend_value);
    }
#endif // FEAT_HAVE_FLOATX && !__CUDACC__

#ifdef FEAT_HAVE_HALFMATH
    /// Half overload based on __hisinf
    inline bool isinf(Half x)
    {
      return __hisinf(x);
    }
#endif


    /**
     * \brief Checks whether a value is Not-A-Number.
     *
     * Declaration only; the overloads below cover the individual types.
     *
     * \param[in] x The value to be checked.
     */
    template<typename T_>
    inline bool isnan(T_ x);

    // float, double and long double forward to std::isnan
    WRAP_STD_MATH1BRET(isnan)

#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// __float128 overload based on ::isnanq
    inline bool isnan(__float128 x)
    {
      // https://chromium.googlesource.com/native_client/nacl-gcc/+/ng/master/libquadmath/math/isnanq.c
      const auto nan_flag = ::isnanq(x);
      return (nan_flag != 0);
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__

#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
    /// FloatX overload
    template<int exp_bits_, int sig_bits_, typename Backend_>
    inline bool isnan(const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
    {
      // FloatX doesn't offer its own isnan implementation,
      // so test its backend implementation instead
      const Backend_ backend_value = static_cast<Backend_>(x);
      return isnan(backend_value);
    }
#endif // FEAT_HAVE_FLOATX && !__CUDACC__

    #ifdef FEAT_HAVE_HALFMATH
    /// Half overload based on __hisnan
    inline bool isnan(Half x)
    {
      return __hisnan(x);
    }
    #endif


    /**
     * \brief Checks whether a value is normal.
     *
     * Declaration only; the overloads below cover the individual types.
     *
     * \param[in] x The value to be checked.
     */
    template<typename T_>
    inline bool isnormal(T_ x);

    // float, double and long double forward to std::isnormal
    WRAP_STD_MATH1BRET(isnormal)

#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /// __float128 overload: finite and not below the minimal normal value in magnitude
    inline bool isnormal(__float128 x)
    {
      // check whether the value is finite
      const bool finite = (::finiteq(x) != 0);
      // check whether the value is not below minimal normal value
      return finite && !(::fabsq(x) < FLT128_MIN);
    }
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__

#if defined(FEAT_HAVE_FLOATX) && !defined(__CUDACC__)
    /// FloatX overload
    template<int exp_bits_, int sig_bits_, typename Backend_>
    inline bool isnormal(const flx::floatx<exp_bits_, sig_bits_, Backend_>& x)
    {
      // FloatX doesn't offer its own isnormal implementation,
      // so test its backend implementation instead
      const Backend_ backend_value = static_cast<Backend_>(x);
      return isnormal(backend_value);
    }
#endif // FEAT_HAVE_FLOATX && !__CUDACC__


#ifdef FEAT_HAVE_HALFMATH
    /**
     * \brief Checks whether a half-precision value is normal.
     *
     * \param[in] x The value to be checked.
     *
     * \returns \c true if \p x is neither infinite nor NaN and its magnitude is
     * not below the minimal normalized value, otherwise \c false.
     */
    inline bool isnormal(Half x)
    {
      // infinities and NaNs are never normal
      if(__hisinf(x) || __hisnan(x))
        return false;
      // zero and denormals lie below 2^-14 = 6.103515625e-05, the smallest normalized
      // binary16 value (same criterion as the __float128 overload uses with FLT128_MIN)
      return (__habs(x) >= __float2half(6.103515625e-05f));
    }
#endif


    /**
     * \brief Calculates the (partial) factorial.
     *
     * \param[in] n The last factor of the product.
     * \param[in] m The first factor of the product; values below 1 are replaced by 1.
     *
     * \returns The product max(1,m) * (max(1,m)+1) * ... * n, which is 1 if there is no factor.
     */
    template<typename T_>
    inline T_ factorial(T_ n, T_ m = T_(0))
    {
      static_assert(Type::Traits<T_>::is_int, "factorial can only be applied to integral types");

      // calculate the factorial
      T_ product(T_(1));
      m = max(T_(1), m);
      while(m <= n)
      {
        product *= m;
        ++m;
      }
      return product;
    }


    /**
     * \brief Calculates the binomial coefficient.
     *
     * \param[in] n, k The parameters of the binomial coefficient (n choose k).
     *
     * \returns 0 if k > n, 1 if k <= 0 or k = n, and the binomial coefficient otherwise.
     */
    template<typename T_>
    inline T_ binomial(T_ n, T_ k)
    {
      static_assert(Type::Traits<T_>::is_int, "binomial can only be applied to integral types");

      // trivial cases, by definition
      if(k > n)
        return T_(0);
      if((k <= T_(0)) || (k == n))
        return T_(1);

      // exploit symmetry: (n \choose k) = (n \choose n-k)
      k = min(k, n-k);

      // use multiplicative formula: (n \choose k+1) = (n - k) * (n \choose k) / (k + 1)
      // start with (n \choose 1) = n
      T_ coeff = n;
      T_ i(1);
      while(i < k)
      {
        coeff *= (n - i);
        coeff /= (i + 1);
        ++i;
      }

      return coeff;
    }


    /**
     * \brief Inverts a matrix in place and returns its determinant.
     *
     * The matrix entry (i,j) is stored at a[i*stride + j]. The inversion is performed
     * by column elimination, where the pivot of each step is chosen among the diagonal
     * entries of the rows that have not been processed yet.
     *
     * \param[in] n The dimension of the matrix; must be > 0.
     * \param[in] stride The offset between two consecutive rows in \p a; must be >= n.
     * \param[in,out] a The matrix entries; overwritten by the entries of the inverse.
     * \param[out] p A work array of which the first \p n entries are written (not touched
     * for n = 1); it receives the order in which the rows were chosen as pivot rows.
     *
     * \returns The product of the pivot elements; DT_(0) if one of the parameters is invalid.
     */
    template<typename DT_, typename IT_>

    DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[])

    {

      // make sure that the parameters are valid

      if((n <= IT_(0)) || (stride < n) || (a == nullptr) || (p == nullptr))

        return DT_(0);


      // invert 1x1 explicitly

      if(n == IT_(1))

      {

        DT_ det = a[0];

        a[0] = DT_(1) / det;

        return det;

      }


      // initialize identity permutation

      for(IT_ i(0); i < n; ++i)

      {

        p[i] = i;

      }


      // initialize determinant to 1

      DT_ det = DT_(1);


      // primary column elimination loop

      for(IT_ k(0); k < n; ++k)

      {

        // step 1: find a pivot for the elimination of column k

        {

          // for this, we only check the rows p[j] with j >= k, as all

          // rows p[j] with j < k have already been eliminated and are

          // therefore not candidates for pivoting

          // note: only the diagonal entry of each candidate row is inspected
          DT_ pivot = Math::abs(a[p[k]*stride + p[k]]);

          IT_ i = k;


          // loop over all unprocessed rows

          for(IT_ j(k+1); j < n; ++j)

          {

            // get our matrix value and check whether it can be a pivot

            DT_ abs_val = Math::abs(a[p[j]*stride + p[j]]);

            if(abs_val > pivot)

            {

              pivot = abs_val;

              i = j;

            }

          }


          // do we have to swap rows i and k?

          if(i > k)

          {

            // swap rows "virtually" by exchanging their permutation positions

            IT_ t = p[k];

            p[k] = p[i];

            p[i] = t;

          }

        }


        // compute pivot row offset

        const IT_ pk_off = p[k]*stride;


        // step 2: process pivot row

        {

          // update determinant by multiplying with the pivot element

          det *= a[pk_off + p[k]];


          // get our inverted pivot element

          const DT_ pivot = DT_(1) / a[pk_off + p[k]];


          // replace column entry by unit column entry

          a[pk_off + p[k]] = DT_(1);


          // scale the whole row by the inverted pivot, i.e. divide it by the pivot

          for(IT_ j(0); j < n; ++j)

          {

            a[pk_off+j] *= pivot;

          }

        }


        // step 3: eliminate pivot column


        // loop over all rows of the matrix

        for(IT_ i(0); i < n; ++i)

        {

          // skip the pivot row

          if(i == p[k])

            continue;


          // compute row and pivot offsets

          const IT_ row_off = i*stride;


          // fetch elimination factor

          const DT_ factor =  a[row_off + p[k]];


          // replace by unit column entry

          a[row_off + p[k]] = DT_(0);


          // process the row

          for(IT_ j(0); j < n; ++j)

          {

            a[row_off + j] -= a[pk_off + j] * factor;

          }

        }

      }


      // return determinant

      return det;

    }


    /**
     * \brief Calculates the opening angle from the dot and cross product of two 2D vectors.
     *
     * \param[in] cross_prod The cross product of the two vectors.
     * \param[in] dot_prod The dot product of the two vectors.
     *
     * \returns The angle, computed from asin(cross_prod) if |cross_prod| < 0.5 and
     * from acos(dot_prod) otherwise, and transformed to [0,2pi].
     *
     * \note NOTE(review): the asin/acos calls presumably expect products of normalized
     * vectors, as supplied by calc_opening_angle -- confirm for other callers.
     */
    template<typename DT_>
    DT_ calc_opening_angle_intern(DT_ cross_prod, DT_ dot_prod)
    {
      // We always want to use the version that uses values nearer to 1 for better conditioning
      if(Math::abs(cross_prod) < DT_(0.5))
      {
        const DT_ alpha = Math::asin(cross_prod);

        // Transform to [0,2pi]
        if(dot_prod >= DT_(0))
        {
          // non-negative dot product: alpha itself, shifted by 2pi if it is negative
          return (alpha >= DT_(0)) ? alpha : (DT_(2) * Math::pi<DT_>() + alpha);
        }
        // negative dot product: mirror the angle
        return Math::pi<DT_>() - alpha;
      }

      const DT_ beta = Math::acos(dot_prod);

      // Transform to [0,2pi]
      return (cross_prod >= DT_(0)) ? beta : (DT_(2) * Math::pi<DT_>() - beta);
    }


    /**
     * \brief Calculates the opening angle of two 2D vectors.
     *
     * Both vectors are normalized, then their cross and dot products are passed on
     * to calc_opening_angle_intern.
     *
     * \param[in] x1, x2 The components of the first vector.
     * \param[in] y1, y2 The components of the second vector.
     *
     * \returns The angle computed by calc_opening_angle_intern.
     */
    template<typename DT_>
    DT_ calc_opening_angle(DT_ x1, DT_ x2, DT_ y1, DT_ y2)
    {
      // Euclidean lengths of both vectors
      const DT_ len_x = Math::sqrt(x1*x1 + x2*x2);
      const DT_ len_y = Math::sqrt(y1*y1 + y2*y2);

      // normalize both vectors
      x1 /= len_x;
      x2 /= len_x;
      y1 /= len_y;
      y2 /= len_y;

      const DT_ cross_prod = x1*y2 - y1*x2;
      const DT_ dot_prod = x1*y1 + x2*y2;

      return calc_opening_angle_intern(cross_prod, dot_prod);
    }


    /**
     * \brief Math Limits class template.
     *
     * The generic version inherits everything from std::numeric_limits<T_> and adds nothing.
     */
    template<typename T_>

    class Limits :

      public std::numeric_limits<T_>

    {

    }; // class Limits<...>


#if defined(FEAT_HAVE_QUADMATH) && !defined(__CUDACC__)
    /**
     * \brief Limits specialization for __float128.
     *
     * Provides the members of the std::numeric_limits interface based on the
     * FLT128_* constants. The functions whose 'constexpr' is commented out are
     * ordinary (non-constexpr) functions.
     */
    template<>
    class Limits<__float128>
    {
    public:
      static constexpr bool is_specialized = true;
      static /*constexpr*/ __float128 min() noexcept { return FLT128_MIN; }
      static /*constexpr*/ __float128 max() noexcept { return FLT128_MAX; }
      static /*constexpr*/ __float128 lowest() noexcept { return -FLT128_MAX; }
      static constexpr int digits = FLT128_MANT_DIG;
      static constexpr int digits10 = FLT128_DIG;
      // Note: The following formula was taken from the MSC implementation...
      static constexpr int max_digits10 = (2 + FLT128_MANT_DIG * 301 / 1000);
      static constexpr bool is_signed = true;
      static constexpr bool is_integer = false;
      static constexpr bool is_exact = false;
      static constexpr int radix = 2;
      static /*constexpr*/ __float128 epsilon() noexcept { return FLT128_EPSILON; }
      static constexpr __float128 round_error() noexcept { return __float128(0.5); }
      static constexpr int min_exponent = FLT128_MIN_EXP;
      static constexpr int min_exponent10 = FLT128_MIN_10_EXP;
      static constexpr int max_exponent = FLT128_MAX_EXP;
      static constexpr int max_exponent10 = FLT128_MAX_10_EXP;
      static constexpr bool has_infinity = true;
      static constexpr bool has_quiet_NaN = true;
      static constexpr bool has_signaling_NaN = false;
      // NOTE(review): denorm_absent is reported although denorm_min() returns
      // FLT128_DENORM_MIN -- confirm which of the two is intended
      static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
      static constexpr bool has_denorm_loss = false;
      // infinity is produced by letting max()*max() overflow
      static /*constexpr*/ __float128 infinity() noexcept { return max()*max(); }
      // pass an empty tag string rather than a null pointer, as Math::nan<__float128>() does
      static /*constexpr*/ __float128 quiet_NaN() noexcept { return ::nanq(""); }
      // has_signaling_NaN is false, so this returns the same value as quiet_NaN()
      static /*constexpr*/ __float128 signaling_NaN() noexcept { return ::nanq(""); }
      static /*constexpr*/ __float128 denorm_min() noexcept { return FLT128_DENORM_MIN; }
      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;
      static constexpr bool traps = true;
      static constexpr bool tinyness_before = true;
      static constexpr std::float_round_style round_style = std::round_to_nearest;
    };
#endif // FEAT_HAVE_QUADMATH && !__CUDACC__

  } // namespace Math

} // namespace FEAT

FEAT::Math::Limits
Math Limits class template.
Definition: math.hpp:1475

FEAT::Math::sqrt
T_ sqrt(T_ x)
Returns the square-root of a value.
Definition: math.hpp:300

FEAT::Math::sinh
T_ sinh(T_ x)
Returns the hyperbolic sine of a value.
Definition: math.hpp:447

FEAT::Math::calc_opening_angle_intern
DT_ calc_opening_angle_intern(DT_ cross_prod, DT_ dot_prod)
Calculates the opening angle from the dot and cross product of two 2D vectors.
Definition: math.hpp:1414

FEAT::Math::tiny
T_ tiny()
Returns the minimum positive finite (full-precision) value for a data type.
Definition: math.hpp:853

FEAT::Math::factorial
T_ factorial(T_ n, T_ m=T_(0))
Calculates the (partial) factorial.
Definition: math.hpp:1164

FEAT::Math::atan
T_ atan(T_ x)
Returns the arctangent of a value.
Definition: math.hpp:662

FEAT::Math::isinf
bool isinf(T_ x)
Checks whether a value is infinite.

FEAT::Math::pi
T_ pi()
Returns the mathematical constant pi = 3.1415...
Definition: math.hpp:724

FEAT::Math::atan2
T_ atan2(T_ y, T_ x)
Returns the arctangent of y/x.
Definition: math.hpp:708

FEAT::Math::isnormal
bool isnormal(T_ x)
Checks whether a value is normal.

FEAT::Math::abs
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275

FEAT::Math::exp
T_ exp(T_ x)
Returns the exponential of a value.
Definition: math.hpp:550

FEAT::Math::clamp
T_ clamp(T_ x, T_ a, T_ b)
Clamps a value to a range.
Definition: math.hpp:216

FEAT::Math::mini
void mini(T_ &xmin, T_ x)
Updates the minimum of a set of values.
Definition: math.hpp:154

FEAT::Math::cosh
T_ cosh(T_ x)
Returns the hyperbolic cosine of a value.
Definition: math.hpp:489

FEAT::Math::tanh
T_ tanh(T_ x)
Returns the hyperbolic tangent of a value.
Definition: math.hpp:531

FEAT::Math::acos
T_ acos(T_ x)
Returns the arccosine of a value.
Definition: math.hpp:967

FEAT::Math::nan
T_ nan()
Returns a quiet Not-A-Number (NaN)
Definition: math.hpp:897

FEAT::Math::calc_opening_angle
DT_ calc_opening_angle(DT_ x1, DT_ x2, DT_ y1, DT_ y2)
Calculates the opening angle of two 2D vectors.
Definition: math.hpp:1450

FEAT::Math::pow
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
Definition: math.hpp:643

FEAT::Math::asin
T_ asin(T_ x)
Returns the arcsine of a value.
Definition: math.hpp:949

FEAT::Math::binomial
T_ binomial(T_ n, T_ k)
Calculates the binomial coefficient.
Definition: math.hpp:1196

FEAT::Math::minimax
void minimax(T_ x, T_ &a, T_ &b)
Updates the minimum and maximum.
Definition: math.hpp:195

FEAT::Math::sin
T_ sin(T_ x)
Returns the sine of a value.
Definition: math.hpp:344

FEAT::Math::sqr
T_ sqr(T_ x)
Returns the square of a value.
Definition: math.hpp:95

FEAT::Math::maxi
void maxi(T_ &xmax, T_ x)
Updates the maximum of a set of values.
Definition: math.hpp:172

FEAT::Math::min
T_ min(T_ a, T_ b)
Returns the minimum of two values.
Definition: math.hpp:123

FEAT::Math::cub
T_ cub(T_ x)
Returns the cube of a value.
Definition: math.hpp:109

FEAT::Math::huge
T_ huge()
Returns the maximum positive finite (full-precision) value for a data type.
Definition: math.hpp:842

FEAT::Math::tan
T_ tan(T_ x)
Returns the tangent of a value.
Definition: math.hpp:428

FEAT::Math::invert_matrix
DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[])
Inverts a matrix and returns its determinant.
Definition: math.hpp:1292

FEAT::Math::signum
T_ signum(T_ x)
Returns the sign of a value.
Definition: math.hpp:250

FEAT::Math::log10
T_ log10(T_ x)
Returns the logarithm to the base 10 of a value.
Definition: math.hpp:623

FEAT::Math::signbit
bool signbit(T_ x)
Returns the status of the sign bit.
Definition: math.hpp:262

FEAT::Math::ilog10
T_ ilog10(T_ x)
Computes the integral base-10 logarithm of an integer, i.e. its number of non-zero decimal digits.
Definition: math.hpp:231

FEAT::Math::isfinite
bool isfinite(T_ x)
Checks whether a value is finite.

FEAT::Math::isnan
bool isnan(T_ x)
Checks whether a value is Not-A-Number.

FEAT::Math::cos
T_ cos(T_ x)
Returns the cosine of a value.
Definition: math.hpp:386

FEAT::Math::log
T_ log(T_ x)
Returns the natural logarithm of a value.
Definition: math.hpp:580

FEAT::Math::max
T_ max(T_ a, T_ b)
Returns the maximum of two values.
Definition: math.hpp:137

FEAT::Math::eps
T_ eps()
Returns the machine precision constant for a floating-point data type.
Definition: math.hpp:787

FEAT
FEAT namespace.
Definition: adjactor.hpp:12

FEAT::Half
__half Half
Half data type.
Definition: half.hpp:25

FEAT::Type::Traits
basic Type Traits struct
Definition: type_traits.hpp:73