15#include <kernel/util/math.hpp>
20#include <initializer_list>
22#include <kernel/util/cuda_math.cuh>
133 template<
typename T_>
134 struct DataTypeExtractor
137 typedef T_ MyDataType;
139 static constexpr int level = 0;
156 template<
typename T_,
int n_,
int s_>
157 struct DataTypeExtractor<Vector<T_, n_, s_>>
160 typedef typename DataTypeExtractor<T_>::MyDataType MyDataType;
162 static constexpr int level = DataTypeExtractor<T_>::level+1;
166 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_>
167 struct DataTypeExtractor<Matrix<T_, m_, n_, sm_, sn_>>
170 typedef typename DataTypeExtractor<T_>::MyDataType MyDataType;
172 static constexpr int level = DataTypeExtractor<T_>::level+1;
176 template<
typename T_,
int l_,
int m_,
int n_,
int sl_,
int sm_,
int sn_>
177 struct DataTypeExtractor<Tensor3<T_, l_, m_, n_, sl_, sm_, sn_>>
180 typedef typename DataTypeExtractor<T_>::MyDataType MyDataType;
182 static constexpr int level = DataTypeExtractor<T_>::level+1;
186 template<
int m_,
int n_>
189 template<
int m_,
int n_>
192 template<
int m_,
int n_>
193 struct InverseHelper;
195 template<
int m_,
int n_>
196 struct CofactorHelper;
199 template<
int m_,
int n_>
200 struct CudaGroupedInverseHelper;
211 template<
typename T_,
int n_,
int s_>
214 static_assert(n_ > 0,
"invalid vector length");
215 static_assert(s_ >= n_,
"invalid vector stride");
219 static constexpr int n = n_;
221 static constexpr int s = s_;
226 typedef typename Intern::DataTypeExtractor<ValueType>::MyDataType
DataType;
239 for(
int i(0); i < n_; ++i)
249 for(
int i(0); i < n_; ++i)
256 template<
typename Tx_,
int sx_>
259 for(
int i(0); i < n_; ++i)
266 template<
typename Tx_>
267 CUDA_HOST_DEVICE
explicit Vector(
const std::array<Tx_, n_>& x)
269 for(
int i(0); i < n_; ++i)
276 template<
typename Tx_,
int sx_>
280 for(
int i(0); i < n_; ++i)
282 if constexpr(std::is_same<T_, DataType>::value)
285 v[i] = T_::convert(x.v[i]);
308 template<
typename Tx_>
309 CUDA_HOST_DEVICE
Vector(
const std::initializer_list<Tx_>& x)
311 XASSERTM(std::size_t(n_) == x.size(),
"invalid initializer list size");
313 for(
int i(0); i < n_; ++i, ++it)
320 for(
int i(0); i < n_; ++i)
331 for(
int i(0); i < n_; ++i)
339 template<
typename Tx_,
int sx_>
342 for(
int i(0); i < n_; ++i)
350 template<
typename Tx_>
353 for(
int i(0); i < n_; ++i)
374 template<
typename Tx_>
377 XASSERTM(std::size_t(n_) == x.size(),
"invalid initializer list size");
379 for(
int i(0); i < n_; ++i, ++it)
385 template<
typename Tx_,
int sx_>
388 for(
int i(0); i < n_; ++i)
402 ASSERTM((i >= 0) && (i < n_),
"index i out-of-bounds");
409 ASSERTM((i >= 0) && (i < n_),
"index i out-of-bounds");
416 ASSERTM((i >= 0) && (i < n_),
"index i out-of-bounds");
423 ASSERTM((i >= 0) && (i < n_),
"index i out-of-bounds");
436 for(
int i(0); i < n_; ++i)
451 template<
int nn_,
int nx_,
int snx_>
454 static_assert(nn_ <= n_,
"invalid copy_n size");
455 static_assert(nn_ <= nx_,
"invalid copy_n size");
456 for(
int i(0); i < nn_; ++i)
465 for(
int i(0); i < n_; ++i)
476 for(
int i(0); i < n_; ++i)
487 for(
int i(0); i < n_; ++i)
498 for(
int i(0); i < n_; ++i)
513 for(
int i(0); i < n_; ++i)
531 static_assert(nn_ <= n_,
"invalid format_n size");
532 for(
int i(0); i < nn_; ++i)
546 for(
int i(0); i < n_; ++i)
564 static_assert(nn_ <= n_,
"invalid scale_n size");
565 for(
int i(0); i < nn_; ++i)
580 ASSERTM(norm2 > Math::eps<DataType>(),
"Trying to normalize a null vector!");
581 return ((*
this) *= (
DataType(1)/norm2));
584 ASSERTM(norm2_sqr > CudaMath::cuda_get_eps<DataType>(),
"Trying to normalize a null vector!");
585 return ((*
this) *= CudaMath::cuda_rsqrt(norm2_sqr));
600 static_assert(nn_ <= n_,
"invalid normalize_n size");
602 const DataType norm2(this->
template norm_euclid_n<nn_>());
603 ASSERTM(norm2 > Math::eps<DataType>(),
"Trying to normalize a null vector!");
604 this->
template scale_n<nn_>(
DataType(1)/norm2);
607 const DataType norm2_sqr(this->
template norm_euclid_sqr_n<nn_>());
608 ASSERTM(norm2_sqr > CudaMath::cuda_get_eps<DataType>(),
"Trying to normalize a null vector!");
609 this->
template scale_n<nn_>(CudaMath::cuda_rsqrt(norm2_sqr));
621 for(
int i(0); i < n_; ++i)
637 static_assert(nn_ <= n_,
"invalid negate_n size");
638 for(
int i(0); i < nn_; ++i)
657 for(
int i(0); i < n_; ++i)
658 v[i] += alpha * x.v[i];
676 template<
int nn_,
int nx_,
int snx_>
679 static_assert(nn_ <= n_,
"invalid negate_n size");
680 static_assert(nn_ <= nx_,
"invalid negate_n size");
681 for(
int i(0); i < nn_; ++i)
682 v[i] += alpha * x.v[i];
703 template<
int sna_,
int snb_>
706 for(
int i(0); i < n_; ++i)
707 v[i] = (T_(1) - alpha) * a.v[i] + alpha * b.v[i];
731 template<
int nn_,
int na_,
int nb_,
int sna_,
int snb_>
734 static_assert(nn_ <= n_,
"invalid set_convex_n size");
735 static_assert(nn_ <= na_,
"invalid set_convex_n size");
736 static_assert(nn_ <= nb_,
"invalid set_convex_n size");
737 for(
int i(0); i < nn_; ++i)
738 v[i] = (T_(1) - alpha) * a.v[i] + alpha * b.v[i];
757 template<
int m_,
int sma_,
int sna_,
int sx_>
761 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
763 for(
int i(0); i < n_; ++i)
766 for(
int j(0); j < m_; ++j)
768 v[i] += a.
v[i][j] * x.
v[j];
793 template<
int mm_,
int nn_,
int ma_,
int na_,
int sna_,
int sma_,
int nx_,
int sx_>
796 static_assert(mm_ <= n_,
"invalid set_mat_vec_mult_n size");
797 static_assert(mm_ <= ma_,
"invalid set_mat_vec_mult_n size");
798 static_assert(nn_ <= nx_,
"invalid set_mat_vec_mult_n size");
799 static_assert(nn_ <= na_,
"invalid set_mat_vec_mult_n size");
802 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
804 for(
int i(0); i < mm_; ++i)
807 for(
int j(0); j < nn_; ++j)
809 v[i] += a.
v[i][j] * x.
v[j];
830 template<
int m_,
int sma_,
int sna_,
int sx_>
834 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
836 for(
int j(0); j < n_; ++j)
839 for(
int i(0); i < m_; ++i)
841 v[j] += a.
v[i][j] * x.
v[i];
866 template<
int nn_,
int mm_,
int mx_,
int smx_,
int ma_,
int na_,
int sma_,
int sna_>
869 static_assert(mm_ <= mx_,
"invalid set_mat_vec_mult_n size");
870 static_assert(mm_ <= ma_,
"invalid set_mat_vec_mult_n size");
871 static_assert(nn_ <= n_,
"invalid set_mat_vec_mult_n size");
872 static_assert(nn_ <= na_,
"invalid set_mat_vec_mult_n size");
875 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
877 for(
int j(0); j < nn_; ++j)
880 for(
int i(0); i < mm_; ++i)
882 v[j] += a.
v[i][j] * x.
v[i];
906 template<
int m_,
int sma_,
int sna_,
int sx_>
910 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
912 for(
int i(0); i < n_; ++i)
914 for(
int j(0); j < m_; ++j)
916 v[i] += alpha * a.
v[i][j] * x.
v[j];
946 template<
int mm_,
int nn_,
int ma_,
int na_,
int sna_,
int sma_,
int nx_,
int sx_>
949 static_assert(mm_ <= n_,
"invalid add_mat_vec_mult_n size");
950 static_assert(mm_ <= ma_,
"invalid add_mat_vec_mult_n size");
951 static_assert(nn_ <= nx_,
"invalid add_mat_vec_mult_n size");
952 static_assert(nn_ <= na_,
"invalid add_mat_vec_mult_n size");
955 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
957 for(
int i(0); i < mm_; ++i)
959 for(
int j(0); j < nn_; ++j)
961 v[i] += alpha * a.
v[i][j] * x.
v[j];
985 template<
int m_,
int sma_,
int sna_,
int sx_>
989 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
991 for(
int j(0); j < n_; ++j)
993 for(
int i(0); i < m_; ++i)
995 v[j] += alpha * a.
v[i][j] * x.
v[i];
1025 template<
int nn_,
int mm_,
int mx_,
int smx_,
int ma_,
int na_,
int sma_,
int sna_>
1028 static_assert(mm_ <= mx_,
"invalid add_vec_mat_mult_n size");
1029 static_assert(mm_ <= ma_,
"invalid add_vec_mat_mult_n size");
1030 static_assert(nn_ <= n_,
"invalid add_vec_mat_mult_n size");
1031 static_assert(nn_ <= na_,
"invalid add_vec_mat_mult_n size");
1034 ASSERTM((
const void*)
this != (
const void*)&x,
"result vector and multiplicand vector 'x' must be different objects");
1036 for(
int j(0); j < nn_; ++j)
1038 for(
int i(0); i < mm_; ++i)
1040 v[j] += alpha * a.
v[i][j] * x.
v[i];
1055 for(
int i(0); i < n_; ++i)
1060 r += CudaMath::cuda_sqr(
v[i]);
1075 static_assert(nn_ <= n_,
"invalid norm_euclid_sqr_n size");
1077 for(
int i(0); i < nn_; ++i)
1082 r += CudaMath::cuda_sqr(
v[i]);
1112 static_assert(nn_ <= n_,
"invalid norm_euclid_n size");
1114 return Math::sqrt(this->
template norm_euclid_sqr_n<nn_>());
1116 return CudaMath::cuda_sqrt(this->
template norm_euclid_sqr_n<nn_>());
1129 for(
int i(0); i < n_; ++i)
1134 r += CudaMath::cuda_abs(
v[i]);
1149 static_assert(nn_ <= n_,
"invalid norm_l1_n size");
1151 for(
int i(0); i < nn_; ++i)
1156 r += CudaMath::cuda_abs(
v[i]);
1171 for(
int i(0); i < n_; ++i)
1176 r = CudaMath::cuda_max(r, CudaMath::cuda_abs(
v[i]));
1191 static_assert(nn_ <= n_,
"invalid norm_l1_n size");
1193 for(
int i(0); i < nn_; ++i)
1198 r = CudaMath::cuda_max(r, CudaMath::cuda_abs(
v[i]));
1229 for (
int i(0) ; i < b.n ; ++i)
1238 CUDA_HOST
friend std::istream& operator>>(std::istream& in,
Vector& vector)
1241 in.ignore(std::numeric_limits<std::streamsize>::max(),
'[');
1249 in.ignore(std::numeric_limits<std::streamsize>::max(),
']');
1255 template<
typename T_,
int sx_,
int sa_>
1256 inline void cross(Vector<T_, 2, sx_>& x,
const Vector<T_, 2, sa_>& a)
/**
 * \brief Computes the 3D cross product x := a x b.
 *
 * \param[out] x
 * The vector that receives the cross product of \p a and \p b.
 *
 * \param[in] a, b
 * The two input vectors whose cross product is to be computed.
 *
 * \note \p x must not alias \p a or \p b, as the components of \p x are
 * written while \p a and \p b are still being read.
 */
template<
  typename T_,
  int sx_,
  int sa_,
  int sb_>
CUDA_HOST_DEVICE inline void cross(Vector<T_, 3, sx_>& x, const Vector<T_, 3, sa_>& a, const Vector<T_, 3, sb_>& b)
{
  x.v[0] = a.v[1]*b.v[2] - a.v[2]*b.v[1];
  x.v[1] = a.v[2]*b.v[0] - a.v[0]*b.v[2];
  x.v[2] = a.v[0]*b.v[1] - a.v[1]*b.v[0];
}
1271 template<
typename T_,
int n_,
int s_>
1278 template<
typename T_,
int n_,
int s_>
1285 template<
typename T_,
int n_,
int sa_,
int sb_>
1292 template<
typename T_,
int n_,
int sa_,
int sb_>
1299 template<
typename T_,
int n_,
int sa_,
int sb_>
1311 template<
typename T_>
1315 XABORTM(
"calculate_opening_angle not implemented for CUDA");
1330 template<
typename T_,
int dim_>
1333 T_ norm2(y.template norm_euclid_n<dim_>());
1334 if(norm2 < Math::eps<T_>())
1336 const auto tmp_normalized = (T_(1)/norm2) * y;
1337 return dot(x, tmp_normalized) * tmp_normalized;
1347 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_>
1350 static_assert(m_ > 0,
"invalid row count");
1351 static_assert(n_ > 0,
"invalid column count");
1352 static_assert(sm_ >= m_,
"invalid row stride");
1353 static_assert(sn_ >= n_,
"invalid column stride");
1357 static constexpr int m = m_;
1359 static constexpr int n = n_;
1361 static constexpr int sm = sm_;
1363 static constexpr int sn = sn_;
1368 typedef typename Intern::DataTypeExtractor<ValueType>::MyDataType
DataType;
1383 for(
int i(0); i < m_; ++i)
1390 template<
typename T2_,
int sma_,
int sna_>
1393 for(
int i(0); i < m_; ++i)
1395 for(
int j(0); j < n_; ++j)
1397 v[i][j] = T_(a.
v[i][j]);
1413 template<
typename Tx_>
1414 CUDA_HOST_DEVICE
explicit Matrix(
const std::initializer_list<Tx_>& x)
1416 XASSERTM(std::size_t(m_) == x.size(),
"invalid initializer list size");
1418 for(
int i(0); i < m_; ++i, ++it)
1435 template<
typename Tx_>
1436 CUDA_HOST_DEVICE
explicit Matrix(
const std::initializer_list<std::initializer_list<Tx_>>& x)
1438 XASSERTM(std::size_t(m_) == x.size(),
"invalid initializer list size");
1440 for(
int i(0); i < m_; ++i, ++it)
1447 for(
int i(0); i < m_; ++i)
1455 template<
int sma_,
int sna_>
1458 for(
int i(0); i < m_; ++i)
1477 template<
typename Tx_>
1480 XASSERTM(std::size_t(m_) == x.size(),
"invalid initializer list size");
1482 for(
int i(0); i < m_; ++i, ++it)
1498 template<
typename Tx_>
1499 CUDA_HOST_DEVICE
Matrix&
operator=(
const std::initializer_list<std::initializer_list<Tx_>>& x)
1501 XASSERTM(std::size_t(m_) == x.size(),
"invalid initializer list size");
1503 for(
int i(0); i < m_; ++i, ++it)
1509 template<
typename Tx_,
int sma_,
int sna_>
1512 for(
int i(0); i < m_; ++i)
1513 v[i].convert(a.
v[i]);
1527 ASSERTM( (i >= 0) && (i < m_),
"index i out-of-bounds");
1528 ASSERTM( (j >= 0) && (j < n_),
"index j out-of-bounds");
1535 ASSERTM( (i >= 0) && (i < m_),
"index i out-of-bounds");
1536 ASSERTM( (j >= 0) && (j < n_),
"index j out-of-bounds");
1551 ASSERTM( (i >= 0) && (i <m_),
"index i out-of-bounds");
1558 ASSERTM( (i >= 0) && (i <m_),
"index i out-of-bounds");
1565 for(
int i(0); i < m_; ++i)
1573 template<
int sma_,
int sna_>
1576 for(
int i(0); i < m_; ++i)
1584 template<
int sma_,
int sna_>
1587 for(
int i(0); i < m_; ++i)
1600 template<
int sma_,
int sna_>
1603 for(
int i(0); i < m_; ++i)
1604 for(
int j(0); j < n_; ++j)
1605 v[i][j] = a.
v[i][j];
1620 template<
int mm_,
int nn_,
int ma_,
int na_,
int sma_,
int sna_>
1623 static_assert(mm_ <= m_,
"invalid copy_n size");
1624 static_assert(mm_ <= ma_,
"invalid copy_n size");
1625 static_assert(nn_ <= n_,
"invalid copy_n size");
1626 static_assert(nn_ <= na_,
"invalid copy_n size");
1627 for(
int i(0); i < mm_; ++i)
1628 for(
int j(0); j < nn_; ++j)
1629 v[i][j] = a.
v[i][j];
1640 for(
int i(0); i < m_; ++i)
1660 for(
int i(0); i < m_; ++i)
1665 r += CudaMath::cuda_sqr(v[i][i]);
1667 for(
int j(0); j < n_; ++j)
1672 r += CudaMath::cuda_sqr(v[i][j]);
1692 return CudaMath::cuda_sqrt(norm_frobenius_sqr());
1707 for(
int i(0); i < m_; ++i)
1709 for(
int j(0); j < n_; ++j)
1714 r += CudaMath::cuda_sqr(v[i][j]);
1729 for(
int i(0); i < m_; ++i)
1731 for(
int j(0); j < n_; ++j)
1736 r += CudaMath::cuda_sqr(v[i][j] -
DataType(i == j ? 1 : 0));
1743 return CudaMath::cuda_sqrt(r);
1761 int k = CudaMath::cuda_min(m_, n_);
1764 for(
int i(0); i < k; ++i)
1780 return Intern::DetHelper<m_, n_>::compute(*
this);
1796 return Intern::VolHelper<m_, n_>::compute(*
this);
1809 template<
int sma_,
int sna_>
1813 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and input matrix 'a' must be different objects");
1814 Intern::InverseHelper<m_, n_>::compute(*
this, a);
1834 template<
typename ThreadGroup_,
int sma_,
int sna_>
1838 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and input matrix 'a' must be different objects");
1839 Intern::CudaGroupedInverseHelper<m_, n_>::compute(tg, *
this, a, det);
1861 template<
int sma_,
int sna_>
1865 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and input matrix 'a' must be different objects");
1866 Intern::CofactorHelper<m_, n_>::compute(*
this, a);
1878 template<
int sma_,
int sna_>
1882 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and input matrix 'a' must be different objects");
1884 for(
int i(0); i < m_; ++i)
1886 for(
int j(0); j < n_; ++j)
1888 v[i][j] = a.
v[j][i];
1906 template<
int l_,
int sla_,
int sna_>
1909 static_assert(m_ == n_,
"Gram matrices must be square");
1913 for(
int k(0); k < l_; ++k)
1915 for(
int i(0); i < n_; ++i)
1917 for(
int j(0); j < n_; ++j)
1919 v[i][j] += a.
v[k][i] * a.
v[k][j];
1942 template<
int snx_,
int sny_>
1946 for(
int i(0); i < m_; ++i)
1948 r += x[i] *
dot(v[i], y);
1970 template<
int snx_,
int sny_>
1976 for(
int i(0); i < m_; ++i)
1978 for(
int j(0); j < n_; ++j)
1980 v[i][j] += alpha * x[i] * y[j];
2000 template<
int snx_,
int sny_>
2005 for(
int i(0); i < m_; ++i)
2007 for(
int j(0); j < n_; ++j)
2009 v[i][j] = x[i] * y[j];
2026 template<
int sma_,
int sna_>
2029 for(
int i(0); i < m_; ++i)
2031 for(
int j(0); j < n_; ++j)
2033 v[i][j] += alpha * a.
v[i][j];
2047 for(
int i(0); (i < m_) && (i < n_); ++i)
2070 template<
int la_,
int lb_,
int sma_,
int sna_,
int smb_,
int snb_>
2077 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and multiplicand matrix 'a' must be different objects");
2078 ASSERTM((
const void*)
this != (
const void*)&b,
"result matrix and multiplicand matrix 'b' must be different objects");
2079 ASSERTM(la_ == lb_,
"second dimension of a must be equal to first dimension of b");
2081 for(
int i(0); i < m_; ++i)
2083 for(
int j(0); j < n_; ++j)
2086 for(
int k(0); k < la_; ++k)
2088 r += a.v[i][k] * b.v[k][j];
2090 v[i][j] += alpha * r;
2111 template<
int la_,
int lb_,
int sma_,
int sna_,
int smb_,
int snb_>
2115 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and multiplicand matrix 'a' must be different objects");
2116 ASSERTM((
const void*)
this != (
const void*)&b,
"result matrix and multiplicand matrix 'b' must be different objects");
2117 ASSERTM(la_ == lb_,
"second dimension of a must be equal to first dimension of b");
2120 return add_mat_mat_mult(a, b);
2144 template<
int k_,
int l_,
int sma_,
int sna_,
int smb_,
int snb_,
int smd_,
int snd_>
2152 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and multiplicand matrix 'a' must be different objects");
2153 ASSERTM((
const void*)
this != (
const void*)&b,
"result matrix and multiplicand matrix 'b' must be different objects");
2154 ASSERTM((
const void*)
this != (
const void*)&d,
"result matrix and multiplicand matrix 'd' must be different objects");
2156 for(
int i(0); i < m_; ++i)
2158 for(
int j(0); j < n_; ++j)
2161 for(
int p(0); p < k_; ++p)
2164 for(
int q(0); q < l_; ++q)
2166 t += a(p,q) * d(q,j);
2170 v[i][j] += alpha * r;
2197 template<
int k_,
int l_,
int sma_,
int sna_,
int smb_,
int snb_,
int smd_,
int snd_>
2205 ASSERTM((
const void*)
this != (
const void*)&a,
"result matrix and multiplicand matrix 'a' must be different objects");
2206 ASSERTM((
const void*)
this != (
const void*)&b,
"result matrix and multiplicand matrix 'b' must be different objects");
2207 ASSERTM((
const void*)
this != (
const void*)&d,
"result matrix and multiplicand matrix 'd' must be different objects");
2210 return add_double_mat_mult(a, b, d, alpha);
2233 template<
int l_,
int snv_,
int slt_,
int smt_,
int snt_>
2239 for(
int i(0); i < m_; ++i)
2241 for(
int j(0); j < n_; ++j)
2244 for(
int k(0); k < l_; ++k)
2246 r += x(k) * t(k,i,j);
2248 v[i][j] += alpha * r;
2274 template<
int l_,
int snv_,
int slt_,
int smt_,
int snt_>
2281 return add_vec_tensor_mult(x, t, alpha);
2291 for(
int i(0); i < m_; ++i)
2292 for(
int j(0); j < n_; ++j)
2293 v[i][j] = (i == j ? T_(1) : T_(0));
2307 static_assert((m_ == 2) && (n_ == 2),
"this function works only for 2x2 matrices");
2310 v[0][1] = -(v[1][0] =
Math::sin(angle));
2312 v[0][0] = (v[1][1] = CudaMath::cuda_cos(angle));
2313 v[0][1] = -(v[1][0] = CudaMath::cuda_sin(angle));
2334 static_assert((m_ == 3) && (n_ == 3),
"this function works only for 3x3 matrices");
2343 const T_ cy = CudaMath::cuda_cos(yaw);
2344 const T_ sy = CudaMath::cuda_sin(yaw);
2345 const T_ cp = CudaMath::cuda_cos(pitch);
2346 const T_ sp = CudaMath::cuda_sin(pitch);
2347 const T_ cr = CudaMath::cuda_cos(roll);
2348 const T_ sr = CudaMath::cuda_sin(roll);
2351 v[0][1] = cy*sp*sr - sy*cr;
2352 v[0][2] = cy*sp*cr + sy*sr;
2354 v[1][1] = sy*sp*sr + cy*cr;
2355 v[1][2] = sy*sp*cr - cy*sr;
2376 CUDA_HOST
friend std::ostream & operator<< (std::ostream & lhs,
const Matrix& A)
2378 for (
int i(0) ; i < m-1 ; ++i)
2380 lhs << A[i] <<
"\n";
2395 for (
int i(0) ; i < m; ++i)
2413 template<
typename T_,
int mx_,
int smx_,
int ma_,
int na_,
int sm_,
int sn_>
2416 static_assert(mx_ >= 2,
"invalid nu vector size for orthogonal_2x1");
2417 static_assert(ma_ >= 2,
"invalid matrix row size for orthogonal_2x1");
2418 static_assert(na_ >= 1,
"invalid matrix column size for orthogonal_2x1");
2434 template<
typename T_,
int mx_,
int smx_,
int ma_,
int na_,
int sm_,
int sn_>
2437 static_assert(mx_ >= 3,
"invalid nu vector size for orthogonal_3x2");
2438 static_assert(ma_ >= 3,
"invalid matrix row size for orthogonal_3x2");
2439 static_assert(na_ >= 2,
"invalid matrix column size for orthogonal_3x2");
2442 nu[0] = tau[1][0]*tau[2][1] - tau[2][0]*tau[1][1];
2443 nu[1] = tau[2][0]*tau[0][1] - tau[0][0]*tau[2][1];
2444 nu[2] = tau[0][0]*tau[1][1] - tau[1][0]*tau[0][1];
2471 template<
typename T_,
int m_,
int sm_,
int sn_>
2476 template<
typename T_,
int sm_,
int sn_>
2484 template<
typename T_,
int sm_,
int sn_>
2485 CUDA_HOST_DEVICE Vector<T_, 3>
orthogonal(
const Matrix<T_, 3, 2, sm_, sn_>& tau)
2487 Vector<T_, 3, sm_> nu(T_(0));
2494 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_,
int sx_>
2501 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_,
int sx_>
2508 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_>
2515 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_>
2516 CUDA_HOST_DEVICE
inline Matrix<T_, m_, n_, sm_, sn_> operator*(
const Matrix<T_, m_, n_, sm_, sn_>& a,
typename Matrix<T_, m_, n_>::DataType alpha)
2522 template<
typename T_,
int m_,
int n_,
int l_,
int sma_,
int sna_,
int smb_,
int snb_>
2523 CUDA_HOST_DEVICE
inline Matrix<T_, m_, n_> operator*(
const Matrix<T_, m_, l_, sma_, sna_>& a,
const Matrix<T_, l_, n_, smb_, snb_>& b)
2529 template<
typename T_,
int m_,
int n_,
int sma_,
int sna_,
int smb_,
int snb_>
2530 CUDA_HOST_DEVICE
inline Matrix<T_, m_, n_> operator+(
const Matrix<T_, m_, n_, sma_, sna_>& a,
const Matrix<T_, m_, n_, smb_, snb_>& b)
2536 template<
typename T_,
int m_,
int n_,
int sma_,
int sna_,
int smb_,
int snb_>
2537 CUDA_HOST_DEVICE
inline Matrix<T_, m_, n_> operator-(
const Matrix<T_, m_, n_, sma_, sna_>& a,
const Matrix<T_, m_, n_, smb_, snb_>& b)
2548 template<
typename T_,
int l_,
int m_,
int n_,
int sl_,
int sm_,
int sn_>
2551 static_assert(l_ > 0,
"invalid tube count");
2552 static_assert(m_ > 0,
"invalid row count");
2553 static_assert(n_ > 0,
"invalid column count");
2554 static_assert(sl_ >= l_,
"invalid tube stride");
2555 static_assert(sm_ >= m_,
"invalid row stride");
2556 static_assert(sn_ >= n_,
"invalid column stride");
2560 static constexpr int l = l_;
2562 static constexpr int m = m_;
2564 static constexpr int n = n_;
2566 static constexpr int sl = sl_;
2568 static constexpr int sm = sm_;
2570 static constexpr int sn = sn_;
2575 typedef typename Intern::DataTypeExtractor<ValueType>::MyDataType
DataType;
2590 for(
int i(0); i < l_; ++i)
2600 template<
typename Tx_>
2601 CUDA_HOST_DEVICE
explicit Tensor3(
const std::initializer_list<Tx_>& x)
2603 XASSERTM(std::size_t(l_) == x.size(),
"invalid initializer list size");
2605 for(
int i(0); i < l_; ++i, ++it)
2615 template<
typename Tx_>
2616 CUDA_HOST_DEVICE
explicit Tensor3(
const std::initializer_list<std::initializer_list<std::initializer_list<Tx_>>>& x)
2618 XASSERTM(std::size_t(l_) == x.size(),
"invalid initializer list size");
2620 for(
int i(0); i < l_; ++i, ++it)
2625 template<
int sla_,
int sma_,
int sna_>
2628 for(
int i(0); i < l_; ++i)
2635 for(
int i(0); i < l_; ++i)
2641 template<
int sla_,
int sma_,
int sna_>
2644 for(
int i(0); i < l_; ++i)
2657 template<
typename Tx_>
2660 XASSERTM(std::size_t(l_) == x.size(),
"invalid initializer list size");
2662 for(
int i(0); i < l_; ++i, ++it)
2675 template<
typename Tx_>
2676 CUDA_HOST_DEVICE
Tensor3&
operator=(
const std::initializer_list<std::initializer_list<std::initializer_list<Tx_>>>& x)
2678 XASSERTM(std::size_t(l_) == x.size(),
"invalid initializer list size");
2680 for(
int i(0); i < l_; ++i, ++it)
2686 template<
typename Tx_,
int sla_,
int sma_,
int sna_>
2689 for(
int i(0); i < l_; ++i)
2704 ASSERTM( (h >= 0) && (h < l_),
"index h out-of-bounds");
2705 ASSERTM( (i >= 0) && (i < m_),
"index i out-of-bounds");
2706 ASSERTM( (j >= 0) && (j < n_),
"index j out-of-bounds");
2713 ASSERTM( (h >= 0) && (h < l_),
"index h out-of-bounds");
2714 ASSERTM( (i >= 0) && (i < m_),
"index i out-of-bounds");
2715 ASSERTM( (j >= 0) && (j < n_),
"index j out-of-bounds");
2730 ASSERTM( (h >= 0) && (h < l_),
"index h out-of-bounds");
2737 ASSERTM( (h >= 0) && (h < l_),
"index h out-of-bounds");
2744 for(
int i(0); i < l_; ++i)
2750 template<
int sla_,
int sma_,
int sna_>
2753 for(
int i(0); i < l_; ++i)
2759 template<
int sla_,
int sma_,
int sna_>
2762 for(
int i(0); i < l_; ++i)
2773 template<
int sla_,
int sma_,
int sna_>
2776 for(
int i(0); i < l_; ++i)
2777 for(
int j(0); j < m_; ++j)
2778 for(
int k(0); k < n_; ++k)
2779 v[i][j][k] = a.
v[i][j][k];
2794 template<
int ll_,
int mm_,
int nn_,
int la_,
int ma_,
int na_,
int sla_,
int sma_,
int sna_>
2797 static_assert(ll_ <= l_,
"invalid copy_n size");
2798 static_assert(ll_ <= la_,
"invalid copy_n size");
2799 static_assert(mm_ <= m_,
"invalid copy_n size");
2800 static_assert(mm_ <= ma_,
"invalid copy_n size");
2801 static_assert(nn_ <= n_,
"invalid copy_n size");
2802 static_assert(nn_ <= na_,
"invalid copy_n size");
2803 for(
int i(0); i < ll_; ++i)
2804 for(
int j(0); j < mm_; ++j)
2805 for(
int k(0); k < nn_; ++k)
2806 v[i][j][k] = a.
v[i][j][k];
2835 template<
int k_,
int sma_,
int sna_,
int slt_,
int smt_,
int snt_>
2842 ASSERTM((
const void*)
this != (
const void*)&t,
"result tensor and multiplicand tensor 't' must be different objects");
2844 for(
int h(0); h < l_; ++h)
2846 for(
int i(0); i < m_; ++i)
2848 for(
int j(0); j < n_; ++j)
2851 for(
int p(0); p < k_; ++p)
2853 r += a(h,p) * t(p,i,j);
2887 int lt_,
int mt_,
int nt_,
2888 int slt_,
int smt_,
int snt_,
2889 int smb_,
int snb_,
int smd_,
int snd_>
2897 ASSERTM((
const void*)
this != (
const void*)&t,
"result tensor and multiplicand tensor 't' must be different objects");
2899 for(
int h(0); h < l_; ++h)
2901 for(
int i(0); i < m_; ++i)
2903 for(
int j(0); j < n_; ++j)
2906 for(
int p(0); p < mt_; ++p)
2908 for(
int q(0); q < nt_; ++q)
2910 r += t(h,p,q) * b(p,i) * d(q,j);
2940 template<
int slx_,
int sma_,
int sna_>
2946 for(
int h(0); h < l_; ++h)
2948 for(
int i(0); i < m_; ++i)
2950 for(
int j(0); j < n_; ++j)
2969 template<
typename T_,
int l_,
int m_,
int n_,
int sl_,
int sm_,
int sn_>
2971 typename Tensor3<T_, l_, m_, n_>::DataType alpha,
const Tensor3<T_, l_, m_, n_, sl_, sm_, sn_>& a)
2977 template<
typename T_,
int l_,
int m_,
int n_,
int sl_,
int sm_,
int sn_>
2979 const Tensor3<T_, l_, m_, n_, sl_, sm_, sn_>& a,
typename Tensor3<T_, l_, m_, n_, sl_, sm_, sn_>::DataType alpha)
3000 template<typename T_, typename std::enable_if<Intern::DataTypeExtractor<T_>::level == 0,
bool>::type =
true>
3001 CUDA_HOST_DEVICE
inline T_
dot(
const T_& a,
const T_& b)
3017 template<
typename T_,
int n_,
int sa_,
int sb_>
3022 for(
int i(0); i < n_; ++i)
3040 template<
typename T_,
int m_,
int n_,
int sma_,
int sna_,
int smb_,
int snb_>
3041 CUDA_HOST_DEVICE
inline typename Matrix<T_, m_, n_>::DataType dot(
const Matrix<T_, m_, n_, sma_, sna_>& a,
const Matrix<T_, m_, n_, smb_, snb_>& b)
3044 for(
int i(0); i < m_; ++i)
3059 template<
typename T_,
int l_,
int m_,
int n_,
int sla_ ,
int sma_,
int sna_,
int slb_,
int smb_,
int snb_>
3065 for(
int i(0); i < l_; ++i)
3081 template<
typename T_>
3082 CUDA_HOST_DEVICE
inline void add_id(T_& x,
const T_& alpha)
3096 template<
typename T_,
int n_,
int sn_>
3099 for(
int i(0); i < n_; ++i)
3112 template<
typename T_,
int n_,
int sm_,
int sn_>
3115 for(
int i(0); i < n_; ++i)
3128 template<
typename T_,
int n_,
int sl_,
int sm_,
int sn_>
3129 CUDA_HOST_DEVICE
inline void add_id(
Tensor3<T_, n_, n_, n_, sl_, sm_, sn_>& x,
const typename Tensor3<T_, n_, n_, n_, sl_, sm_, sn_>::DataType& alpha)
3131 for(
int i(0); i < n_; ++i)
3149 template<
typename T_>
3150 CUDA_HOST_DEVICE
inline void axpy(T_& y,
const T_& x,
const T_& alpha)
3169 template<
typename T_,
int n_,
int sn_>
3175 for(
int i(0); i < n_; ++i)
3176 axpy(y.v[i], x.v[i], alpha);
3193 template<
typename T_,
int m_,
int n_,
int sm_,
int sn_>
3199 for(
int i(0); i < m_; ++i)
3200 axpy(y.
v[i], x.
v[i], alpha);
3217 template<
typename T_,
int l_,
int m_,
int n_,
int sl_,
int sm_,
int sn_>
3223 for(
int i(0); i < l_; ++i)
3224 axpy(y.v[i], x.v[i], alpha);
3238 struct DetHelper<n_,n_>
3240 template<
typename T_,
int sma_,
int sna_>
3245 const T_ det = Intern::InverseHelper<n_, n_>::compute(b, a);
3249 return Math::isnormal(det) ? det : T_(0);
3251 return CudaMath::cuda_isnormal(det) ? det : T_(0);
3257 struct DetHelper<1,1>
3259 template<
typename T_,
int sma_,
int sna_>
3260 CUDA_HOST_DEVICE
static T_ compute(
const Tiny::Matrix<T_, 1, 1, sma_, sna_>& a)
/// \brief Determinant helper specialization: explicit formula for 2x2 matrices.
template<>
struct DetHelper<2,2>
{
  /// Returns the determinant of the 2x2 matrix \p a.
  template<
    typename T_,
    int sma_,
    int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 2, 2, sma_, sna_>& a)
  {
    // det(a) = a00*a11 - a01*a10
    return a(0,0)*a(1,1) - a(0,1)*a(1,0);
  }
};
/// \brief Determinant helper specialization: explicit formula for 3x3 matrices.
template<>
struct DetHelper<3,3>
{
  /// Returns the determinant of the 3x3 matrix \p a via cofactor expansion along row 0.
  template<
    typename T_,
    int sma_,
    int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 3, 3, sma_, sna_>& a)
  {
    return a(0,0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1))
         + a(0,1)*(a(1,2)*a(2,0) - a(1,0)*a(2,2))
         + a(0,2)*(a(1,0)*a(2,1) - a(1,1)*a(2,0));
  }
};
/// \brief Determinant helper specialization: explicit formula for 4x4 matrices.
template<>
struct DetHelper<4,4>
{
  /// Returns the determinant of the 4x4 matrix \p a via Laplace expansion
  /// on the first two rows, reusing the 2x2 sub-determinants of rows 2 and 3.
  template<
    typename T_,
    int sma_,
    int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 4, 4, sma_, sna_>& a)
  {
    // 2x2 sub-determinants of rows 2 and 3; w[k] corresponds to the
    // column pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3) in this order
    T_ w[6] =
    {
      a(2,0)*a(3,1) - a(2,1)*a(3,0),
      a(2,0)*a(3,2) - a(2,2)*a(3,0),
      a(2,0)*a(3,3) - a(2,3)*a(3,0),
      a(2,1)*a(3,2) - a(2,2)*a(3,1),
      a(2,1)*a(3,3) - a(2,3)*a(3,1),
      a(2,2)*a(3,3) - a(2,3)*a(3,2)
    };
    // Laplace expansion along rows 0 and 1
    return
      + a(0,0) * (a(1,1)*w[5] - a(1,2)*w[4] + a(1,3)*w[3])
      - a(0,1) * (a(1,0)*w[5] - a(1,2)*w[2] + a(1,3)*w[1])
      + a(0,2) * (a(1,0)*w[4] - a(1,1)*w[2] + a(1,3)*w[0])
      - a(0,3) * (a(1,0)*w[3] - a(1,1)*w[1] + a(1,2)*w[0]);
  }
};
/// \brief Determinant helper specialization: explicit formula for 5x5 matrices.
template<>
struct DetHelper<5,5>
{
  /// Returns the determinant of the 5x5 matrix \p a via Laplace expansion
  /// on the first two rows, built from cascaded 2x2 and 3x3 sub-determinants.
  template<
    typename T_,
    int sma_,
    int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 5, 5, sma_, sna_>& a)
  {
    // 2x2 sub-determinants of rows 3 and 4
    T_ v[10] =
    {
      a(3,0)*a(4,1) - a(3,1)*a(4,0),
      a(3,0)*a(4,2) - a(3,2)*a(4,0),
      a(3,0)*a(4,3) - a(3,3)*a(4,0),
      a(3,0)*a(4,4) - a(3,4)*a(4,0),
      a(3,1)*a(4,2) - a(3,2)*a(4,1),
      a(3,1)*a(4,3) - a(3,3)*a(4,1),
      a(3,1)*a(4,4) - a(3,4)*a(4,1),
      a(3,2)*a(4,3) - a(3,3)*a(4,2),
      a(3,2)*a(4,4) - a(3,4)*a(4,2),
      a(3,3)*a(4,4) - a(3,4)*a(4,3)
    };
    // 3x3 sub-determinants of rows 2, 3 and 4
    T_ w[10] =
    {
      a(2,0)*v[4] - a(2,1)*v[1] + a(2,2)*v[0],
      a(2,0)*v[5] - a(2,1)*v[2] + a(2,3)*v[0],
      a(2,0)*v[6] - a(2,1)*v[3] + a(2,4)*v[0],
      a(2,0)*v[7] - a(2,2)*v[2] + a(2,3)*v[1],
      a(2,0)*v[8] - a(2,2)*v[3] + a(2,4)*v[1],
      a(2,0)*v[9] - a(2,3)*v[3] + a(2,4)*v[2],
      a(2,1)*v[7] - a(2,2)*v[5] + a(2,3)*v[4],
      a(2,1)*v[8] - a(2,2)*v[6] + a(2,4)*v[4],
      a(2,1)*v[9] - a(2,3)*v[6] + a(2,4)*v[5],
      a(2,2)*v[9] - a(2,3)*v[8] + a(2,4)*v[7]
    };
    // Laplace expansion along rows 0 and 1
    return
      + a(0,0)*(a(1,1)*w[9] - a(1,2)*w[8] + a(1,3)*w[7] - a(1,4)*w[6])
      - a(0,1)*(a(1,0)*w[9] - a(1,2)*w[5] + a(1,3)*w[4] - a(1,4)*w[3])
      + a(0,2)*(a(1,0)*w[8] - a(1,1)*w[5] + a(1,3)*w[2] - a(1,4)*w[1])
      - a(0,3)*(a(1,0)*w[7] - a(1,1)*w[4] + a(1,2)*w[2] - a(1,4)*w[0])
      + a(0,4)*(a(1,0)*w[6] - a(1,1)*w[3] + a(1,2)*w[1] - a(1,3)*w[0]);
  }
};
/// \brief Determinant helper specialization: explicit formula for 6x6 matrices.
template<>
struct DetHelper<6,6>
{
  /// Returns the determinant of the 6x6 matrix \p a via Laplace expansion
  /// on the first two rows, built from cascaded 2x2, 3x3 and 4x4
  /// sub-determinants of the lower rows.
  template<
    typename T_,
    int sma_,
    int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 6, 6, sma_, sna_>& a)
  {
    // 2x2 sub-determinants of rows 4 and 5
    T_ v[15] =
    {
      a(4,0)*a(5,1) - a(4,1)*a(5,0),
      a(4,0)*a(5,2) - a(4,2)*a(5,0),
      a(4,0)*a(5,3) - a(4,3)*a(5,0),
      a(4,0)*a(5,4) - a(4,4)*a(5,0),
      a(4,0)*a(5,5) - a(4,5)*a(5,0),
      a(4,1)*a(5,2) - a(4,2)*a(5,1),
      a(4,1)*a(5,3) - a(4,3)*a(5,1),
      a(4,1)*a(5,4) - a(4,4)*a(5,1),
      a(4,1)*a(5,5) - a(4,5)*a(5,1),
      a(4,2)*a(5,3) - a(4,3)*a(5,2),
      a(4,2)*a(5,4) - a(4,4)*a(5,2),
      a(4,2)*a(5,5) - a(4,5)*a(5,2),
      a(4,3)*a(5,4) - a(4,4)*a(5,3),
      a(4,3)*a(5,5) - a(4,5)*a(5,3),
      a(4,4)*a(5,5) - a(4,5)*a(5,4)
    };
    // 3x3 sub-determinants of rows 3, 4 and 5
    T_ w[20] =
    {
      a(3,0)*v[ 5] - a(3,1)*v[ 1] + a(3,2)*v[ 0],
      a(3,0)*v[ 6] - a(3,1)*v[ 2] + a(3,3)*v[ 0],
      a(3,0)*v[ 7] - a(3,1)*v[ 3] + a(3,4)*v[ 0],
      a(3,0)*v[ 8] - a(3,1)*v[ 4] + a(3,5)*v[ 0],
      a(3,0)*v[ 9] - a(3,2)*v[ 2] + a(3,3)*v[ 1],
      a(3,0)*v[10] - a(3,2)*v[ 3] + a(3,4)*v[ 1],
      a(3,0)*v[11] - a(3,2)*v[ 4] + a(3,5)*v[ 1],
      a(3,0)*v[12] - a(3,3)*v[ 3] + a(3,4)*v[ 2],
      a(3,0)*v[13] - a(3,3)*v[ 4] + a(3,5)*v[ 2],
      a(3,0)*v[14] - a(3,4)*v[ 4] + a(3,5)*v[ 3],
      a(3,1)*v[ 9] - a(3,2)*v[ 6] + a(3,3)*v[ 5],
      a(3,1)*v[10] - a(3,2)*v[ 7] + a(3,4)*v[ 5],
      a(3,1)*v[11] - a(3,2)*v[ 8] + a(3,5)*v[ 5],
      a(3,1)*v[12] - a(3,3)*v[ 7] + a(3,4)*v[ 6],
      a(3,1)*v[13] - a(3,3)*v[ 8] + a(3,5)*v[ 6],
      a(3,1)*v[14] - a(3,4)*v[ 8] + a(3,5)*v[ 7],
      a(3,2)*v[12] - a(3,3)*v[10] + a(3,4)*v[ 9],
      a(3,2)*v[13] - a(3,3)*v[11] + a(3,5)*v[ 9],
      a(3,2)*v[14] - a(3,4)*v[11] + a(3,5)*v[10],
      a(3,3)*v[14] - a(3,4)*v[13] + a(3,5)*v[12]
    };
    // 4x4 sub-determinants of rows 2 to 5; v is reused to hold them
    v[ 0] = a(2,0)*w[10] - a(2,1)*w[ 4] + a(2,2)*w[ 1] - a(2,3)*w[ 0];
    v[ 1] = a(2,0)*w[11] - a(2,1)*w[ 5] + a(2,2)*w[ 2] - a(2,4)*w[ 0];
    v[ 2] = a(2,0)*w[12] - a(2,1)*w[ 6] + a(2,2)*w[ 3] - a(2,5)*w[ 0];
    v[ 3] = a(2,0)*w[13] - a(2,1)*w[ 7] + a(2,3)*w[ 2] - a(2,4)*w[ 1];
    v[ 4] = a(2,0)*w[14] - a(2,1)*w[ 8] + a(2,3)*w[ 3] - a(2,5)*w[ 1];
    v[ 5] = a(2,0)*w[15] - a(2,1)*w[ 9] + a(2,4)*w[ 3] - a(2,5)*w[ 2];
    v[ 6] = a(2,0)*w[16] - a(2,2)*w[ 7] + a(2,3)*w[ 5] - a(2,4)*w[ 4];
    v[ 7] = a(2,0)*w[17] - a(2,2)*w[ 8] + a(2,3)*w[ 6] - a(2,5)*w[ 4];
    v[ 8] = a(2,0)*w[18] - a(2,2)*w[ 9] + a(2,4)*w[ 6] - a(2,5)*w[ 5];
    v[ 9] = a(2,0)*w[19] - a(2,3)*w[ 9] + a(2,4)*w[ 8] - a(2,5)*w[ 7];
    v[10] = a(2,1)*w[16] - a(2,2)*w[13] + a(2,3)*w[11] - a(2,4)*w[10];
    v[11] = a(2,1)*w[17] - a(2,2)*w[14] + a(2,3)*w[12] - a(2,5)*w[10];
    v[12] = a(2,1)*w[18] - a(2,2)*w[15] + a(2,4)*w[12] - a(2,5)*w[11];
    v[13] = a(2,1)*w[19] - a(2,3)*w[15] + a(2,4)*w[14] - a(2,5)*w[13];
    v[14] = a(2,2)*w[19] - a(2,3)*w[18] + a(2,4)*w[17] - a(2,5)*w[16];
    // Laplace expansion along rows 0 and 1
    return
      + a(0,0)*(a(1,1)*v[14] - a(1,2)*v[13] + a(1,3)*v[12] - a(1,4)*v[11] + a(1,5)*v[10])
      - a(0,1)*(a(1,0)*v[14] - a(1,2)*v[ 9] + a(1,3)*v[ 8] - a(1,4)*v[ 7] + a(1,5)*v[ 6])
      + a(0,2)*(a(1,0)*v[13] - a(1,1)*v[ 9] + a(1,3)*v[ 5] - a(1,4)*v[ 4] + a(1,5)*v[ 3])
      - a(0,3)*(a(1,0)*v[12] - a(1,1)*v[ 8] + a(1,2)*v[ 5] - a(1,4)*v[ 2] + a(1,5)*v[ 1])
      + a(0,4)*(a(1,0)*v[11] - a(1,1)*v[ 7] + a(1,2)*v[ 4] - a(1,3)*v[ 2] + a(1,5)*v[ 0])
      - a(0,5)*(a(1,0)*v[10] - a(1,1)*v[ 6] + a(1,2)*v[ 3] - a(1,3)*v[ 1] + a(1,4)*v[ 0]);
  }
};
/// generic volume helper: vol(a) = sqrt(det(a^T * a)) for a rectangular m_ x n_ matrix
// NOTE(review): extraction garbled the parameter dimensions to n_,n_; the inner loop
// runs k < m_ over rows of 'a', so the argument must be m_ x n_ -- confirm upstream.
template<int m_, int n_>
struct VolHelper
{
  template<typename T_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, m_, n_, sma_, sna_>& a)
  {
    // build the Gram matrix b := a^T * a (n_ x n_)
    Tiny::Matrix<T_, n_, n_> b;
    for(int i(0); i < n_; ++i)
    {
      for(int j(0); j < n_; ++j)
      {
        // explicit zero-init: Tiny::Matrix entries are not guaranteed to be zeroed
        b(i,j) = T_(0);
        for(int k(0); k < m_; ++k)
        {
          b(i,j) += a(k,i)*a(k,j);
        }
      }
    }
    // volume = sqrt(det(a^T * a))
#ifdef __CUDA_ARCH__
    return CudaMath::cuda_sqrt(DetHelper<n_,n_>::compute(b));
#else
    return Math::sqrt(DetHelper<n_,n_>::compute(b));
#endif
  }
};
/// square-matrix specialization: volume is the absolute determinant
template<int n_>
struct VolHelper<n_, n_>
{
  template<typename T_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, n_, n_, sma_, sna_>& a)
  {
#ifdef __CUDA_ARCH__
    return CudaMath::cuda_abs(DetHelper<n_,n_>::compute(a));
#else
    return Math::abs(DetHelper<n_,n_>::compute(a));
#endif
  }
};
/// 2x1 specialization: volume is the Euclidean length of the single column vector
template<>
struct VolHelper<2,1>
{
  template<typename T_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 2, 1, sma_, sna_>& a)
  {
#ifdef __CUDA_ARCH__
    return CudaMath::cuda_sqrt(CudaMath::cuda_sqr(a(0,0)) + CudaMath::cuda_sqr(a(1,0)));
#else
    return Math::sqrt(Math::sqr(a(0,0)) + Math::sqr(a(1,0)));
#endif
  }
};
/// 3x1 specialization: volume is the Euclidean length of the single column vector
template<>
struct VolHelper<3,1>
{
  template<typename T_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 3, 1, sma_, sna_>& a)
  {
#ifdef __CUDA_ARCH__
    return CudaMath::cuda_sqrt(CudaMath::cuda_sqr(a(0,0)) + CudaMath::cuda_sqr(a(1,0)) + CudaMath::cuda_sqr(a(2,0)));
#else
    return Math::sqrt(Math::sqr(a(0,0)) + Math::sqr(a(1,0)) + Math::sqr(a(2,0)));
#endif
  }
};
/// 3x2 specialization: volume is the norm of the cross product of the two columns
/// (area of the parallelogram spanned by the columns)
template<>
struct VolHelper<3,2>
{
  template<typename T_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(const Tiny::Matrix<T_, 3, 2, sma_, sna_>& a)
  {
#ifdef __CUDA_ARCH__
    return CudaMath::cuda_sqrt(
      CudaMath::cuda_sqr(a(1,0)*a(2,1) - a(2,0)*a(1,1)) +
      CudaMath::cuda_sqr(a(2,0)*a(0,1) - a(0,0)*a(2,1)) +
      CudaMath::cuda_sqr(a(0,0)*a(1,1) - a(1,0)*a(0,1)));
#else
    return Math::sqrt(
      Math::sqr(a(1,0)*a(2,1) - a(2,0)*a(1,1)) +
      Math::sqr(a(2,0)*a(0,1) - a(0,0)*a(2,1)) +
      Math::sqr(a(0,0)*a(1,1) - a(1,0)*a(0,1)));
#endif
  }
};
/// 1x1 inverse: b = 1/a; returns the determinant (the single entry itself)
template<>
struct InverseHelper<1,1>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 1, 1, smb_, snb_>& b,
    const Tiny::Matrix<T_, 1, 1, sma_, sna_>& a)
  {
    b(0,0) = T_(1) / a(0,0);
    // determinant of a 1x1 matrix is its single entry
    return a(0,0);
  }
};
/// 2x2 inverse via the closed adjugate formula; returns the determinant
template<>
struct InverseHelper<2,2>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 2, 2, smb_, snb_>& b,
    const Tiny::Matrix<T_, 2, 2, sma_, sna_>& a)
  {
    T_ det = a(0,0)*a(1,1) - a(0,1)*a(1,0);
    const T_ d = T_(1) / det;
    // inv = adj(a) / det
    b(0,0) =  d*a(1,1);
    b(0,1) = -d*a(0,1);
    b(1,0) = -d*a(1,0);
    b(1,1) =  d*a(0,0);
    return det;
  }
};
/// 3x3 inverse via the adjugate; returns the determinant
template<>
struct InverseHelper<3,3>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 3, 3, smb_, snb_>& b,
    const Tiny::Matrix<T_, 3, 3, sma_, sna_>& a)
  {
    // first adjugate column (cofactors of row 0)
    b(0,0) = a(1,1)*a(2,2) - a(1,2)*a(2,1);
    b(1,0) = a(1,2)*a(2,0) - a(1,0)*a(2,2);
    b(2,0) = a(1,0)*a(2,1) - a(1,1)*a(2,0);
    // determinant via Laplace expansion along row 0
    T_ det = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0);
    const T_ d = T_(1) / det;
    // scale the already computed first column by 1/det
    b(0,0) *= d;
    b(1,0) *= d;
    b(2,0) *= d;
    b(0,1) = d*(a(0,2)*a(2,1) - a(0,1)*a(2,2));
    b(1,1) = d*(a(0,0)*a(2,2) - a(0,2)*a(2,0));
    b(2,1) = d*(a(0,1)*a(2,0) - a(0,0)*a(2,1));
    b(0,2) = d*(a(0,1)*a(1,2) - a(0,2)*a(1,1));
    b(1,2) = d*(a(0,2)*a(1,0) - a(0,0)*a(1,2));
    b(2,2) = d*(a(0,0)*a(1,1) - a(0,1)*a(1,0));
    return det;
  }
};
/// 4x4 inverse via cached 2x2 sub-determinants; returns the determinant
template<>
struct InverseHelper<4,4>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 4, 4, smb_, snb_>& b,
    const Tiny::Matrix<T_, 4, 4, sma_, sna_>& a)
  {
    // 2x2 sub-determinants of rows 2 and 3
    T_ w[6];
    w[0] = a(2,0)*a(3,1)-a(2,1)*a(3,0);
    w[1] = a(2,0)*a(3,2)-a(2,2)*a(3,0);
    w[2] = a(2,0)*a(3,3)-a(2,3)*a(3,0);
    w[3] = a(2,1)*a(3,2)-a(2,2)*a(3,1);
    w[4] = a(2,1)*a(3,3)-a(2,3)*a(3,1);
    w[5] = a(2,2)*a(3,3)-a(2,3)*a(3,2);
    // first adjugate column
    b(0,0) = a(1,1)*w[5]-a(1,2)*w[4]+a(1,3)*w[3];
    b(1,0) =-a(1,0)*w[5]+a(1,2)*w[2]-a(1,3)*w[1];
    b(2,0) = a(1,0)*w[4]-a(1,1)*w[2]+a(1,3)*w[0];
    b(3,0) =-a(1,0)*w[3]+a(1,1)*w[1]-a(1,2)*w[0];
    // determinant via Laplace expansion along row 0
    T_ det = a(0,0)*b(0,0)+a(0,1)*b(1,0)+a(0,2)*b(2,0)+a(0,3)*b(3,0);
    const T_ d = T_(1) / det;
    // scale the already computed first column by 1/det
    b(0,0) *= d;
    b(1,0) *= d;
    b(2,0) *= d;
    b(3,0) *= d;
    b(0,1) = d*(-a(0,1)*w[5]+a(0,2)*w[4]-a(0,3)*w[3]);
    b(1,1) = d*( a(0,0)*w[5]-a(0,2)*w[2]+a(0,3)*w[1]);
    b(2,1) = d*(-a(0,0)*w[4]+a(0,1)*w[2]-a(0,3)*w[0]);
    b(3,1) = d*( a(0,0)*w[3]-a(0,1)*w[1]+a(0,2)*w[0]);
    // reuse w[] for 2x2 sub-determinants of rows 0 and 1
    w[0] = a(0,0)*a(1,1)-a(0,1)*a(1,0);
    w[1] = a(0,0)*a(1,2)-a(0,2)*a(1,0);
    w[2] = a(0,0)*a(1,3)-a(0,3)*a(1,0);
    w[3] = a(0,1)*a(1,2)-a(0,2)*a(1,1);
    w[4] = a(0,1)*a(1,3)-a(0,3)*a(1,1);
    w[5] = a(0,2)*a(1,3)-a(0,3)*a(1,2);
    b(0,2) = d*( a(3,1)*w[5]-a(3,2)*w[4]+a(3,3)*w[3]);
    b(1,2) = d*(-a(3,0)*w[5]+a(3,2)*w[2]-a(3,3)*w[1]);
    b(2,2) = d*( a(3,0)*w[4]-a(3,1)*w[2]+a(3,3)*w[0]);
    b(3,2) = d*(-a(3,0)*w[3]+a(3,1)*w[1]-a(3,2)*w[0]);
    b(0,3) = d*(-a(2,1)*w[5]+a(2,2)*w[4]-a(2,3)*w[3]);
    b(1,3) = d*( a(2,0)*w[5]-a(2,2)*w[2]+a(2,3)*w[1]);
    b(2,3) = d*(-a(2,0)*w[4]+a(2,1)*w[2]-a(2,3)*w[0]);
    b(3,3) = d*( a(2,0)*w[3]-a(2,1)*w[1]+a(2,2)*w[0]);
    return det;
  }
};
/// 5x5 inverse via cached 2x2/3x3 sub-determinants; returns the determinant
template<>
struct InverseHelper<5,5>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 5, 5, smb_, snb_>& b,
    const Tiny::Matrix<T_, 5, 5, sma_, sna_>& a)
  {
    // w[0..9]: 2x2 sub-determinants of rows 3,4; w[10..19]: 3x3 sub-determinants of rows 2,3,4
    T_ w[20];
    w[ 0] = a(3,0)*a(4,1)-a(3,1)*a(4,0);
    w[ 1] = a(3,0)*a(4,2)-a(3,2)*a(4,0);
    w[ 2] = a(3,0)*a(4,3)-a(3,3)*a(4,0);
    w[ 3] = a(3,0)*a(4,4)-a(3,4)*a(4,0);
    w[ 4] = a(3,1)*a(4,2)-a(3,2)*a(4,1);
    w[ 5] = a(3,1)*a(4,3)-a(3,3)*a(4,1);
    w[ 6] = a(3,1)*a(4,4)-a(3,4)*a(4,1);
    w[ 7] = a(3,2)*a(4,3)-a(3,3)*a(4,2);
    w[ 8] = a(3,2)*a(4,4)-a(3,4)*a(4,2);
    w[ 9] = a(3,3)*a(4,4)-a(3,4)*a(4,3);
    w[10] = a(2,0)*w[4]-a(2,1)*w[1]+a(2,2)*w[0];
    w[11] = a(2,0)*w[5]-a(2,1)*w[2]+a(2,3)*w[0];
    w[12] = a(2,0)*w[6]-a(2,1)*w[3]+a(2,4)*w[0];
    w[13] = a(2,0)*w[7]-a(2,2)*w[2]+a(2,3)*w[1];
    w[14] = a(2,0)*w[8]-a(2,2)*w[3]+a(2,4)*w[1];
    w[15] = a(2,0)*w[9]-a(2,3)*w[3]+a(2,4)*w[2];
    w[16] = a(2,1)*w[7]-a(2,2)*w[5]+a(2,3)*w[4];
    w[17] = a(2,1)*w[8]-a(2,2)*w[6]+a(2,4)*w[4];
    w[18] = a(2,1)*w[9]-a(2,3)*w[6]+a(2,4)*w[5];
    w[19] = a(2,2)*w[9]-a(2,3)*w[8]+a(2,4)*w[7];
    // first adjugate column
    b(0,0) = a(1,1)*w[19]-a(1,2)*w[18]+a(1,3)*w[17]-a(1,4)*w[16];
    b(1,0) =-a(1,0)*w[19]+a(1,2)*w[15]-a(1,3)*w[14]+a(1,4)*w[13];
    b(2,0) = a(1,0)*w[18]-a(1,1)*w[15]+a(1,3)*w[12]-a(1,4)*w[11];
    b(3,0) =-a(1,0)*w[17]+a(1,1)*w[14]-a(1,2)*w[12]+a(1,4)*w[10];
    b(4,0) = a(1,0)*w[16]-a(1,1)*w[13]+a(1,2)*w[11]-a(1,3)*w[10];
    // determinant via Laplace expansion along row 0
    T_ det = a(0,0)*b(0,0)+a(0,1)*b(1,0)+a(0,2)*b(2,0)+a(0,3)*b(3,0)+a(0,4)*b(4,0);
    const T_ d = T_(1) / det;
    // scale the already computed first column by 1/det
    b(0,0) *= d;
    b(1,0) *= d;
    b(2,0) *= d;
    b(3,0) *= d;
    b(4,0) *= d;
    b(0,1) = d*(-a(0,1)*w[19]+a(0,2)*w[18]-a(0,3)*w[17]+a(0,4)*w[16]);
    b(1,1) = d*( a(0,0)*w[19]-a(0,2)*w[15]+a(0,3)*w[14]-a(0,4)*w[13]);
    b(2,1) = d*(-a(0,0)*w[18]+a(0,1)*w[15]-a(0,3)*w[12]+a(0,4)*w[11]);
    b(3,1) = d*( a(0,0)*w[17]-a(0,1)*w[14]+a(0,2)*w[12]-a(0,4)*w[10]);
    b(4,1) = d*(-a(0,0)*w[16]+a(0,1)*w[13]-a(0,2)*w[11]+a(0,3)*w[10]);
    // 3x3 sub-determinants of rows 1,3,4
    w[10] = a(1,0)*w[4]-a(1,1)*w[1]+a(1,2)*w[0];
    w[11] = a(1,0)*w[5]-a(1,1)*w[2]+a(1,3)*w[0];
    w[12] = a(1,0)*w[6]-a(1,1)*w[3]+a(1,4)*w[0];
    w[13] = a(1,0)*w[7]-a(1,2)*w[2]+a(1,3)*w[1];
    w[14] = a(1,0)*w[8]-a(1,2)*w[3]+a(1,4)*w[1];
    w[15] = a(1,0)*w[9]-a(1,3)*w[3]+a(1,4)*w[2];
    w[16] = a(1,1)*w[7]-a(1,2)*w[5]+a(1,3)*w[4];
    w[17] = a(1,1)*w[8]-a(1,2)*w[6]+a(1,4)*w[4];
    w[18] = a(1,1)*w[9]-a(1,3)*w[6]+a(1,4)*w[5];
    w[19] = a(1,2)*w[9]-a(1,3)*w[8]+a(1,4)*w[7];
    b(0,2) = d*( a(0,1)*w[19]-a(0,2)*w[18]+a(0,3)*w[17]-a(0,4)*w[16]);
    b(1,2) = d*(-a(0,0)*w[19]+a(0,2)*w[15]-a(0,3)*w[14]+a(0,4)*w[13]);
    b(2,2) = d*( a(0,0)*w[18]-a(0,1)*w[15]+a(0,3)*w[12]-a(0,4)*w[11]);
    b(3,2) = d*(-a(0,0)*w[17]+a(0,1)*w[14]-a(0,2)*w[12]+a(0,4)*w[10]);
    b(4,2) = d*( a(0,0)*w[16]-a(0,1)*w[13]+a(0,2)*w[11]-a(0,3)*w[10]);
    // reuse w[]: 2x2 sub-determinants of rows 0,1 and 3x3 of rows 0,1,2
    w[ 0] = a(0,0)*a(1,1)-a(0,1)*a(1,0);
    w[ 1] = a(0,0)*a(1,2)-a(0,2)*a(1,0);
    w[ 2] = a(0,0)*a(1,3)-a(0,3)*a(1,0);
    w[ 3] = a(0,0)*a(1,4)-a(0,4)*a(1,0);
    w[ 4] = a(0,1)*a(1,2)-a(0,2)*a(1,1);
    w[ 5] = a(0,1)*a(1,3)-a(0,3)*a(1,1);
    w[ 6] = a(0,1)*a(1,4)-a(0,4)*a(1,1);
    w[ 7] = a(0,2)*a(1,3)-a(0,3)*a(1,2);
    w[ 8] = a(0,2)*a(1,4)-a(0,4)*a(1,2);
    w[ 9] = a(0,3)*a(1,4)-a(0,4)*a(1,3);
    w[10] = a(2,0)*w[4]-a(2,1)*w[1]+a(2,2)*w[0];
    w[11] = a(2,0)*w[5]-a(2,1)*w[2]+a(2,3)*w[0];
    w[12] = a(2,0)*w[6]-a(2,1)*w[3]+a(2,4)*w[0];
    w[13] = a(2,0)*w[7]-a(2,2)*w[2]+a(2,3)*w[1];
    w[14] = a(2,0)*w[8]-a(2,2)*w[3]+a(2,4)*w[1];
    w[15] = a(2,0)*w[9]-a(2,3)*w[3]+a(2,4)*w[2];
    w[16] = a(2,1)*w[7]-a(2,2)*w[5]+a(2,3)*w[4];
    w[17] = a(2,1)*w[8]-a(2,2)*w[6]+a(2,4)*w[4];
    w[18] = a(2,1)*w[9]-a(2,3)*w[6]+a(2,4)*w[5];
    w[19] = a(2,2)*w[9]-a(2,3)*w[8]+a(2,4)*w[7];
    b(0,3) = d*( a(4,1)*w[19]-a(4,2)*w[18]+a(4,3)*w[17]-a(4,4)*w[16]);
    b(1,3) = d*(-a(4,0)*w[19]+a(4,2)*w[15]-a(4,3)*w[14]+a(4,4)*w[13]);
    b(2,3) = d*( a(4,0)*w[18]-a(4,1)*w[15]+a(4,3)*w[12]-a(4,4)*w[11]);
    b(3,3) = d*(-a(4,0)*w[17]+a(4,1)*w[14]-a(4,2)*w[12]+a(4,4)*w[10]);
    b(4,3) = d*( a(4,0)*w[16]-a(4,1)*w[13]+a(4,2)*w[11]-a(4,3)*w[10]);
    b(0,4) = d*(-a(3,1)*w[19]+a(3,2)*w[18]-a(3,3)*w[17]+a(3,4)*w[16]);
    b(1,4) = d*( a(3,0)*w[19]-a(3,2)*w[15]+a(3,3)*w[14]-a(3,4)*w[13]);
    b(2,4) = d*(-a(3,0)*w[18]+a(3,1)*w[15]-a(3,3)*w[12]+a(3,4)*w[11]);
    b(3,4) = d*( a(3,0)*w[17]-a(3,1)*w[14]+a(3,2)*w[12]-a(3,4)*w[10]);
    b(4,4) = d*(-a(3,0)*w[16]+a(3,1)*w[13]-a(3,2)*w[11]+a(3,3)*w[10]);
    return det;
  }
};
/// 6x6 inverse via cached 2x2/3x3/4x4 sub-determinants; returns the determinant
template<>
struct InverseHelper<6,6>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static T_ compute(Tiny::Matrix<T_, 6, 6, smb_, snb_>& b,
    const Tiny::Matrix<T_, 6, 6, sma_, sna_>& a)
  {
    // scratch: 2x2 sub-determinants in w[0..14], 3x3 in w[15..34],
    // then 4x4 minors are folded back into w[0..14]
    T_ w[35];
    w[ 0] = a(4,0)*a(5,1)-a(4,1)*a(5,0);
    w[ 1] = a(4,0)*a(5,2)-a(4,2)*a(5,0);
    w[ 2] = a(4,0)*a(5,3)-a(4,3)*a(5,0);
    w[ 3] = a(4,0)*a(5,4)-a(4,4)*a(5,0);
    w[ 4] = a(4,0)*a(5,5)-a(4,5)*a(5,0);
    w[ 5] = a(4,1)*a(5,2)-a(4,2)*a(5,1);
    w[ 6] = a(4,1)*a(5,3)-a(4,3)*a(5,1);
    w[ 7] = a(4,1)*a(5,4)-a(4,4)*a(5,1);
    w[ 8] = a(4,1)*a(5,5)-a(4,5)*a(5,1);
    w[ 9] = a(4,2)*a(5,3)-a(4,3)*a(5,2);
    w[10] = a(4,2)*a(5,4)-a(4,4)*a(5,2);
    w[11] = a(4,2)*a(5,5)-a(4,5)*a(5,2);
    w[12] = a(4,3)*a(5,4)-a(4,4)*a(5,3);
    w[13] = a(4,3)*a(5,5)-a(4,5)*a(5,3);
    w[14] = a(4,4)*a(5,5)-a(4,5)*a(5,4);
    w[15] = a(3,0)*w[5]-a(3,1)*w[1]+a(3,2)*w[0];
    w[16] = a(3,0)*w[6]-a(3,1)*w[2]+a(3,3)*w[0];
    w[17] = a(3,0)*w[7]-a(3,1)*w[3]+a(3,4)*w[0];
    w[18] = a(3,0)*w[8]-a(3,1)*w[4]+a(3,5)*w[0];
    w[19] = a(3,0)*w[9]-a(3,2)*w[2]+a(3,3)*w[1];
    w[20] = a(3,0)*w[10]-a(3,2)*w[3]+a(3,4)*w[1];
    w[21] = a(3,0)*w[11]-a(3,2)*w[4]+a(3,5)*w[1];
    w[22] = a(3,0)*w[12]-a(3,3)*w[3]+a(3,4)*w[2];
    w[23] = a(3,0)*w[13]-a(3,3)*w[4]+a(3,5)*w[2];
    w[24] = a(3,0)*w[14]-a(3,4)*w[4]+a(3,5)*w[3];
    w[25] = a(3,1)*w[9]-a(3,2)*w[6]+a(3,3)*w[5];
    w[26] = a(3,1)*w[10]-a(3,2)*w[7]+a(3,4)*w[5];
    w[27] = a(3,1)*w[11]-a(3,2)*w[8]+a(3,5)*w[5];
    w[28] = a(3,1)*w[12]-a(3,3)*w[7]+a(3,4)*w[6];
    w[29] = a(3,1)*w[13]-a(3,3)*w[8]+a(3,5)*w[6];
    w[30] = a(3,1)*w[14]-a(3,4)*w[8]+a(3,5)*w[7];
    w[31] = a(3,2)*w[12]-a(3,3)*w[10]+a(3,4)*w[9];
    w[32] = a(3,2)*w[13]-a(3,3)*w[11]+a(3,5)*w[9];
    w[33] = a(3,2)*w[14]-a(3,4)*w[11]+a(3,5)*w[10];
    w[34] = a(3,3)*w[14]-a(3,4)*w[13]+a(3,5)*w[12];
    w[ 0] = a(2,0)*w[25]-a(2,1)*w[19]+a(2,2)*w[16]-a(2,3)*w[15];
    w[ 1] = a(2,0)*w[26]-a(2,1)*w[20]+a(2,2)*w[17]-a(2,4)*w[15];
    w[ 2] = a(2,0)*w[27]-a(2,1)*w[21]+a(2,2)*w[18]-a(2,5)*w[15];
    w[ 3] = a(2,0)*w[28]-a(2,1)*w[22]+a(2,3)*w[17]-a(2,4)*w[16];
    w[ 4] = a(2,0)*w[29]-a(2,1)*w[23]+a(2,3)*w[18]-a(2,5)*w[16];
    w[ 5] = a(2,0)*w[30]-a(2,1)*w[24]+a(2,4)*w[18]-a(2,5)*w[17];
    w[ 6] = a(2,0)*w[31]-a(2,2)*w[22]+a(2,3)*w[20]-a(2,4)*w[19];
    w[ 7] = a(2,0)*w[32]-a(2,2)*w[23]+a(2,3)*w[21]-a(2,5)*w[19];
    w[ 8] = a(2,0)*w[33]-a(2,2)*w[24]+a(2,4)*w[21]-a(2,5)*w[20];
    w[ 9] = a(2,0)*w[34]-a(2,3)*w[24]+a(2,4)*w[23]-a(2,5)*w[22];
    w[10] = a(2,1)*w[31]-a(2,2)*w[28]+a(2,3)*w[26]-a(2,4)*w[25];
    w[11] = a(2,1)*w[32]-a(2,2)*w[29]+a(2,3)*w[27]-a(2,5)*w[25];
    w[12] = a(2,1)*w[33]-a(2,2)*w[30]+a(2,4)*w[27]-a(2,5)*w[26];
    w[13] = a(2,1)*w[34]-a(2,3)*w[30]+a(2,4)*w[29]-a(2,5)*w[28];
    w[14] = a(2,2)*w[34]-a(2,3)*w[33]+a(2,4)*w[32]-a(2,5)*w[31];
    // first adjugate column
    b(0,0) = a(1,1)*w[14]-a(1,2)*w[13]+a(1,3)*w[12]-a(1,4)*w[11]+a(1,5)*w[10];
    b(1,0) = -a(1,0)*w[14]+a(1,2)*w[9]-a(1,3)*w[8]+a(1,4)*w[7]-a(1,5)*w[6];
    b(2,0) = a(1,0)*w[13]-a(1,1)*w[9]+a(1,3)*w[5]-a(1,4)*w[4]+a(1,5)*w[3];
    b(3,0) = -a(1,0)*w[12]+a(1,1)*w[8]-a(1,2)*w[5]+a(1,4)*w[2]-a(1,5)*w[1];
    b(4,0) = a(1,0)*w[11]-a(1,1)*w[7]+a(1,2)*w[4]-a(1,3)*w[2]+a(1,5)*w[0];
    b(5,0) = -a(1,0)*w[10]+a(1,1)*w[6]-a(1,2)*w[3]+a(1,3)*w[1]-a(1,4)*w[0];
    // determinant via Laplace expansion along row 0
    T_ det = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0)
           + a(0,3)*b(3,0) + a(0,4)*b(4,0) + a(0,5)*b(5,0);
    const T_ d = T_(1) / det;
    // scale the already computed first column by 1/det
    b(0,0) *= d;
    b(1,0) *= d;
    b(2,0) *= d;
    b(3,0) *= d;
    b(4,0) *= d;
    b(5,0) *= d;
    b(0,1) = d*(-a(0,1)*w[14]+a(0,2)*w[13]-a(0,3)*w[12]+a(0,4)*w[11]-a(0,5)*w[10]);
    b(1,1) = d*( a(0,0)*w[14]-a(0,2)*w[9]+a(0,3)*w[8]-a(0,4)*w[7]+a(0,5)*w[6]);
    b(2,1) = d*(-a(0,0)*w[13]+a(0,1)*w[9]-a(0,3)*w[5]+a(0,4)*w[4]-a(0,5)*w[3]);
    b(3,1) = d*( a(0,0)*w[12]-a(0,1)*w[8]+a(0,2)*w[5]-a(0,4)*w[2]+a(0,5)*w[1]);
    b(4,1) = d*(-a(0,0)*w[11]+a(0,1)*w[7]-a(0,2)*w[4]+a(0,3)*w[2]-a(0,5)*w[0]);
    b(5,1) = d*( a(0,0)*w[10]-a(0,1)*w[6]+a(0,2)*w[3]-a(0,3)*w[1]+a(0,4)*w[0]);
    // rebuild minors with rows {4,5} / {1,4,5} / {0,1,4,5} for columns 2,3
    w[ 0] = a(4,0)*a(5,1)-a(4,1)*a(5,0);
    w[ 1] = a(4,0)*a(5,2)-a(4,2)*a(5,0);
    w[ 2] = a(4,0)*a(5,3)-a(4,3)*a(5,0);
    w[ 3] = a(4,0)*a(5,4)-a(4,4)*a(5,0);
    w[ 4] = a(4,0)*a(5,5)-a(4,5)*a(5,0);
    w[ 5] = a(4,1)*a(5,2)-a(4,2)*a(5,1);
    w[ 6] = a(4,1)*a(5,3)-a(4,3)*a(5,1);
    w[ 7] = a(4,1)*a(5,4)-a(4,4)*a(5,1);
    w[ 8] = a(4,1)*a(5,5)-a(4,5)*a(5,1);
    w[ 9] = a(4,2)*a(5,3)-a(4,3)*a(5,2);
    w[10] = a(4,2)*a(5,4)-a(4,4)*a(5,2);
    w[11] = a(4,2)*a(5,5)-a(4,5)*a(5,2);
    w[12] = a(4,3)*a(5,4)-a(4,4)*a(5,3);
    w[13] = a(4,3)*a(5,5)-a(4,5)*a(5,3);
    w[14] = a(4,4)*a(5,5)-a(4,5)*a(5,4);
    w[15] = a(1,0)*w[5]-a(1,1)*w[1]+a(1,2)*w[0];
    w[16] = a(1,0)*w[6]-a(1,1)*w[2]+a(1,3)*w[0];
    w[17] = a(1,0)*w[7]-a(1,1)*w[3]+a(1,4)*w[0];
    w[18] = a(1,0)*w[8]-a(1,1)*w[4]+a(1,5)*w[0];
    w[19] = a(1,0)*w[9]-a(1,2)*w[2]+a(1,3)*w[1];
    w[20] = a(1,0)*w[10]-a(1,2)*w[3]+a(1,4)*w[1];
    w[21] = a(1,0)*w[11]-a(1,2)*w[4]+a(1,5)*w[1];
    w[22] = a(1,0)*w[12]-a(1,3)*w[3]+a(1,4)*w[2];
    w[23] = a(1,0)*w[13]-a(1,3)*w[4]+a(1,5)*w[2];
    w[24] = a(1,0)*w[14]-a(1,4)*w[4]+a(1,5)*w[3];
    w[25] = a(1,1)*w[9]-a(1,2)*w[6]+a(1,3)*w[5];
    w[26] = a(1,1)*w[10]-a(1,2)*w[7]+a(1,4)*w[5];
    w[27] = a(1,1)*w[11]-a(1,2)*w[8]+a(1,5)*w[5];
    w[28] = a(1,1)*w[12]-a(1,3)*w[7]+a(1,4)*w[6];
    w[29] = a(1,1)*w[13]-a(1,3)*w[8]+a(1,5)*w[6];
    w[30] = a(1,1)*w[14]-a(1,4)*w[8]+a(1,5)*w[7];
    w[31] = a(1,2)*w[12]-a(1,3)*w[10]+a(1,4)*w[9];
    w[32] = a(1,2)*w[13]-a(1,3)*w[11]+a(1,5)*w[9];
    w[33] = a(1,2)*w[14]-a(1,4)*w[11]+a(1,5)*w[10];
    w[34] = a(1,3)*w[14]-a(1,4)*w[13]+a(1,5)*w[12];
    w[ 0] = a(0,0)*w[25]-a(0,1)*w[19]+a(0,2)*w[16]-a(0,3)*w[15];
    w[ 1] = a(0,0)*w[26]-a(0,1)*w[20]+a(0,2)*w[17]-a(0,4)*w[15];
    w[ 2] = a(0,0)*w[27]-a(0,1)*w[21]+a(0,2)*w[18]-a(0,5)*w[15];
    w[ 3] = a(0,0)*w[28]-a(0,1)*w[22]+a(0,3)*w[17]-a(0,4)*w[16];
    w[ 4] = a(0,0)*w[29]-a(0,1)*w[23]+a(0,3)*w[18]-a(0,5)*w[16];
    w[ 5] = a(0,0)*w[30]-a(0,1)*w[24]+a(0,4)*w[18]-a(0,5)*w[17];
    w[ 6] = a(0,0)*w[31]-a(0,2)*w[22]+a(0,3)*w[20]-a(0,4)*w[19];
    w[ 7] = a(0,0)*w[32]-a(0,2)*w[23]+a(0,3)*w[21]-a(0,5)*w[19];
    w[ 8] = a(0,0)*w[33]-a(0,2)*w[24]+a(0,4)*w[21]-a(0,5)*w[20];
    w[ 9] = a(0,0)*w[34]-a(0,3)*w[24]+a(0,4)*w[23]-a(0,5)*w[22];
    w[10] = a(0,1)*w[31]-a(0,2)*w[28]+a(0,3)*w[26]-a(0,4)*w[25];
    w[11] = a(0,1)*w[32]-a(0,2)*w[29]+a(0,3)*w[27]-a(0,5)*w[25];
    w[12] = a(0,1)*w[33]-a(0,2)*w[30]+a(0,4)*w[27]-a(0,5)*w[26];
    w[13] = a(0,1)*w[34]-a(0,3)*w[30]+a(0,4)*w[29]-a(0,5)*w[28];
    w[14] = a(0,2)*w[34]-a(0,3)*w[33]+a(0,4)*w[32]-a(0,5)*w[31];
    b(0,2) = d*( a(3,1)*w[14]-a(3,2)*w[13]+a(3,3)*w[12]-a(3,4)*w[11]+a(3,5)*w[10]);
    b(1,2) = d*(-a(3,0)*w[14]+a(3,2)*w[9]-a(3,3)*w[8]+a(3,4)*w[7]-a(3,5)*w[6]);
    b(2,2) = d*( a(3,0)*w[13]-a(3,1)*w[9]+a(3,3)*w[5]-a(3,4)*w[4]+a(3,5)*w[3]);
    b(3,2) = d*(-a(3,0)*w[12]+a(3,1)*w[8]-a(3,2)*w[5]+a(3,4)*w[2]-a(3,5)*w[1]);
    b(4,2) = d*( a(3,0)*w[11]-a(3,1)*w[7]+a(3,2)*w[4]-a(3,3)*w[2]+a(3,5)*w[0]);
    b(5,2) = d*(-a(3,0)*w[10]+a(3,1)*w[6]-a(3,2)*w[3]+a(3,3)*w[1]-a(3,4)*w[0]);
    b(0,3) = d*(-a(2,1)*w[14]+a(2,2)*w[13]-a(2,3)*w[12]+a(2,4)*w[11]-a(2,5)*w[10]);
    b(1,3) = d*( a(2,0)*w[14]-a(2,2)*w[9]+a(2,3)*w[8]-a(2,4)*w[7]+a(2,5)*w[6]);
    b(2,3) = d*(-a(2,0)*w[13]+a(2,1)*w[9]-a(2,3)*w[5]+a(2,4)*w[4]-a(2,5)*w[3]);
    b(3,3) = d*( a(2,0)*w[12]-a(2,1)*w[8]+a(2,2)*w[5]-a(2,4)*w[2]+a(2,5)*w[1]);
    b(4,3) = d*(-a(2,0)*w[11]+a(2,1)*w[7]-a(2,2)*w[4]+a(2,3)*w[2]-a(2,5)*w[0]);
    b(5,3) = d*( a(2,0)*w[10]-a(2,1)*w[6]+a(2,2)*w[3]-a(2,3)*w[1]+a(2,4)*w[0]);
    // rebuild minors with rows {2,3} / {1,2,3} / {0,1,2,3} for columns 4,5
    w[ 0] = a(2,0)*a(3,1)-a(2,1)*a(3,0);
    w[ 1] = a(2,0)*a(3,2)-a(2,2)*a(3,0);
    w[ 2] = a(2,0)*a(3,3)-a(2,3)*a(3,0);
    w[ 3] = a(2,0)*a(3,4)-a(2,4)*a(3,0);
    w[ 4] = a(2,0)*a(3,5)-a(2,5)*a(3,0);
    w[ 5] = a(2,1)*a(3,2)-a(2,2)*a(3,1);
    w[ 6] = a(2,1)*a(3,3)-a(2,3)*a(3,1);
    w[ 7] = a(2,1)*a(3,4)-a(2,4)*a(3,1);
    w[ 8] = a(2,1)*a(3,5)-a(2,5)*a(3,1);
    w[ 9] = a(2,2)*a(3,3)-a(2,3)*a(3,2);
    w[10] = a(2,2)*a(3,4)-a(2,4)*a(3,2);
    w[11] = a(2,2)*a(3,5)-a(2,5)*a(3,2);
    w[12] = a(2,3)*a(3,4)-a(2,4)*a(3,3);
    w[13] = a(2,3)*a(3,5)-a(2,5)*a(3,3);
    w[14] = a(2,4)*a(3,5)-a(2,5)*a(3,4);
    w[15] = a(1,0)*w[5]-a(1,1)*w[1]+a(1,2)*w[0];
    w[16] = a(1,0)*w[6]-a(1,1)*w[2]+a(1,3)*w[0];
    w[17] = a(1,0)*w[7]-a(1,1)*w[3]+a(1,4)*w[0];
    w[18] = a(1,0)*w[8]-a(1,1)*w[4]+a(1,5)*w[0];
    w[19] = a(1,0)*w[9]-a(1,2)*w[2]+a(1,3)*w[1];
    w[20] = a(1,0)*w[10]-a(1,2)*w[3]+a(1,4)*w[1];
    w[21] = a(1,0)*w[11]-a(1,2)*w[4]+a(1,5)*w[1];
    w[22] = a(1,0)*w[12]-a(1,3)*w[3]+a(1,4)*w[2];
    w[23] = a(1,0)*w[13]-a(1,3)*w[4]+a(1,5)*w[2];
    w[24] = a(1,0)*w[14]-a(1,4)*w[4]+a(1,5)*w[3];
    w[25] = a(1,1)*w[9]-a(1,2)*w[6]+a(1,3)*w[5];
    w[26] = a(1,1)*w[10]-a(1,2)*w[7]+a(1,4)*w[5];
    w[27] = a(1,1)*w[11]-a(1,2)*w[8]+a(1,5)*w[5];
    w[28] = a(1,1)*w[12]-a(1,3)*w[7]+a(1,4)*w[6];
    w[29] = a(1,1)*w[13]-a(1,3)*w[8]+a(1,5)*w[6];
    w[30] = a(1,1)*w[14]-a(1,4)*w[8]+a(1,5)*w[7];
    w[31] = a(1,2)*w[12]-a(1,3)*w[10]+a(1,4)*w[9];
    w[32] = a(1,2)*w[13]-a(1,3)*w[11]+a(1,5)*w[9];
    w[33] = a(1,2)*w[14]-a(1,4)*w[11]+a(1,5)*w[10];
    w[34] = a(1,3)*w[14]-a(1,4)*w[13]+a(1,5)*w[12];
    w[ 0] = a(0,0)*w[25]-a(0,1)*w[19]+a(0,2)*w[16]-a(0,3)*w[15];
    w[ 1] = a(0,0)*w[26]-a(0,1)*w[20]+a(0,2)*w[17]-a(0,4)*w[15];
    w[ 2] = a(0,0)*w[27]-a(0,1)*w[21]+a(0,2)*w[18]-a(0,5)*w[15];
    w[ 3] = a(0,0)*w[28]-a(0,1)*w[22]+a(0,3)*w[17]-a(0,4)*w[16];
    w[ 4] = a(0,0)*w[29]-a(0,1)*w[23]+a(0,3)*w[18]-a(0,5)*w[16];
    w[ 5] = a(0,0)*w[30]-a(0,1)*w[24]+a(0,4)*w[18]-a(0,5)*w[17];
    w[ 6] = a(0,0)*w[31]-a(0,2)*w[22]+a(0,3)*w[20]-a(0,4)*w[19];
    w[ 7] = a(0,0)*w[32]-a(0,2)*w[23]+a(0,3)*w[21]-a(0,5)*w[19];
    w[ 8] = a(0,0)*w[33]-a(0,2)*w[24]+a(0,4)*w[21]-a(0,5)*w[20];
    w[ 9] = a(0,0)*w[34]-a(0,3)*w[24]+a(0,4)*w[23]-a(0,5)*w[22];
    w[10] = a(0,1)*w[31]-a(0,2)*w[28]+a(0,3)*w[26]-a(0,4)*w[25];
    w[11] = a(0,1)*w[32]-a(0,2)*w[29]+a(0,3)*w[27]-a(0,5)*w[25];
    w[12] = a(0,1)*w[33]-a(0,2)*w[30]+a(0,4)*w[27]-a(0,5)*w[26];
    w[13] = a(0,1)*w[34]-a(0,3)*w[30]+a(0,4)*w[29]-a(0,5)*w[28];
    w[14] = a(0,2)*w[34]-a(0,3)*w[33]+a(0,4)*w[32]-a(0,5)*w[31];
    b(0,4) = d*( a(5,1)*w[14]-a(5,2)*w[13]+a(5,3)*w[12]-a(5,4)*w[11]+a(5,5)*w[10]);
    b(1,4) = d*(-a(5,0)*w[14]+a(5,2)*w[9]-a(5,3)*w[8]+a(5,4)*w[7]-a(5,5)*w[6]);
    b(2,4) = d*( a(5,0)*w[13]-a(5,1)*w[9]+a(5,3)*w[5]-a(5,4)*w[4]+a(5,5)*w[3]);
    b(3,4) = d*(-a(5,0)*w[12]+a(5,1)*w[8]-a(5,2)*w[5]+a(5,4)*w[2]-a(5,5)*w[1]);
    b(4,4) = d*( a(5,0)*w[11]-a(5,1)*w[7]+a(5,2)*w[4]-a(5,3)*w[2]+a(5,5)*w[0]);
    b(5,4) = d*(-a(5,0)*w[10]+a(5,1)*w[6]-a(5,2)*w[3]+a(5,3)*w[1]-a(5,4)*w[0]);
    b(0,5) = d*(-a(4,1)*w[14]+a(4,2)*w[13]-a(4,3)*w[12]+a(4,4)*w[11]-a(4,5)*w[10]);
    b(1,5) = d*( a(4,0)*w[14]-a(4,2)*w[9]+a(4,3)*w[8]-a(4,4)*w[7]+a(4,5)*w[6]);
    b(2,5) = d*(-a(4,0)*w[13]+a(4,1)*w[9]-a(4,3)*w[5]+a(4,4)*w[4]-a(4,5)*w[3]);
    b(3,5) = d*( a(4,0)*w[12]-a(4,1)*w[8]+a(4,2)*w[5]-a(4,4)*w[2]+a(4,5)*w[1]);
    b(4,5) = d*(-a(4,0)*w[11]+a(4,1)*w[7]-a(4,2)*w[4]+a(4,3)*w[2]-a(4,5)*w[0]);
    b(5,5) = d*( a(4,0)*w[10]-a(4,1)*w[6]+a(4,2)*w[3]-a(4,3)*w[1]+a(4,4)*w[0]);
    return det;
  }
};
// Generic fallback: inverts an n_ x n_ matrix by in-place Gauss-Jordan
// elimination with diagonal pivoting via an index/permutation array 'p'.
// NOTE(review): this extraction has dropped several lines of the original
// (the copy of 'a' into 'b', the declaration and initialization of 'p',
// the pivot-swap bookkeeping, the zeroing of the eliminated column entries
// and the final undoing of the row/column permutation) -- the fragments
// below are kept byte-identical; verify against the upstream source.
3936 struct InverseHelper<n_, n_>
3938 template<
typename T_,
int smb_,
int snb_,
int sma_,
int sna_>
3939 CUDA_HOST_DEVICE
static T_ compute(Tiny::Matrix<T_, n_, n_, smb_, snb_>& b,
const Tiny::Matrix<T_, n_, n_, sma_, sna_>& a)
// copy input into the work matrix (loop bodies partially missing here)
3942 for (
int i(0); i < n_; ++i)
3944 for (
int j(0); j < n_; ++j)
3954 for(
int i(0); i < n_; ++i)
// main elimination loop over pivot steps k
3963 for(
int k(0); k < n_; ++k)
// pivot search: compare absolute values of the (permuted) diagonal entries;
// the CUDA and host variants were originally guarded by the preprocessor
3971 T_ pivot = CudaMath::cuda_abs(b(p[k], p[k]));
3973 T_ pivot = Math::abs(b(p[k], p[k]));
3978 for(
int j(k+1); j < n_; ++j)
3982 T_ abs_val = CudaMath::cuda_abs(b(p[j], p[j]));
3984 T_ abs_val = Math::abs(b(p[j], p[j]));
// accumulate the determinant from the chosen pivots
4006 det *= b(p[k], p[k]);
// normalize the pivot row
4009 const T_ pivot = T_(1) / b(p[k], p[k]);
4012 b(p[k], p[k]) = T_(1);
4015 for(
int j(0); j < n_; ++j)
4017 b(p[k], j) *= pivot;
// eliminate the pivot column from all other rows
4024 for(
int i(0); i < n_; ++i)
4031 const T_ factor = b(i, p[k]);
4037 for(
int j(0); j < n_; ++j)
4039 b(i, j) -= b(p[k], j) * factor;
/// grouped 1x1 inverse: a single entry, written by the group's first thread
template<>
struct CudaGroupedInverseHelper<1,1>
{
  template<typename T_, typename ThreadGroup_, int smb_, int snb_, int sma_, int sna_>
  CUDA_DEVICE static __forceinline__ void compute(
    const ThreadGroup_& tg, Tiny::Matrix<T_, 1, 1, smb_, snb_>& b,
    const Tiny::Matrix<T_, 1, 1, sma_, sna_>&,
    const T_& det)
  {
    // only one value to write; avoid a redundant write by every group member
    if(tg.thread_rank() == 0)
      b(0,0) = T_(1) / det;
  }
};
/// grouped 2x2 inverse: distributes the four entries over the thread group
template<>
struct CudaGroupedInverseHelper<2,2>
{
  template<typename T_, typename ThreadGroup_, int smb_, int snb_, int sma_, int sna_>
  CUDA_DEVICE static __forceinline__ void compute(
    const ThreadGroup_& tg, Tiny::Matrix<T_, 2, 2, smb_, snb_>& b,
    const Tiny::Matrix<T_, 2, 2, sma_, sna_>& a,
    const T_& det)
  {
    const T_ d = T_(1) / det;
    // group-stride loop: each thread handles a strided subset of the 4 entries
    for(int idx = tg.thread_rank(); idx < 4; idx += tg.num_threads())
    {
      const int i = idx / 2;
      const int j = idx % 2;
      // closed 2x2 formula: inv(i,j) = +/- a(1-j,1-i)/det; off-diagonals flip sign
      // fix: extracted line wrote b(i,0), which would never fill column 1
      b(i,j) = ((i+j) == 1 ? T_(-1) : T_(1)) * d * a(1-j,1-i);
    }
  }
};
/// grouped 3x3 inverse: one adjugate entry per work item, distributed over the group
// NOTE(review): the per-index dispatch structure was lost in extraction;
// reconstructed here as a switch over the flattened entry index.
template<>
struct CudaGroupedInverseHelper<3,3>
{
  template<typename T_, typename ThreadGroup_, int smb_, int snb_, int sma_, int sna_>
  CUDA_DEVICE static __forceinline__ void compute(
    const ThreadGroup_& tg, Tiny::Matrix<T_, 3, 3, smb_, snb_>& b,
    const Tiny::Matrix<T_, 3, 3, sma_, sna_>& a,
    const T_& det)
  {
    const T_ d = T_(1) / det;
    // group-stride loop over the 9 output entries
    for(int idx = tg.thread_rank(); idx < 9; idx += tg.num_threads())
    {
      switch(idx)
      {
      case 0: b(0,0) = d*(a(1,1)*a(2,2) - a(1,2)*a(2,1)); break;
      case 1: b(1,0) = d*(a(1,2)*a(2,0) - a(1,0)*a(2,2)); break;
      case 2: b(2,0) = d*(a(1,0)*a(2,1) - a(1,1)*a(2,0)); break;
      case 3: b(0,1) = d*(a(0,2)*a(2,1) - a(0,1)*a(2,2)); break;
      case 4: b(1,1) = d*(a(0,0)*a(2,2) - a(0,2)*a(2,0)); break;
      case 5: b(2,1) = d*(a(0,1)*a(2,0) - a(0,0)*a(2,1)); break;
      case 6: b(0,2) = d*(a(0,1)*a(1,2) - a(0,2)*a(1,1)); break;
      case 7: b(1,2) = d*(a(0,2)*a(1,0) - a(0,0)*a(1,2)); break;
      case 8: b(2,2) = d*(a(0,0)*a(1,1) - a(0,1)*a(1,0)); break;
      }
    }
  }
};
// Generic grouped fallback: copy 'a' into 'b', then run a cooperative
// in-place inversion provided by CudaMath over (at most) one warp.
// NOTE(review): extraction dropped lines here -- the body of the 'p' init
// loop (presumably p[i] = i) and any tg.sync() barriers between the copy,
// the init and the inversion call. The shared array 'p' implies this is
// meant to run at block scope; verify barrier placement upstream.
4139 struct CudaGroupedInverseHelper<n_, n_>
4141 template<
typename T_,
typename ThreadGroup_,
int smb_,
int snb_,
int sma_,
int sna_>
4142 CUDA_DEVICE
static __forceinline__
void compute(
const ThreadGroup_& tg, Tiny::Matrix<T_, n_, n_, smb_, snb_>& b,
const Tiny::Matrix<T_, n_, n_, sma_, sna_>& a,
const T_&)
// group-stride copy: each thread copies a strided subset of the n_*n_ entries
4145 for (
int i = tg.thread_rank(); i < n_*n_; i += tg.num_threads())
4147 const int row = i/n_;
4148 const int col = i%n_;
4149 b.v[row][col] = a.v[row][col];
// shared pivot/permutation array used by the grouped inversion routine
4153 __shared__
int p[n_];
4154 for (
int i = tg.thread_rank(); i < n_; i += tg.num_threads())
// restrict to the first warp; coalesced_threads() yields the active lanes
4160 if(tg.thread_rank() < 32)
4162 auto a_g = cg::coalesced_threads();
4164 CudaMath::cuda_grouped_invert_matrix(a_g, n_, snb_, &b.v[0][0], p);
// Generic cofactor computation for square matrices: first invert 'a' into
// 'b' (InverseHelper returns the determinant as a by-product), then scale
// by det and transpose, since cof(A) = det(A) * inv(A)^T.
// NOTE(review): the body of the j <= i loop (the det-scaling statements) is
// missing from this extraction; only the transposition swap of the strict
// upper/lower triangle is visible -- verify against the upstream source.
4177 template<
int m_,
int n_>
4178 struct CofactorHelper
4180 template<
typename T_,
int smb_,
int snb_,
int sma_,
int sna_>
4181 CUDA_HOST_DEVICE
static void compute(Tiny::Matrix<T_, m_, n_, smb_, snb_>& b,
const Tiny::Matrix<T_, m_, n_, sma_, sna_>& a)
4183 static_assert(m_ == n_,
"cofactor computation is only available for square matrices!");
// b := inv(a); det is returned by the inverse helper
4186 const T_ det = Intern::InverseHelper<m_,n_>::compute(b, a);
4188 for(
int i(0); i < m_; ++i)
4190 for(
int j(0); j <= i; ++j)
// transpose: swap the strict upper and lower triangles
4194 for(
int j(i+1); j < n_; ++j)
4196 std::swap(b(i,j), b(j,i));
/// 1x1 cofactor: the (empty) minor determinant is 1 by convention
template<>
struct CofactorHelper<1,1>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static void compute(Tiny::Matrix<T_, 1, 1, smb_, snb_>& b,
    const Tiny::Matrix<T_, 1, 1, sma_, sna_>&)
  {
    b(0,0) = T_(1);
  }
};
/// 2x2 cofactor matrix
// NOTE(review): body was lost in extraction; reconstructed in the same
// (adjugate-style) entry layout used by the visible 3x3/4x4 specializations.
template<>
struct CofactorHelper<2,2>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static void compute(Tiny::Matrix<T_, 2, 2, smb_, snb_>& b,
    const Tiny::Matrix<T_, 2, 2, sma_, sna_>& a)
  {
    b(0,0) =  a(1,1);
    b(0,1) = -a(0,1);
    b(1,0) = -a(1,0);
    b(1,1) =  a(0,0);
  }
};
/// 3x3 cofactor matrix: same entry formulas as the 3x3 inverse, without the 1/det scaling
template<>
struct CofactorHelper<3,3>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static void compute(Tiny::Matrix<T_, 3, 3, smb_, snb_>& b,
    const Tiny::Matrix<T_, 3, 3, sma_, sna_>& a)
  {
    b(0,0) = a(1,1)*a(2,2) - a(1,2)*a(2,1);
    b(1,0) = a(1,2)*a(2,0) - a(1,0)*a(2,2);
    b(2,0) = a(1,0)*a(2,1) - a(1,1)*a(2,0);
    b(0,1) = a(0,2)*a(2,1) - a(0,1)*a(2,2);
    b(1,1) = a(0,0)*a(2,2) - a(0,2)*a(2,0);
    b(2,1) = a(0,1)*a(2,0) - a(0,0)*a(2,1);
    b(0,2) = a(0,1)*a(1,2) - a(0,2)*a(1,1);
    b(1,2) = a(0,2)*a(1,0) - a(0,0)*a(1,2);
    b(2,2) = a(0,0)*a(1,1) - a(0,1)*a(1,0);
  }
};
/// 4x4 cofactor matrix via cached 2x2 sub-determinants (unscaled inverse entries)
template<>
struct CofactorHelper<4,4>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static void compute(Tiny::Matrix<T_, 4, 4, smb_, snb_>& b,
    const Tiny::Matrix<T_, 4, 4, sma_, sna_>& a)
  {
    // 2x2 sub-determinants of rows 2 and 3
    T_ w[6];
    w[0] = a(2,0)*a(3,1)-a(2,1)*a(3,0);
    w[1] = a(2,0)*a(3,2)-a(2,2)*a(3,0);
    w[2] = a(2,0)*a(3,3)-a(2,3)*a(3,0);
    w[3] = a(2,1)*a(3,2)-a(2,2)*a(3,1);
    w[4] = a(2,1)*a(3,3)-a(2,3)*a(3,1);
    w[5] = a(2,2)*a(3,3)-a(2,3)*a(3,2);
    b(0,0) = a(1,1)*w[5]-a(1,2)*w[4]+a(1,3)*w[3];
    b(1,0) =-a(1,0)*w[5]+a(1,2)*w[2]-a(1,3)*w[1];
    b(2,0) = a(1,0)*w[4]-a(1,1)*w[2]+a(1,3)*w[0];
    b(3,0) =-a(1,0)*w[3]+a(1,1)*w[1]-a(1,2)*w[0];
    b(0,1) =-a(0,1)*w[5]+a(0,2)*w[4]-a(0,3)*w[3];
    b(1,1) = a(0,0)*w[5]-a(0,2)*w[2]+a(0,3)*w[1];
    b(2,1) =-a(0,0)*w[4]+a(0,1)*w[2]-a(0,3)*w[0];
    b(3,1) = a(0,0)*w[3]-a(0,1)*w[1]+a(0,2)*w[0];
    // reuse w[] for 2x2 sub-determinants of rows 0 and 1
    w[0] = a(0,0)*a(1,1)-a(0,1)*a(1,0);
    w[1] = a(0,0)*a(1,2)-a(0,2)*a(1,0);
    w[2] = a(0,0)*a(1,3)-a(0,3)*a(1,0);
    w[3] = a(0,1)*a(1,2)-a(0,2)*a(1,1);
    w[4] = a(0,1)*a(1,3)-a(0,3)*a(1,1);
    w[5] = a(0,2)*a(1,3)-a(0,3)*a(1,2);
    b(0,2) = a(3,1)*w[5]-a(3,2)*w[4]+a(3,3)*w[3];
    b(1,2) =-a(3,0)*w[5]+a(3,2)*w[2]-a(3,3)*w[1];
    b(2,2) = a(3,0)*w[4]-a(3,1)*w[2]+a(3,3)*w[0];
    b(3,2) =-a(3,0)*w[3]+a(3,1)*w[1]-a(3,2)*w[0];
    b(0,3) =-a(2,1)*w[5]+a(2,2)*w[4]-a(2,3)*w[3];
    b(1,3) = a(2,0)*w[5]-a(2,2)*w[2]+a(2,3)*w[1];
    b(2,3) =-a(2,0)*w[4]+a(2,1)*w[2]-a(2,3)*w[0];
    b(3,3) = a(2,0)*w[3]-a(2,1)*w[1]+a(2,2)*w[0];
  }
};
/// 5x5 cofactor matrix via cached 2x2/3x3 sub-determinants (unscaled inverse entries)
template<>
struct CofactorHelper<5,5>
{
  template<typename T_, int smb_, int snb_, int sma_, int sna_>
  CUDA_HOST_DEVICE static void compute(Tiny::Matrix<T_, 5, 5, smb_, snb_>& b,
    const Tiny::Matrix<T_, 5, 5, sma_, sna_>& a)
  {
    // w[0..9]: 2x2 sub-determinants of rows 3,4; w[10..19]: 3x3 sub-determinants of rows 2,3,4
    T_ w[20];
    w[ 0] = a(3,0)*a(4,1)-a(3,1)*a(4,0);
    w[ 1] = a(3,0)*a(4,2)-a(3,2)*a(4,0);
    w[ 2] = a(3,0)*a(4,3)-a(3,3)*a(4,0);
    w[ 3] = a(3,0)*a(4,4)-a(3,4)*a(4,0);
    w[ 4] = a(3,1)*a(4,2)-a(3,2)*a(4,1);
    w[ 5] = a(3,1)*a(4,3)-a(3,3)*a(4,1);
    w[ 6] = a(3,1)*a(4,4)-a(3,4)*a(4,1);
    w[ 7] = a(3,2)*a(4,3)-a(3,3)*a(4,2);
    w[ 8] = a(3,2)*a(4,4)-a(3,4)*a(4,2);
    w[ 9] = a(3,3)*a(4,4)-a(3,4)*a(4,3);
    w[10] = a(2,0)*w[4]-a(2,1)*w[1]+a(2,2)*w[0];
    w[11] = a(2,0)*w[5]-a(2,1)*w[2]+a(2,3)*w[0];
    w[12] = a(2,0)*w[6]-a(2,1)*w[3]+a(2,4)*w[0];
    w[13] = a(2,0)*w[7]-a(2,2)*w[2]+a(2,3)*w[1];
    w[14] = a(2,0)*w[8]-a(2,2)*w[3]+a(2,4)*w[1];
    w[15] = a(2,0)*w[9]-a(2,3)*w[3]+a(2,4)*w[2];
    w[16] = a(2,1)*w[7]-a(2,2)*w[5]+a(2,3)*w[4];
    w[17] = a(2,1)*w[8]-a(2,2)*w[6]+a(2,4)*w[4];
    w[18] = a(2,1)*w[9]-a(2,3)*w[6]+a(2,4)*w[5];
    w[19] = a(2,2)*w[9]-a(2,3)*w[8]+a(2,4)*w[7];
    b(0,0) = a(1,1)*w[19]-a(1,2)*w[18]+a(1,3)*w[17]-a(1,4)*w[16];
    b(1,0) =-a(1,0)*w[19]+a(1,2)*w[15]-a(1,3)*w[14]+a(1,4)*w[13];
    b(2,0) = a(1,0)*w[18]-a(1,1)*w[15]+a(1,3)*w[12]-a(1,4)*w[11];
    b(3,0) =-a(1,0)*w[17]+a(1,1)*w[14]-a(1,2)*w[12]+a(1,4)*w[10];
    b(4,0) = a(1,0)*w[16]-a(1,1)*w[13]+a(1,2)*w[11]-a(1,3)*w[10];
    b(0,1) =-a(0,1)*w[19]+a(0,2)*w[18]-a(0,3)*w[17]+a(0,4)*w[16];
    b(1,1) = a(0,0)*w[19]-a(0,2)*w[15]+a(0,3)*w[14]-a(0,4)*w[13];
    b(2,1) =-a(0,0)*w[18]+a(0,1)*w[15]-a(0,3)*w[12]+a(0,4)*w[11];
    b(3,1) = a(0,0)*w[17]-a(0,1)*w[14]+a(0,2)*w[12]-a(0,4)*w[10];
    b(4,1) =-a(0,0)*w[16]+a(0,1)*w[13]-a(0,2)*w[11]+a(0,3)*w[10];
    // 3x3 sub-determinants of rows 1,3,4
    w[10] = a(1,0)*w[4]-a(1,1)*w[1]+a(1,2)*w[0];
    w[11] = a(1,0)*w[5]-a(1,1)*w[2]+a(1,3)*w[0];
    w[12] = a(1,0)*w[6]-a(1,1)*w[3]+a(1,4)*w[0];
    w[13] = a(1,0)*w[7]-a(1,2)*w[2]+a(1,3)*w[1];
    w[14] = a(1,0)*w[8]-a(1,2)*w[3]+a(1,4)*w[1];
    w[15] = a(1,0)*w[9]-a(1,3)*w[3]+a(1,4)*w[2];
    w[16] = a(1,1)*w[7]-a(1,2)*w[5]+a(1,3)*w[4];
    w[17] = a(1,1)*w[8]-a(1,2)*w[6]+a(1,4)*w[4];
    w[18] = a(1,1)*w[9]-a(1,3)*w[6]+a(1,4)*w[5];
    w[19] = a(1,2)*w[9]-a(1,3)*w[8]+a(1,4)*w[7];
    b(0,2) = a(0,1)*w[19]-a(0,2)*w[18]+a(0,3)*w[17]-a(0,4)*w[16];
    b(1,2) =-a(0,0)*w[19]+a(0,2)*w[15]-a(0,3)*w[14]+a(0,4)*w[13];
    b(2,2) = a(0,0)*w[18]-a(0,1)*w[15]+a(0,3)*w[12]-a(0,4)*w[11];
    b(3,2) =-a(0,0)*w[17]+a(0,1)*w[14]-a(0,2)*w[12]+a(0,4)*w[10];
    b(4,2) = a(0,0)*w[16]-a(0,1)*w[13]+a(0,2)*w[11]-a(0,3)*w[10];
    // reuse w[]: 2x2 sub-determinants of rows 0,1 and 3x3 of rows 0,1,2
    w[ 0] = a(0,0)*a(1,1)-a(0,1)*a(1,0);
    w[ 1] = a(0,0)*a(1,2)-a(0,2)*a(1,0);
    w[ 2] = a(0,0)*a(1,3)-a(0,3)*a(1,0);
    w[ 3] = a(0,0)*a(1,4)-a(0,4)*a(1,0);
    w[ 4] = a(0,1)*a(1,2)-a(0,2)*a(1,1);
    w[ 5] = a(0,1)*a(1,3)-a(0,3)*a(1,1);
    w[ 6] = a(0,1)*a(1,4)-a(0,4)*a(1,1);
    w[ 7] = a(0,2)*a(1,3)-a(0,3)*a(1,2);
    w[ 8] = a(0,2)*a(1,4)-a(0,4)*a(1,2);
    w[ 9] = a(0,3)*a(1,4)-a(0,4)*a(1,3);
    w[10] = a(2,0)*w[4]-a(2,1)*w[1]+a(2,2)*w[0];
    w[11] = a(2,0)*w[5]-a(2,1)*w[2]+a(2,3)*w[0];
    w[12] = a(2,0)*w[6]-a(2,1)*w[3]+a(2,4)*w[0];
    w[13] = a(2,0)*w[7]-a(2,2)*w[2]+a(2,3)*w[1];
    w[14] = a(2,0)*w[8]-a(2,2)*w[3]+a(2,4)*w[1];
    w[15] = a(2,0)*w[9]-a(2,3)*w[3]+a(2,4)*w[2];
    w[16] = a(2,1)*w[7]-a(2,2)*w[5]+a(2,3)*w[4];
    w[17] = a(2,1)*w[8]-a(2,2)*w[6]+a(2,4)*w[4];
    w[18] = a(2,1)*w[9]-a(2,3)*w[6]+a(2,4)*w[5];
    w[19] = a(2,2)*w[9]-a(2,3)*w[8]+a(2,4)*w[7];
    b(0,3) = a(4,1)*w[19]-a(4,2)*w[18]+a(4,3)*w[17]-a(4,4)*w[16];
    b(1,3) = -a(4,0)*w[19]+a(4,2)*w[15]-a(4,3)*w[14]+a(4,4)*w[13];
    b(2,3) = a(4,0)*w[18]-a(4,1)*w[15]+a(4,3)*w[12]-a(4,4)*w[11];
    b(3,3) = -a(4,0)*w[17]+a(4,1)*w[14]-a(4,2)*w[12]+a(4,4)*w[10];
    b(4,3) = a(4,0)*w[16]-a(4,1)*w[13]+a(4,2)*w[11]-a(4,3)*w[10];
    b(0,4) = -a(3,1)*w[19]+a(3,2)*w[18]-a(3,3)*w[17]+a(3,4)*w[16];
    b(1,4) = a(3,0)*w[19]-a(3,2)*w[15]+a(3,3)*w[14]-a(3,4)*w[13];
    b(2,4) = -a(3,0)*w[18]+a(3,1)*w[15]-a(3,3)*w[12]+a(3,4)*w[11];
    b(3,4) = a(3,0)*w[17]-a(3,1)*w[14]+a(3,2)*w[12]-a(3,4)*w[10];
    b(4,4) = -a(3,0)*w[16]+a(3,1)*w[13]-a(3,2)*w[11]+a(3,3)*w[10];
  }
};
4380 struct CofactorHelper<6,6>
4382 template<
typename T_,
int smb_,
int snb_,
int sma_,
int sna_>
4383 CUDA_HOST_DEVICE
static void compute(Tiny::Matrix<T_, 6, 6, smb_, snb_>& b,
const Tiny::Matrix<T_, 6, 6, sma_, sna_>& a)
4386 w[ 0] = a(4,0)*a(5,1)-a(4,1)*a(5,0);
4387 w[ 1] = a(4,0)*a(5,2)-a(4,2)*a(5,0);
4388 w[ 2] = a(4,0)*a(5,3)-a(4,3)*a(5,0);
4389 w[ 3] = a(4,0)*a(5,4)-a(4,4)*a(5,0);
4390 w[ 4] = a(4,0)*a(5,5)-a(4,5)*a(5,0);
4391 w[ 5] = a(4,1)*a(5,2)-a(4,2)*a(5,1);
4392 w[ 6] = a(4,1)*a(5,3)-a(4,3)*a(5,1);
4393 w[ 7] = a(4,1)*a(5,4)-a(4,4)*a(5,1);
4394 w[ 8] = a(4,1)*a(5,5)-a(4,5)*a(5,1);
4395 w[ 9] = a(4,2)*a(5,3)-a(4,3)*a(5,2);
4396 w[10] = a(4,2)*a(5,4)-a(4,4)*a(5,2);
4397 w[11] = a(4,2)*a(5,5)-a(4,5)*a(5,2);
4398 w[12] = a(4,3)*a(5,4)-a(4,4)*a(5,3);
4399 w[13] = a(4,3)*a(5,5)-a(4,5)*a(5,3);
4400 w[14] = a(4,4)*a(5,5)-a(4,5)*a(5,4);
4401 w[15] = a(3,0)*w[5]-a(3,1)*w[1]+a(3,2)*w[0];
4402 w[16] = a(3,0)*w[6]-a(3,1)*w[2]+a(3,3)*w[0];
4403 w[17] = a(3,0)*w[7]-a(3,1)*w[3]+a(3,4)*w[0];
4404 w[18] = a(3,0)*w[8]-a(3,1)*w[4]+a(3,5)*w[0];
4405 w[19] = a(3,0)*w[9]-a(3,2)*w[2]+a(3,3)*w[1];
4406 w[20] = a(3,0)*w[10]-a(3,2)*w[3]+a(3,4)*w[1];
4407 w[21] = a(3,0)*w[11]-a(3,2)*w[4]+a(3,5)*w[1];
4408 w[22] = a(3,0)*w[12]-a(3,3)*w[3]+a(3,4)*w[2];
4409 w[23] = a(3,0)*w[13]-a(3,3)*w[4]+a(3,5)*w[2];
4410 w[24] = a(3,0)*w[14]-a(3,4)*w[4]+a(3,5)*w[3];
4411 w[25] = a(3,1)*w[9]-a(3,2)*w[6]+a(3,3)*w[5];
4412 w[26] = a(3,1)*w[10]-a(3,2)*w[7]+a(3,4)*w[5];
4413 w[27] = a(3,1)*w[11]-a(3,2)*w[8]+a(3,5)*w[5];
4414 w[28] = a(3,1)*w[12]-a(3,3)*w[7]+a(3,4)*w[6];
4415 w[29] = a(3,1)*w[13]-a(3,3)*w[8]+a(3,5)*w[6];
4416 w[30] = a(3,1)*w[14]-a(3,4)*w[8]+a(3,5)*w[7];
4417 w[31] = a(3,2)*w[12]-a(3,3)*w[10]+a(3,4)*w[9];
4418 w[32] = a(3,2)*w[13]-a(3,3)*w[11]+a(3,5)*w[9];
4419 w[33] = a(3,2)*w[14]-a(3,4)*w[11]+a(3,5)*w[10];
4420 w[34] = a(3,3)*w[14]-a(3,4)*w[13]+a(3,5)*w[12];
4422 w[ 0] = a(2,0)*w[25]-a(2,1)*w[19]+a(2,2)*w[16]-a(2,3)*w[15];
4423 w[ 1] = a(2,0)*w[26]-a(2,1)*w[20]+a(2,2)*w[17]-a(2,4)*w[15];
4424 w[ 2] = a(2,0)*w[27]-a(2,1)*w[21]+a(2,2)*w[18]-a(2,5)*w[15];
4425 w[ 3] = a(2,0)*w[28]-a(2,1)*w[22]+a(2,3)*w[17]-a(2,4)*w[16];
4426 w[ 4] = a(2,0)*w[29]-a(2,1)*w[23]+a(2,3)*w[18]-a(2,5)*w[16];
4427 w[ 5] = a(2,0)*w[30]-a(2,1)*w[24]+a(2,4)*w[18]-a(2,5)*w[17];
4428 w[ 6] = a(2,0)*w[31]-a(2,2)*w[22]+a(2,3)*w[20]-a(2,4)*w[19];
4429 w[ 7] = a(2,0)*w[32]-a(2,2)*w[23]+a(2,3)*w[21]-a(2,5)*w[19];
4430 w[ 8] = a(2,0)*w[33]-a(2,2)*w[24]+a(2,4)*w[21]-a(2,5)*w[20];
4431 w[ 9] = a(2,0)*w[34]-a(2,3)*w[24]+a(2,4)*w[23]-a(2,5)*w[22];
4432 w[10] = a(2,1)*w[31]-a(2,2)*w[28]+a(2,3)*w[26]-a(2,4)*w[25];
4433 w[11] = a(2,1)*w[32]-a(2,2)*w[29]+a(2,3)*w[27]-a(2,5)*w[25];
4434 w[12] = a(2,1)*w[33]-a(2,2)*w[30]+a(2,4)*w[27]-a(2,5)*w[26];
4435 w[13] = a(2,1)*w[34]-a(2,3)*w[30]+a(2,4)*w[29]-a(2,5)*w[28];
4436 w[14] = a(2,2)*w[34]-a(2,3)*w[33]+a(2,4)*w[32]-a(2,5)*w[31];
4438 b(0,0) = a(1,1)*w[14]-a(1,2)*w[13]+a(1,3)*w[12]-a(1,4)*w[11]+a(1,5)*w[10];
4439 b(1,0) = -a(1,0)*w[14]+a(1,2)*w[9]-a(1,3)*w[8]+a(1,4)*w[7]-a(1,5)*w[6];
4440 b(2,0) = a(1,0)*w[13]-a(1,1)*w[9]+a(1,3)*w[5]-a(1,4)*w[4]+a(1,5)*w[3];
4441 b(3,0) = -a(1,0)*w[12]+a(1,1)*w[8]-a(1,2)*w[5]+a(1,4)*w[2]-a(1,5)*w[1];
4442 b(4,0) = a(1,0)*w[11]-a(1,1)*w[7]+a(1,2)*w[4]-a(1,3)*w[2]+a(1,5)*w[0];
4443 b(5,0) = -a(1,0)*w[10]+a(1,1)*w[6]-a(1,2)*w[3]+a(1,3)*w[1]-a(1,4)*w[0];
4445 b(0,1) =-a(0,1)*w[14]+a(0,2)*w[13]-a(0,3)*w[12]+a(0,4)*w[11]-a(0,5)*w[10];
4446 b(1,1) = a(0,0)*w[14]-a(0,2)*w[9]+a(0,3)*w[8]-a(0,4)*w[7]+a(0,5)*w[6];
4447 b(2,1) =-a(0,0)*w[13]+a(0,1)*w[9]-a(0,3)*w[5]+a(0,4)*w[4]-a(0,5)*w[3];
4448 b(3,1) = a(0,0)*w[12]-a(0,1)*w[8]+a(0,2)*w[5]-a(0,4)*w[2]+a(0,5)*w[1];
4449 b(4,1) =-a(0,0)*w[11]+a(0,1)*w[7]-a(0,2)*w[4]+a(0,3)*w[2]-a(0,5)*w[0];
4450 b(5,1) = a(0,0)*w[10]-a(0,1)*w[6]+a(0,2)*w[3]-a(0,3)*w[1]+a(0,4)*w[0];
4452 w[ 0] = a(4,0)*a(5,1)-a(4,1)*a(5,0);
4453 w[ 1] = a(4,0)*a(5,2)-a(4,2)*a(5,0);
4454 w[ 2] = a(4,0)*a(5,3)-a(4,3)*a(5,0);
4455 w[ 3] = a(4,0)*a(5,4)-a(4,4)*a(5,0);
4456 w[ 4] = a(4,0)*a(5,5)-a(4,5)*a(5,0);
4457 w[ 5] = a(4,1)*a(5,2)-a(4,2)*a(5,1);
4458 w[ 6] = a(4,1)*a(5,3)-a(4,3)*a(5,1);
4459 w[ 7] = a(4,1)*a(5,4)-a(4,4)*a(5,1);
4460 w[ 8] = a(4,1)*a(5,5)-a(4,5)*a(5,1);
4461 w[ 9] = a(4,2)*a(5,3)-a(4,3)*a(5,2);
4462 w[10] = a(4,2)*a(5,4)-a(4,4)*a(5,2);
4463 w[11] = a(4,2)*a(5,5)-a(4,5)*a(5,2);
4464 w[12] = a(4,3)*a(5,4)-a(4,4)*a(5,3);
4465 w[13] = a(4,3)*a(5,5)-a(4,5)*a(5,3);
4466 w[14] = a(4,4)*a(5,5)-a(4,5)*a(5,4);
4467 w[15] = a(1,0)*w[5]-a(1,1)*w[1]+a(1,2)*w[0];
4468 w[16] = a(1,0)*w[6]-a(1,1)*w[2]+a(1,3)*w[0];
4469 w[17] = a(1,0)*w[7]-a(1,1)*w[3]+a(1,4)*w[0];
4470 w[18] = a(1,0)*w[8]-a(1,1)*w[4]+a(1,5)*w[0];
4471 w[19] = a(1,0)*w[9]-a(1,2)*w[2]+a(1,3)*w[1];
4472 w[20] = a(1,0)*w[10]-a(1,2)*w[3]+a(1,4)*w[1];
4473 w[21] = a(1,0)*w[11]-a(1,2)*w[4]+a(1,5)*w[1];
4474 w[22] = a(1,0)*w[12]-a(1,3)*w[3]+a(1,4)*w[2];
4475 w[23] = a(1,0)*w[13]-a(1,3)*w[4]+a(1,5)*w[2];
4476 w[24] = a(1,0)*w[14]-a(1,4)*w[4]+a(1,5)*w[3];
4477 w[25] = a(1,1)*w[9]-a(1,2)*w[6]+a(1,3)*w[5];
4478 w[26] = a(1,1)*w[10]-a(1,2)*w[7]+a(1,4)*w[5];
4479 w[27] = a(1,1)*w[11]-a(1,2)*w[8]+a(1,5)*w[5];
4480 w[28] = a(1,1)*w[12]-a(1,3)*w[7]+a(1,4)*w[6];
4481 w[29] = a(1,1)*w[13]-a(1,3)*w[8]+a(1,5)*w[6];
4482 w[30] = a(1,1)*w[14]-a(1,4)*w[8]+a(1,5)*w[7];
4483 w[31] = a(1,2)*w[12]-a(1,3)*w[10]+a(1,4)*w[9];
4484 w[32] = a(1,2)*w[13]-a(1,3)*w[11]+a(1,5)*w[9];
4485 w[33] = a(1,2)*w[14]-a(1,4)*w[11]+a(1,5)*w[10];
4486 w[34] = a(1,3)*w[14]-a(1,4)*w[13]+a(1,5)*w[12];
4487 w[ 0] = a(0,0)*w[25]-a(0,1)*w[19]+a(0,2)*w[16]-a(0,3)*w[15];
4488 w[ 1] = a(0,0)*w[26]-a(0,1)*w[20]+a(0,2)*w[17]-a(0,4)*w[15];
4489 w[ 2] = a(0,0)*w[27]-a(0,1)*w[21]+a(0,2)*w[18]-a(0,5)*w[15];
4490 w[ 3] = a(0,0)*w[28]-a(0,1)*w[22]+a(0,3)*w[17]-a(0,4)*w[16];
4491 w[ 4] = a(0,0)*w[29]-a(0,1)*w[23]+a(0,3)*w[18]-a(0,5)*w[16];
4492 w[ 5] = a(0,0)*w[30]-a(0,1)*w[24]+a(0,4)*w[18]-a(0,5)*w[17];
4493 w[ 6] = a(0,0)*w[31]-a(0,2)*w[22]+a(0,3)*w[20]-a(0,4)*w[19];
4494 w[ 7] = a(0,0)*w[32]-a(0,2)*w[23]+a(0,3)*w[21]-a(0,5)*w[19];
4495 w[ 8] = a(0,0)*w[33]-a(0,2)*w[24]+a(0,4)*w[21]-a(0,5)*w[20];
4496 w[ 9] = a(0,0)*w[34]-a(0,3)*w[24]+a(0,4)*w[23]-a(0,5)*w[22];
4497 w[10] = a(0,1)*w[31]-a(0,2)*w[28]+a(0,3)*w[26]-a(0,4)*w[25];
4498 w[11] = a(0,1)*w[32]-a(0,2)*w[29]+a(0,3)*w[27]-a(0,5)*w[25];
4499 w[12] = a(0,1)*w[33]-a(0,2)*w[30]+a(0,4)*w[27]-a(0,5)*w[26];
4500 w[13] = a(0,1)*w[34]-a(0,3)*w[30]+a(0,4)*w[29]-a(0,5)*w[28];
4501 w[14] = a(0,2)*w[34]-a(0,3)*w[33]+a(0,4)*w[32]-a(0,5)*w[31];
4503 b(0,2) = a(3,1)*w[14]-a(3,2)*w[13]+a(3,3)*w[12]-a(3,4)*w[11]+a(3,5)*w[10];
4504 b(1,2) =-a(3,0)*w[14]+a(3,2)*w[9]-a(3,3)*w[8]+a(3,4)*w[7]-a(3,5)*w[6];
4505 b(2,2) = a(3,0)*w[13]-a(3,1)*w[9]+a(3,3)*w[5]-a(3,4)*w[4]+a(3,5)*w[3];
4506 b(3,2) =-a(3,0)*w[12]+a(3,1)*w[8]-a(3,2)*w[5]+a(3,4)*w[2]-a(3,5)*w[1];
4507 b(4,2) = a(3,0)*w[11]-a(3,1)*w[7]+a(3,2)*w[4]-a(3,3)*w[2]+a(3,5)*w[0];
4508 b(5,2) =-a(3,0)*w[10]+a(3,1)*w[6]-a(3,2)*w[3]+a(3,3)*w[1]-a(3,4)*w[0];
4510 b(0,3) =-a(2,1)*w[14]+a(2,2)*w[13]-a(2,3)*w[12]+a(2,4)*w[11]-a(2,5)*w[10];
4511 b(1,3) = a(2,0)*w[14]-a(2,2)*w[9]+a(2,3)*w[8]-a(2,4)*w[7]+a(2,5)*w[6];
4512 b(2,3) =-a(2,0)*w[13]+a(2,1)*w[9]-a(2,3)*w[5]+a(2,4)*w[4]-a(2,5)*w[3];
4513 b(3,3) = a(2,0)*w[12]-a(2,1)*w[8]+a(2,2)*w[5]-a(2,4)*w[2]+a(2,5)*w[1];
4514 b(4,3) =-a(2,0)*w[11]+a(2,1)*w[7]-a(2,2)*w[4]+a(2,3)*w[2]-a(2,5)*w[0];
4515 b(5,3) = a(2,0)*w[10]-a(2,1)*w[6]+a(2,2)*w[3]-a(2,3)*w[1]+a(2,4)*w[0];
4517 w[ 0] = a(2,0)*a(3,1)-a(2,1)*a(3,0);
4518 w[ 1] = a(2,0)*a(3,2)-a(2,2)*a(3,0);
4519 w[ 2] = a(2,0)*a(3,3)-a(2,3)*a(3,0);
4520 w[ 3] = a(2,0)*a(3,4)-a(2,4)*a(3,0);
4521 w[ 4] = a(2,0)*a(3,5)-a(2,5)*a(3,0);
4522 w[ 5] = a(2,1)*a(3,2)-a(2,2)*a(3,1);
4523 w[ 6] = a(2,1)*a(3,3)-a(2,3)*a(3,1);
4524 w[ 7] = a(2,1)*a(3,4)-a(2,4)*a(3,1);
4525 w[ 8] = a(2,1)*a(3,5)-a(2,5)*a(3,1);
4526 w[ 9] = a(2,2)*a(3,3)-a(2,3)*a(3,2);
4527 w[10] = a(2,2)*a(3,4)-a(2,4)*a(3,2);
4528 w[11] = a(2,2)*a(3,5)-a(2,5)*a(3,2);
4529 w[12] = a(2,3)*a(3,4)-a(2,4)*a(3,3);
4530 w[13] = a(2,3)*a(3,5)-a(2,5)*a(3,3);
4531 w[14] = a(2,4)*a(3,5)-a(2,5)*a(3,4);
4532 w[15] = a(1,0)*w[5]-a(1,1)*w[1]+a(1,2)*w[0];
4533 w[16] = a(1,0)*w[6]-a(1,1)*w[2]+a(1,3)*w[0];
4534 w[17] = a(1,0)*w[7]-a(1,1)*w[3]+a(1,4)*w[0];
4535 w[18] = a(1,0)*w[8]-a(1,1)*w[4]+a(1,5)*w[0];
4536 w[19] = a(1,0)*w[9]-a(1,2)*w[2]+a(1,3)*w[1];
4537 w[20] = a(1,0)*w[10]-a(1,2)*w[3]+a(1,4)*w[1];
4538 w[21] = a(1,0)*w[11]-a(1,2)*w[4]+a(1,5)*w[1];
4539 w[22] = a(1,0)*w[12]-a(1,3)*w[3]+a(1,4)*w[2];
4540 w[23] = a(1,0)*w[13]-a(1,3)*w[4]+a(1,5)*w[2];
4541 w[24] = a(1,0)*w[14]-a(1,4)*w[4]+a(1,5)*w[3];
4542 w[25] = a(1,1)*w[9]-a(1,2)*w[6]+a(1,3)*w[5];
4543 w[26] = a(1,1)*w[10]-a(1,2)*w[7]+a(1,4)*w[5];
4544 w[27] = a(1,1)*w[11]-a(1,2)*w[8]+a(1,5)*w[5];
4545 w[28] = a(1,1)*w[12]-a(1,3)*w[7]+a(1,4)*w[6];
4546 w[29] = a(1,1)*w[13]-a(1,3)*w[8]+a(1,5)*w[6];
4547 w[30] = a(1,1)*w[14]-a(1,4)*w[8]+a(1,5)*w[7];
4548 w[31] = a(1,2)*w[12]-a(1,3)*w[10]+a(1,4)*w[9];
4549 w[32] = a(1,2)*w[13]-a(1,3)*w[11]+a(1,5)*w[9];
4550 w[33] = a(1,2)*w[14]-a(1,4)*w[11]+a(1,5)*w[10];
4551 w[34] = a(1,3)*w[14]-a(1,4)*w[13]+a(1,5)*w[12];
4553 w[ 0] = a(0,0)*w[25]-a(0,1)*w[19]+a(0,2)*w[16]-a(0,3)*w[15];
4554 w[ 1] = a(0,0)*w[26]-a(0,1)*w[20]+a(0,2)*w[17]-a(0,4)*w[15];
4555 w[ 2] = a(0,0)*w[27]-a(0,1)*w[21]+a(0,2)*w[18]-a(0,5)*w[15];
4556 w[ 3] = a(0,0)*w[28]-a(0,1)*w[22]+a(0,3)*w[17]-a(0,4)*w[16];
4557 w[ 4] = a(0,0)*w[29]-a(0,1)*w[23]+a(0,3)*w[18]-a(0,5)*w[16];
4558 w[ 5] = a(0,0)*w[30]-a(0,1)*w[24]+a(0,4)*w[18]-a(0,5)*w[17];
4559 w[ 6] = a(0,0)*w[31]-a(0,2)*w[22]+a(0,3)*w[20]-a(0,4)*w[19];
4560 w[ 7] = a(0,0)*w[32]-a(0,2)*w[23]+a(0,3)*w[21]-a(0,5)*w[19];
4561 w[ 8] = a(0,0)*w[33]-a(0,2)*w[24]+a(0,4)*w[21]-a(0,5)*w[20];
4562 w[ 9] = a(0,0)*w[34]-a(0,3)*w[24]+a(0,4)*w[23]-a(0,5)*w[22];
4563 w[10] = a(0,1)*w[31]-a(0,2)*w[28]+a(0,3)*w[26]-a(0,4)*w[25];
4564 w[11] = a(0,1)*w[32]-a(0,2)*w[29]+a(0,3)*w[27]-a(0,5)*w[25];
4565 w[12] = a(0,1)*w[33]-a(0,2)*w[30]+a(0,4)*w[27]-a(0,5)*w[26];
4566 w[13] = a(0,1)*w[34]-a(0,3)*w[30]+a(0,4)*w[29]-a(0,5)*w[28];
4567 w[14] = a(0,2)*w[34]-a(0,3)*w[33]+a(0,4)*w[32]-a(0,5)*w[31];
4569 b(0,4) = a(5,1)*w[14]-a(5,2)*w[13]+a(5,3)*w[12]-a(5,4)*w[11]+a(5,5)*w[10];
4570 b(1,4) =-a(5,0)*w[14]+a(5,2)*w[9]-a(5,3)*w[8]+a(5,4)*w[7]-a(5,5)*w[6];
4571 b(2,4) = a(5,0)*w[13]-a(5,1)*w[9]+a(5,3)*w[5]-a(5,4)*w[4]+a(5,5)*w[3];
4572 b(3,4) =-a(5,0)*w[12]+a(5,1)*w[8]-a(5,2)*w[5]+a(5,4)*w[2]-a(5,5)*w[1];
4573 b(4,4) = a(5,0)*w[11]-a(5,1)*w[7]+a(5,2)*w[4]-a(5,3)*w[2]+a(5,5)*w[0];
4574 b(5,4) =-a(5,0)*w[10]+a(5,1)*w[6]-a(5,2)*w[3]+a(5,3)*w[1]-a(5,4)*w[0];
4576 b(0,5) =-a(4,1)*w[14]+a(4,2)*w[13]-a(4,3)*w[12]+a(4,4)*w[11]-a(4,5)*w[10];
4577 b(1,5) = a(4,0)*w[14]-a(4,2)*w[9]+a(4,3)*w[8]-a(4,4)*w[7]+a(4,5)*w[6];
4578 b(2,5) =-a(4,0)*w[13]+a(4,1)*w[9]-a(4,3)*w[5]+a(4,4)*w[4]-a(4,5)*w[3];
4579 b(3,5) = a(4,0)*w[12]-a(4,1)*w[8]+a(4,2)*w[5]-a(4,4)*w[2]+a(4,5)*w[1];
4580 b(4,5) =-a(4,0)*w[11]+a(4,1)*w[7]-a(4,2)*w[4]+a(4,3)*w[2]-a(4,5)*w[0];
4581 b(5,5) = a(4,0)*w[10]-a(4,1)*w[6]+a(4,2)*w[3]-a(4,3)*w[1]+a(4,4)*w[0];
#define XABORTM(msg)
Abortion macro definition with custom message.
#define ASSERTM(expr, msg)
Debug-Assertion macro definition with custom message.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Math Limits class template.
Tiny Matrix class template.
CUDA_HOST_DEVICE DataType det() const
Returns the determinant of the matrix.
CUDA_HOST_DEVICE Matrix(DataType value)
value-assignment constructor
CUDA_HOST_DEVICE Matrix & set_cofactor(const Matrix< T_, m_, n_, sma_, sna_ > &a)
Sets this matrix to the cofactor matrix of another matrix.
CUDA_HOST_DEVICE Matrix & add_scalar_main_diag(DataType alpha)
Adds a value onto the matrix's main diagonal.
CUDA_HOST_DEVICE Matrix & add_outer_product(const Vector< T_, m_, snx_ > &x, const Vector< T_, n_, sny_ > &y, const DataType alpha=DataType(1))
Adds the outer product of two vectors onto the matrix.
CUDA_HOST_DEVICE Matrix & add_mat_mat_mult(const Matrix< T_, m_, la_, sma_, sna_ > &a, const Matrix< T_, lb_, n_, smb_, snb_ > &b, DataType alpha=DataType(1))
Adds the algebraic matrix-product of two other matrices onto this matrix.
CUDA_HOST_DEVICE Matrix & set_mat_mat_mult(const Matrix< T_, m_, la_, sma_, sna_ > &a, const Matrix< T_, lb_, n_, smb_, snb_ > &b)
Sets this matrix to the algebraic matrix-product of two other matrices.
CUDA_HOST_DEVICE DataType norm_frobenius_sqr() const
Returns the Frobenius norm squared of the matrix.
T_ ValueType
the data type of the matrix
CUDA_HOST_DEVICE Matrix & operator=(const Matrix< T_, m_, n_, sma_, sna_ > &a)
assignment operator
CUDA_HOST_DEVICE Matrix(const std::initializer_list< std::initializer_list< Tx_ > > &x)
Initializer list constructor.
CUDA_HOST_DEVICE Matrix & add_vec_tensor_mult(const Vector< T_, l_, snv_ > &x, const Tensor3< T_, l_, m_, n_, slt_, smt_, snt_ > &t, DataType alpha=DataType(1))
Adds the result of a vector-tensor left-product onto this matrix.
CUDA_HOST_DEVICE Matrix & set_transpose(const Matrix< T_, n_, m_, sma_, sna_ > &a)
Sets this matrix to the transpose of another matrix.
CUDA_HOST friend std::istream & operator>>(std::istream &in, Matrix &A)
Tiny::Matrix streaming operator.
CUDA_HOST_DEVICE Matrix & axpy(DataType alpha, const Matrix< T_, m_, n_, sma_, sna_ > &a)
Adds another scaled matrix onto this matrix.
CUDA_HOST_DEVICE Matrix & operator-=(const Matrix< T_, m_, n_, sma_, sna_ > &a)
matrix component-wise subtraction operator
CUDA_HOST_DEVICE Matrix(const Matrix< T2_, m_, n_, sma_, sna_ > &a)
copy constructor
static CUDA_HOST_DEVICE Matrix null()
Returns a null-matrix.
CUDA_HOST_DEVICE DataType norm_sub_id_frobenius() const
Returns the Frobenius norm of the difference of this matrix and the identity matrix.
CUDA_HOST_DEVICE Matrix & set_inverse(const Matrix< T_, m_, n_, sma_, sna_ > &a)
Sets this matrix to the inverse of another matrix.
CUDA_HOST_DEVICE const RowType & operator[](int i) const
Row-Access operator.
CUDA_HOST_DEVICE Matrix & operator*=(DataType alpha)
scalar-right-multiply-by operator
CUDA_HOST_DEVICE DataType scalar_product(const Vector< T_, m_, snx_ > &x, const Vector< T_, n_, sny_ > &y) const
Computes the scalar product of two vectors with this matrix.
Intern::DataTypeExtractor< ValueType >::MyDataType DataType
The basic data type buried in the lowest level of the vector.
Vector< T_, n_, sn_ > RowType
the type of a single matrix row
CUDA_HOST_DEVICE const T_ & operator()(int i, int j) const
Access operator.
CUDA_HOST_DEVICE Matrix & operator=(DataType value)
value-assignment operator
CUDA_HOST_DEVICE Matrix & set_rotation_3d(T_ yaw, T_ pitch, T_ roll)
Sets this matrix to a 3D yaw-pitch-roll rotation matrix.
CUDA_HOST_DEVICE DataType trace() const
Returns the trace of the matrix.
CUDA_HOST_DEVICE DataType norm_hessian_sqr() const
Returns the Hessian norm square.
CUDA_HOST_DEVICE Matrix(const std::initializer_list< Tx_ > &x)
Initializer list of Tiny::Vector constructor.
CUDA_HOST_DEVICE Matrix & set_gram(const Matrix< T_, l_, n_, sla_, sna_ > &a)
Sets this matrix to the Gram matrix of another matrix.
CUDA_HOST_DEVICE Matrix & set_rotation_2d(T_ angle)
Sets this matrix to a 2D rotation matrix.
CUDA_HOST_DEVICE Matrix & set_double_mat_mult(const Matrix< T_, k_, l_, sma_, sna_ > &a, const Matrix< T_, k_, m_, smb_, snb_ > &b, const Matrix< T_, l_, n_, smd_, snd_ > &d, T_ alpha=T_(1))
Sets this matrix to the algebraic matrix double-product of three other matrices.
CUDA_HOST_DEVICE T_ & operator()(int i, int j)
Access operator.
CUDA_HOST_DEVICE Matrix & add_double_mat_mult(const Matrix< T_, k_, l_, sma_, sna_ > &a, const Matrix< T_, k_, m_, smb_, snb_ > &b, const Matrix< T_, l_, n_, smd_, snd_ > &d, DataType alpha=DataType(1))
Adds the algebraic matrix double-product of three other matrices onto this matrix.
CUDA_HOST_DEVICE void convert(const Matrix< Tx_, m_, n_, sma_, sna_ > &a)
conversion operator
CUDA_HOST_DEVICE Matrix & set_identity()
Sets this matrix to the identity matrix.
CUDA_HOST_DEVICE Matrix & operator+=(const Matrix< T_, m_, n_, sma_, sna_ > &a)
matrix component-wise addition operator
CUDA_HOST_DEVICE RowType & operator[](int i)
Row-Access operator.
CUDA_HOST_DEVICE Matrix & operator=(const std::initializer_list< std::initializer_list< Tx_ > > &x)
Initializer list assignment operator.
CUDA_HOST_DEVICE Matrix()
default constructor
CUDA_HOST_DEVICE void copy_n(const Matrix< T_, ma_, na_, sma_, sna_ > &a)
Copies the upper left mm_ x nn_ entries of a matrix.
CUDA_HOST_DEVICE DataType vol() const
Returns the volume of the matrix.
RowType v[sm_]
actual matrix data; that's an array of vectors
CUDA_HOST_DEVICE Matrix & operator=(const std::initializer_list< Tx_ > &x)
Initializer list assignment operator.
CUDA_HOST_DEVICE Matrix & set_vec_tensor_mult(const Vector< T_, l_, snv_ > &x, const Tensor3< T_, l_, m_, n_, slt_, smt_, snt_ > &t, DataType alpha=DataType(1))
Sets this matrix to the result of a vector-tensor left-product.
CUDA_HOST_DEVICE Matrix & set_outer_product(const Vector< T_, m_, snx_ > &x, const Vector< T_, n_, sny_ > &y)
Sets this matrix to the outer product of two vectors.
CUDA_HOST_DEVICE void format(DataType alpha=DataType(0))
Formats the matrix.
CUDA_HOST_DEVICE void copy(const Matrix< T_, m_, n_, sma_, sna_ > &a)
Copies a matrix.
CUDA_HOST_DEVICE DataType norm_frobenius() const
Returns the Frobenius norm of the matrix.
Tiny Tensor3 class template.
CUDA_HOST_DEVICE Tensor3 & add_double_mat_mult(const Tensor3< T_, lt_, mt_, nt_, slt_, smt_, snt_ > &t, const Matrix< T_, nt_, n_, smb_, snb_ > &b, const Matrix< T_, mt_, m_, smd_, snd_ > &d, DataType alpha=DataType(1))
Adds the result of a matrix-tensor-matrix double-product onto this tensor.
Intern::DataTypeExtractor< ValueType >::MyDataType DataType
The basic data type buried in the lowest level of the vector.
CUDA_HOST_DEVICE void copy_n(const Tensor3< T_, la_, ma_, na_, sla_, sma_, sna_ > &a)
Copies the upper left ll_ x mm_ x nn_ entries of a tensor3.
static constexpr int l
the tube count of the tensor
CUDA_HOST_DEVICE Tensor3 & operator=(const std::initializer_list< Tx_ > &x)
Initializer list assignment operator.
static constexpr int sn
the column stride of the tensor
CUDA_HOST_DEVICE Tensor3 & operator=(const Tensor3< T_, l_, m_, n_, sla_, sma_, sna_ > &a)
copy-assignment operator
CUDA_HOST_DEVICE void format(DataType alpha=DataType(0))
formats the tensor
CUDA_HOST_DEVICE const T_ & operator()(int h, int i, int j) const
Access operator.
CUDA_HOST_DEVICE Tensor3 & operator=(DataType value)
value-assignment operator
CUDA_HOST_DEVICE Tensor3 & operator=(const std::initializer_list< std::initializer_list< std::initializer_list< Tx_ > > > &x)
Initializer list assignment operator.
CUDA_HOST_DEVICE Tensor3 & add_vec_mat_outer_product(const Vector< T_, l_, slx_ > &x, const Matrix< T_, m_, n_, sma_, sna_ > &a, DataType alpha=DataType(1))
Adds the result of a vector-matrix outer product onto this tensor.
CUDA_HOST_DEVICE Tensor3 & operator+=(const Tensor3< T_, l_, m_, n_, sla_, sma_, sna_ > &a)
tensor component-wise addition operator
CUDA_HOST_DEVICE Tensor3(const std::initializer_list< std::initializer_list< std::initializer_list< Tx_ > > > &x)
Initializer list constructor.
CUDA_HOST_DEVICE const PlaneType & operator[](int h) const
Plane-Access operator.
CUDA_HOST_DEVICE Tensor3(const Tensor3< T_, l_, m_, n_, sla_, sma_, sna_ > &a)
copy-constructor
Matrix< T_, m_, n_, sm_, sn_ > PlaneType
Type of tensor data; that's an array of matrices.
static constexpr int sl
the tube stride of the tensor
CUDA_HOST_DEVICE Tensor3(const std::initializer_list< Tx_ > &x)
Initializer list constructor.
CUDA_HOST_DEVICE PlaneType & operator[](int h)
Plane-Access operator.
CUDA_HOST_DEVICE Tensor3(DataType value)
value-assignment constructor
static CUDA_HOST_DEVICE Tensor3 null()
Returns a null-tensor.
CUDA_HOST_DEVICE Tensor3 & add_mat_tensor_mult(const Matrix< T_, l_, k_, sma_, sna_ > &a, const Tensor3< T_, k_, m_, n_, slt_, smt_, snt_ > &t, DataType alpha=DataType(1))
Adds the result of a matrix-tensor product onto this tensor.
PlaneType v[sl_]
Actual tensor data.
static constexpr int m
the row count of the tensor
CUDA_HOST_DEVICE void convert(const Tensor3< Tx_, l_, m_, n_, sla_, sma_, sna_ > &a)
conversion operator
CUDA_HOST_DEVICE Tensor3()
default constructor
CUDA_HOST_DEVICE Tensor3 & operator-=(const Tensor3< T_, l_, m_, n_, sla_, sma_, sna_ > &a)
tensor component-wise subtraction operator
CUDA_HOST_DEVICE T_ & operator()(int h, int i, int j)
Access operator.
static constexpr int n
the column count of the tensor
static constexpr int sm
the row stride of the tensor
T_ ValueType
the data type of the tensor
CUDA_HOST_DEVICE Tensor3 & operator*=(DataType alpha)
scalar right-multiply-by operator
CUDA_HOST_DEVICE void copy(const Tensor3< T_, l_, m_, n_, sla_, sma_, sna_ > &a)
Copies a tensor3.
Tiny Vector class template.
static CUDA_HOST_DEVICE Vector null()
Returns a null-vector.
CUDA_HOST_DEVICE DataType norm_euclid_n() const
Computes the euclid norm of the first nn_ entries of this vector.
CUDA_HOST_DEVICE Vector & add_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x, DataType alpha=DataType(1))
Adds the result of a matrix-vector product onto this vector.
CUDA_HOST_DEVICE DataType norm_euclid_sqr_n() const
Computes the squared euclid norm of the first nn_ entries of this vector.
CUDA_HOST_DEVICE Vector & normalize_n()
Normalizes the first nn_ entries of this vector.
CUDA_HOST_DEVICE Vector & operator=(const Vector< T_, n_, sx_ > &x)
copy-assignment operator
T_ v[s_]
actual vector data
CUDA_HOST_DEVICE bool normalized() const
Return if the vector is normalized.
CUDA_HOST_DEVICE void format_n(DataType alpha=DataType(0))
Formats the first nn_ entries of the vector.
static CUDA_HOST_DEVICE Vector convert_new(Vector &&x)
overload for moveable rvalue type
CUDA_HOST_DEVICE void format(DataType alpha=DataType(0))
Formats the vector.
CUDA_HOST_DEVICE Vector(const Vector< T_, n_, sx_ > &x)
copy constructor
CUDA_HOST_DEVICE Vector & operator=(const std::initializer_list< Tx_ > &x)
Initializer list assignment operator.
CUDA_HOST_DEVICE DataType norm_max() const
Computes the max-norm of this vector.
CUDA_HOST_DEVICE T_ & operator()(int i)
Access operator.
CUDA_HOST_DEVICE const T_ & operator[](int i) const
Access operator.
CUDA_HOST_DEVICE Vector & operator-=(const Vector< T_, n_, sx_ > &x)
vector-subtract operator
CUDA_HOST_DEVICE Vector & set_vec_mat_mult(const Vector< T_, m_, sx_ > &x, const Matrix< T_, m_, n_, sma_, sna_ > &a)
Sets this vector to the result of a vector-matrix product.
static CUDA_HOST_DEVICE Vector convert_new(const Vector< Tx_, n_, sx_ > &x)
convert function, not callable with non convertable inner type
CUDA_HOST_DEVICE Vector & add_vec_mat_mult_n(const Vector< T_, mx_, smx_ > &x, const Matrix< T_, ma_, na_, sma_, sna_ > &a, DataType alpha=DataType(1))
Adds the result of a vector-matrix product onto this vector.
CUDA_HOST_DEVICE Vector(const Vector< Tx_, n_, sx_ > &x)
copy constructor
CUDA_HOST_DEVICE void scale_n(DataType alpha)
Scales the first nn_ entries of the vector.
CUDA_HOST_DEVICE Vector & set_mat_vec_mult_n(const Matrix< T_, ma_, na_, sma_, sna_ > &a, const Vector< T_, nx_, sx_ > &x)
Sets the first nn_ entries of this vector to the result of a matrix-vector product with the first mm_...
static constexpr int n
the length of the vector
CUDA_HOST_DEVICE Vector & operator*=(DataType alpha)
scalar-multiply operator
CUDA_HOST_DEVICE Vector()
default constructor
CUDA_HOST_DEVICE DataType norm_l1() const
Computes the l1-norm of this vector.
CUDA_HOST_DEVICE Vector & set_convex(DataType alpha, const Vector< T_, n_, sna_ > &a, const Vector< T_, n_, snb_ > &b)
Sets this vector to the convex combination of two other vectors.
CUDA_HOST_DEVICE Vector & axpy_n(DataType alpha, const Vector< T_, nx_, snx_ > &x)
Adds the first nn_ entries of another scaled vector onto this vector.
CUDA_HOST_DEVICE Vector & add_mat_vec_mult_n(const Matrix< T_, ma_, na_, sma_, sna_ > &a, const Vector< T_, nx_, sx_ > &x, DataType alpha=DataType(1))
Adds the result of a matrix-vector product onto this vector.
CUDA_HOST friend std::ostream & operator<<(std::ostream &lhs, const Vector &b)
Tiny::Vector streaming operator.
CUDA_HOST_DEVICE Vector & operator+=(const Vector< T_, n_, sx_ > &x)
vector-add operator
CUDA_HOST_DEVICE Vector & operator=(DataType value)
value-assignment operator
CUDA_HOST_DEVICE Vector & operator=(const Vector< Tx_, n_, sx_ > &x)
copy-assignment operator
CUDA_HOST_DEVICE const T_ & operator()(int i) const
Access operator.
CUDA_HOST_DEVICE Vector & axpy(DataType alpha, const Vector< T_, n_, snx_ > &x)
Adds another scaled vector onto this vector.
CUDA_HOST_DEVICE void copy(const Vector< T_, n_, snx_ > &x)
Copies a vector.
CUDA_HOST_DEVICE Vector(const std::array< Tx_, n_ > &x)
conversion constructor from a std::array
CUDA_HOST_DEVICE Vector & normalize()
Normalizes this vector.
CUDA_HOST_DEVICE Vector & operator=(const std::array< Tx_, n_ > &x)
assignment operator from a std::array
CUDA_HOST_DEVICE Vector & negate()
Negates the vector, i.e. effectively multiplies all components by -1.
Intern::DataTypeExtractor< ValueType >::MyDataType DataType
The basic data type buried in the lowest level of the vector.
CUDA_HOST_DEVICE Vector(DataType value)
value-assignment constructor
CUDA_HOST_DEVICE DataType norm_max_n() const
Computes the max-norm of the first nn_ entries of this vector.
CUDA_HOST_DEVICE Vector & negate_n()
Negates the first nn_ entries of this vector.
CUDA_HOST_DEVICE DataType norm_l1_n() const
Computes the l1-norm of the first nn_ entries of this vector.
CUDA_HOST_DEVICE Vector & set_convex_n(DataType alpha, const Vector< T_, na_, sna_ > &a, const Vector< T_, nb_, snb_ > &b)
Sets the first nn_ entries of this vector to the convex combination of two other vectors.
CUDA_HOST_DEVICE DataType norm_euclid_sqr() const
Computes the squared euclid norm of this vector.
CUDA_HOST_DEVICE void copy_n(const Vector< T_, nx_, snx_ > &x)
Copies the first nn_ entries of a vector.
CUDA_HOST_DEVICE void scale(DataType alpha)
Scales the vector.
CUDA_HOST_DEVICE Vector(const std::initializer_list< Tx_ > &x)
Initializer list constructor.
static constexpr int s
the stride of the vector
CUDA_HOST_DEVICE Vector & set_mat_vec_mult(const Matrix< T_, n_, m_, sma_, sna_ > &a, const Vector< T_, m_, sx_ > &x)
Sets this vector to the result of a matrix-vector product.
CUDA_HOST_DEVICE Vector & add_vec_mat_mult(const Vector< T_, m_, sx_ > &x, const Matrix< T_, m_, n_, sma_, sna_ > &a, DataType alpha=DataType(1))
Adds the result of a vector-matrix product onto this vector.
CUDA_HOST_DEVICE T_ & operator[](int i)
Access operator.
T_ ValueType
the value type of the vector
CUDA_HOST_DEVICE DataType norm_euclid() const
Computes the euclid norm of this vector.
CUDA_HOST_DEVICE Vector & set_vec_mat_mult_n(const Vector< T_, mx_, smx_ > &x, const Matrix< T_, ma_, na_, sma_, sna_ > &a)
Sets the first mm_ entries of this vector to the result of a vector-matrix product with the first mm_...
CUDA_HOST_DEVICE void convert(const Vector< Tx_, n_, sx_ > &x)
conversion operator
CUDA_HOST_DEVICE Vector & operator*=(const Vector< T_, n_, sx_ > &x)
element-wise-multiply operator
T_ sqrt(T_ x)
Returns the square-root of a value.
T_ abs(T_ x)
Returns the absolute value.
DT_ calc_opening_angle(DT_ x1, DT_ x2, DT_ y1, DT_ y2)
Calculates the opening angle of two 2D vectors.
T_ pow(T_ x, T_ y)
Returns x raised to the power of y.
T_ sin(T_ x)
Returns the sine of a value.
T_ sqr(T_ x)
Returns the square of a value.
T_ min(T_ a, T_ b)
Returns the minimum of two values.
T_ cos(T_ x)
Returns the cosine of a value.
T_ max(T_ a, T_ b)
Returns the maximum of two values.
CUDA_HOST_DEVICE Vector< T_, n_ > operator+(const Vector< T_, n_, sa_ > &a, const Vector< T_, n_, sb_ > &b)
vector addition operator
CUDA_HOST Vector< T_, dim_ > project_onto(const Vector< T_, dim_ > &x, const Vector< T_, dim_ > &y)
Calculates the projected vector.
CUDA_HOST_DEVICE T_ dot(const T_ &a, const T_ &b)
Computes the dot-product of two scalars.
CUDA_HOST_DEVICE void orthogonal_3x2(Vector< T_, mx_, smx_ > &nu, const Matrix< T_, ma_, na_, sm_, sn_ > &tau)
Computes the positively oriented orthogonal vector to the columns of a 3x2 matrix.
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
CUDA_HOST_DEVICE void orthogonal_2x1(Vector< T_, mx_, smx_ > &nu, const Matrix< T_, ma_, na_, sm_, sn_ > &tau)
Computes the positively oriented orthogonal vector to the columns of a 2x1 matrix.
CUDA_HOST_DEVICE void add_id(T_ &x, const T_ &alpha)
Adds a scaled identity onto a scalar.
Vector< T_, m_ > orthogonal(const Matrix< T_, m_, m_-1, sm_, sn_ > &tau)
Computes the positively oriented orthogonal vector to the columns of a m_ x (m_-1) Matrix.
CUDA_HOST_DEVICE Vector< T_, n_ > operator-(const Vector< T_, n_, sa_ > &a, const Vector< T_, n_, sb_ > &b)
vector subtraction operator
CUDA_HOST_DEVICE Vector< T_, n_ > component_product(const Vector< T_, n_, sa_ > &a, const Vector< T_, n_, sb_ > &b)
vector element-wise-product operator
CUDA_HOST_DEVICE Vector< T_, n_ > operator*(typename Vector< T_, n_ >::DataType alpha, const Vector< T_, n_, s_ > &x)
scalar-left-multiplication operator
CUDA_HOST T_ calculate_opening_angle(const Vector< T_, 2 > &x, const Vector< T_, 2 > &y)
Calculates the counter-clockwise opening angle between two 2D vectors.
String stringify(const T_ &item)
Converts an item into a String.
@ value
specifies whether the space should supply basis function values