7#ifndef KERNEL_LAFEM_ARCH_DOT_PRODUCT_GENERIC_HPP
8#define KERNEL_LAFEM_ARCH_DOT_PRODUCT_GENERIC_HPP 1
10#ifndef KERNEL_LAFEM_ARCH_DOT_PRODUCT_HPP
11#error "Do not include this implementation-only header file directly!"
// DotProduct::value_generic<DT_>(x, y, size) -> DT_
//
// Parameters (all read-only):
//   x, y  - const pointers to const DT_ input arrays
//   size  - number of entries iterated over (type Index)
//
// NOTE(review): this listing is incomplete. The leading numbers on some lines
// look like line numbers of the original file fused into the text, and the
// lines between them (declaration of `r`, the loop bodies, any branching and
// the return statement) are not visible here.
20 template <
typename DT_>
21 DT_ DotProduct::value_generic(
const DT_ *
const x,
const DT_ *
const y,
const Index size)
// First visible loop: i runs over [0, size); the FEAT_PRAGMA_OMP argument
// requests a '+' reduction on `r` (presumably expands to an OpenMP pragma -
// TODO confirm against the macro definition). The loop body is not visible.
27 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
28 for (
Index i = 0 ; i < size ; ++i)
// Second visible loop with the same bounds and the same reduction clause.
// NOTE(review): presumably the two loops are alternative branches (e.g. an
// x == y special case vs. the general case, mirroring the x*x and x*y
// updates seen in value_blocked_generic) - verify against the full source.
35 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
36 for (
Index i = 0 ; i < size ; ++i)
// DotProduct::value_blocked_generic<ValueType_>(x, y, size) -> ValueType_
//
// Parameters (all read-only):
//   x, y  - const pointers to const ValueType_ entries; each entry is indexed
//           with [j] for j in [0, ValueType_::n)
//   size  - number of entries iterated over (type Index)
//
// The visible statements accumulate component-wise into r[j], i.e. component j
// of `r` receives the sum over i of the j-th component products.
//
// NOTE(review): this listing is incomplete. The declaration/initialisation of
// `r`, the condition selecting between the two loop nests, the closing braces
// of the inner loops and the return statement are not visible here.
45 template <
typename ValueType_>
46 ValueType_ DotProduct::value_blocked_generic(
const ValueType_ *
const x,
const ValueType_ *
const y,
const Index size)
// First loop nest: uses x for both factors (squares each component of x[i]).
// NOTE(review): presumably the branch taken when x and y alias - confirm.
53 for (
Index i(0) ; i < size ; ++i)
55 for(
int j(0); j < ValueType_::n; ++j) {
56 r[j] += x[i][j] * x[i][j];
// Second loop nest: component-wise product of x[i] and y[i].
63 for (
Index i(0) ; i < size ; ++i)
65 for(
int j(0); j < ValueType_::n; ++j) {
66 r[j] += x[i][j] * y[i][j];
// TripleDotProduct::value_generic<DT_>(x, y, z, size) -> DT_
//
// Parameters (all read-only):
//   x, y, z - const pointers to const DT_ input arrays
//   size    - number of entries iterated over (type Index)
//
// Each visible loop adds a product of three factors per index i to `r`, with a
// '+' reduction on `r` requested through FEAT_PRAGMA_OMP (presumably an OpenMP
// pragma - TODO confirm against the macro definition).
//
// NOTE(review): this listing is incomplete. The declaration of `r`, the
// conditions that select one of the four loops and the return statement are
// not visible here. The four variants differ only in which array is reused,
// so presumably they are pointer-aliasing special cases (x == y, x == z,
// y == z, general) - verify against the full source.
74 template <
typename DT_>
75 DT_ TripleDotProduct::value_generic(
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index size)
// Variant 1: x is used twice, y is not read.
81 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
82 for (
Index i = 0 ; i < size ; ++i)
83 r += x[i] * x[i] * z[i];
// Variant 2: x is used twice, z is not read.
87 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
88 for (
Index i = 0 ; i < size ; ++i)
89 r += x[i] * x[i] * y[i];
// Variant 3: y is used twice, z is not read.
93 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
94 for (
Index i = 0 ; i < size ; ++i)
95 r += x[i] * y[i] * y[i];
// Variant 4: all three arrays are read.
99 FEAT_PRAGMA_OMP(parallel
for reduction(+:r))
100 for (
Index i = 0 ; i < size ; ++i)
101 r += x[i] * y[i] * z[i];
// TripleDotProduct::value_blocked_generic<ValueType_>(x, y, z, size) -> ValueType_
//
// Parameters (all read-only):
//   x, y, z - const pointers to const ValueType_ entries; each entry is
//             indexed with [j] for j in [0, ValueType_::n)
//   size    - number of entries iterated over (type Index)
//
// Each visible loop nest accumulates, per component j, a product of three
// factors into r[j]. No FEAT_PRAGMA_OMP annotation appears on these loops in
// the visible lines.
//
// NOTE(review): this listing is incomplete. The declaration of `r`, the
// conditions that select one of the four loop nests and the return statement
// are not visible here. Presumably the variants are pointer-aliasing special
// cases (x == y, x == z, y == z, general) - verify against the full source.
107 template <
typename ValueType_>
108 ValueType_ TripleDotProduct::value_blocked_generic(
const ValueType_ *
const x,
const ValueType_ *
const y,
const ValueType_ *
const z,
const Index size)
// Variant 1: x is used twice, y is not read.
115 for(
Index i(0); i < size; ++i)
117 for(
int j(0); j < ValueType_::n; ++j)
119 r[j] += x[i][j] * x[i][j] * z[i][j];
// Variant 2: x is used twice, z is not read.
126 for (
Index i(0) ; i < size ; ++i)
128 for(
int j(0); j < ValueType_::n; ++j)
130 r[j] += x[i][j] * x[i][j] * y[i][j];
// Variant 3: y is used twice, z is not read.
137 for (
Index i(0) ; i < size ; ++i)
139 for(
int j(0); j < ValueType_::n; ++j)
141 r[j] += x[i][j] * y[i][j] * y[i][j];
// Variant 4: all three arrays are read.
148 for(
Index i(0); i < size; ++i)
150 for(
int j(0); j < ValueType_::n; ++j)
152 r[j] += x[i][j] * y[i][j] * z[i][j];
std::uint64_t Index
Index data type.