7#ifndef KERNEL_LAFEM_ARCH_DOT_PRODUCT_HPP
8#define KERNEL_LAFEM_ARCH_DOT_PRODUCT_HPP 1
12#include <kernel/backend.hpp>
13#include <kernel/util/half.hpp>
23 template <
typename DT_>
24 static DT_
value(
const DT_ *
const x,
const DT_ *
const y,
const Index size)
26 return value_generic(x, y, size);
29 template <
typename ValueType_>
30 static ValueType_ value_blocked(
const ValueType_ *
const x,
const ValueType_ *
const y,
const Index size)
32 return value_blocked_generic(x, y, size);
// NOTE(review): this fragment is incomplete in the listing. The function
// signature, the braces around the body and the closing #endif of this
// FEAT_HAVE_HALFMATH section are not visible here. Presumably this is a
// half-precision `value` overload analogous to the float/double overloads
// in this file -- confirm against the original header before relying on it.
35#ifdef FEAT_HAVE_HALFMATH
// Unlike the float/double overloads, the second macro argument here is
// value_generic rather than value_mkl.
38 BACKEND_SKELETON_RETURN(value_cuda, value_generic, value_generic, x, y, size)
42 static float value(
const float *
const x,
const float *
const y,
const Index size)
44 BACKEND_SKELETON_RETURN(value_cuda, value_mkl, value_generic, x, y, size)
47 static double value(
const double *
const x,
const double *
const y,
const Index size)
49 BACKEND_SKELETON_RETURN(value_cuda, value_mkl, value_generic, x, y, size)
52 template <
typename DT_>
53 static DT_ value_generic(
const DT_ *
const x,
const DT_ *
const y,
const Index size);
55 template <
typename ValueType_>
56 static ValueType_ value_blocked_generic(
const ValueType_ *
const x,
const ValueType_ *
const y,
const Index size);
58 static float value_mkl(
const float *
const x,
const float *
const y,
const Index size);
59 static double value_mkl(
const double *
const x,
const double *
const y,
const Index size);
61 template <
typename DT_>
62 static DT_ value_cuda(
const DT_ *
const x,
const DT_ *
const y,
const Index size);
66 extern template float DotProduct::value_generic(
const float *
const,
const float *
const,
const Index);
67 extern template double DotProduct::value_generic(
const double *
const,
const double *
const,
const Index);
72 template <
typename DT_>
73 static DT_
value(
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index size)
75 return value_generic(x, y, z, size);
78 template <
typename ValueType_>
79 static ValueType_ value_blocked(
const ValueType_ *
const x,
const ValueType_ *
const y,
const ValueType_ *
const z,
const Index size)
81 return value_blocked_generic(x, y, z, size);
// NOTE(review): this fragment is incomplete in the listing. The function
// signature, the braces around the body and the closing #endif of this
// FEAT_HAVE_HALFMATH section are not visible here. Presumably this is a
// half-precision three-array `value` overload analogous to the float/double
// overloads in this file -- confirm against the original header.
84#ifdef FEAT_HAVE_HALFMATH
// Unlike the float/double overloads, the second macro argument here is
// value_generic rather than value_mkl.
87 BACKEND_SKELETON_RETURN(value_cuda, value_generic, value_generic, x, y, z, size)
91 static float value(
const float *
const x,
const float *
const y,
const float *
const z,
const Index size)
93 BACKEND_SKELETON_RETURN(value_cuda, value_mkl, value_generic, x, y, z, size)
96 static double value(
const double *
const x,
const double *
const y,
const double *
const z,
const Index size)
98 BACKEND_SKELETON_RETURN(value_cuda, value_mkl, value_generic, x, y, z, size)
101 template <
typename DT_>
102 static DT_ value_generic(
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index size);
104 template <
typename ValueType_>
105 static ValueType_ value_blocked_generic(
const ValueType_ *
const x,
const ValueType_ *
const y,
const ValueType_ *
const z,
const Index size);
107 static float value_mkl(
const float *
const x,
const float *
const y,
const float *
const z,
const Index size);
108 static double value_mkl(
const double *
const x,
const double *
const y,
const double *
const z,
const Index size);
110 template <
typename DT_>
111 static DT_ value_cuda(
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index size);
115 extern template float TripleDotProduct::value_generic(
const float *
const,
const float *
const,
const float *
const,
const Index);
116 extern template double TripleDotProduct::value_generic(
const double *
const,
const double *
const,
const double *
const,
const Index);
123#include <kernel/lafem/arch/dot_product_generic.hpp>
__half Half
Half data type.
@ value
specifies whether the space should supply basis function values
std::uint64_t Index
Index data type.