7#ifndef KERNEL_LAFEM_ARCH_PRODUCT_MATMAT_HPP
8#define KERNEL_LAFEM_ARCH_PRODUCT_MATMAT_HPP 1
12#include <kernel/backend.hpp>
13#include <kernel/util/half.hpp>
23 template <
typename DT_>
24 static void dense(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index rows,
const Index columns,
const Index inner)
26 dense_generic(r, alpha, beta, x, y, z, rows, columns, inner);
// Variant of dense() that is only compiled when FEAT_HAVE_HALFMATH is defined;
// presumably the Half overload, by analogy with the float and double overloads
// of dense() - TODO confirm.
// NOTE(review): neither the signature of this overload nor the #endif closing
// this guard is visible here; verify both against the upstream header.
29#ifdef FEAT_HAVE_HALFMATH
// Hands dense_cuda, dense_generic and dense_generic (in this order) together
// with the call arguments to BACKEND_SKELETON_VOID. Unlike the float and double
// overloads, no dense_mkl variant is named here.
32 BACKEND_SKELETON_VOID(dense_cuda, dense_generic, dense_generic, r, alpha, beta, x, y, z, rows, columns, inner)
36 static void dense(
float * r,
const float alpha,
const float beta,
const float *
const x,
const float *
const y,
const float *
const z,
const Index rows,
const Index columns,
const Index inner)
38 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, x, y, z, rows, columns, inner)
41 static void dense(
double * r,
const double alpha,
const double beta,
const double *
const x,
const double *
const y,
const double *
const z,
const Index rows,
const Index columns,
const Index inner)
43 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, x, y, z, rows, columns, inner)
46 template <
typename DT_,
typename IT_>
47 static void dsd(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index used_elements,
48 const DT_ *
const y,
const Index rows,
const Index columns,
const Index inner)
50 dsd_generic(r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner);
// Half overload of dsd() with std::uint64_t index arrays; only compiled when
// FEAT_HAVE_HALFMATH is defined.
// NOTE(review): the parameter list breaks off after used_elements although the
// macro call below also uses y, rows, columns and inner, and the #endif closing
// this guard is not visible either; verify both against the upstream header.
53#ifdef FEAT_HAVE_HALFMATH
54 static void dsd(
Half * r,
const Half alpha,
const Half beta,
const Half *
const val,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index used_elements,
// Hands dsd_cuda, dsd_generic and dsd_generic (in this order) together with the
// call arguments to BACKEND_SKELETON_VOID.
57 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
61 static void dsd(
float * r,
const float alpha,
const float beta,
const float *
const val,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index used_elements,
62 const float *
const y,
const Index rows,
const Index columns,
const Index inner)
64 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
67 static void dsd(
double * r,
const double alpha,
const double beta,
const double *
const val,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index used_elements,
68 const double *
const y,
const Index rows,
const Index columns,
const Index inner)
70 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
// Half overload of dsd() with std::uint32_t index arrays; only compiled when
// FEAT_HAVE_HALFMATH is defined.
// NOTE(review): the parameter list breaks off after used_elements although the
// macro call below also uses y, rows, columns and inner, and the #endif closing
// this guard is not visible either; verify both against the upstream header.
73#ifdef FEAT_HAVE_HALFMATH
74 static void dsd(
Half * r,
const Half alpha,
const Half beta,
const Half *
const val,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index used_elements,
// Hands dsd_cuda, dsd_generic and dsd_generic (in this order) together with the
// call arguments to BACKEND_SKELETON_VOID.
77 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
81 static void dsd(
float * r,
const float alpha,
const float beta,
const float *
const val,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index used_elements,
82 const float *
const y,
const Index rows,
const Index columns,
const Index inner)
84 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
87 static void dsd(
double * r,
const double alpha,
const double beta,
const double *
const val,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index used_elements,
88 const double *
const y,
const Index rows,
const Index columns,
const Index inner)
90 BACKEND_SKELETON_VOID(dsd_cuda, dsd_generic, dsd_generic, r, alpha, beta, val, col_ind, row_ptr, used_elements, y, rows, columns, inner)
93 template <
typename DT_>
94 static void dense_generic(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index rows,
const Index columns,
const Index inner);
96 static void dense_mkl(
float * r,
const float alpha,
const float beta,
const float *
const x,
const float *
const y,
const float *
const z,
const Index rows,
const Index columns,
const Index inner);
97 static void dense_mkl(
double * r,
const double alpha,
const double beta,
const double *
const x,
const double *
const y,
const double *
const z,
const Index rows,
const Index columns,
const Index inner);
99 template <
typename DT_>
100 static void dense_cuda(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const x,
const DT_ *
const y,
const DT_ *
const z,
const Index rows,
const Index columns,
const Index inner);
102 template <
typename DT_,
typename IT_>
103 static void dsd_generic(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index used_elements,
104 const DT_ *
const y,
const Index rows,
const Index columns,
const Index inner);
106 template <
typename DT_,
typename IT_>
107 static void dsd_cuda(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index used_elements,
108 const DT_ *
const y,
const Index rows,
const Index columns,
const Index inner);
113 extern template void ProductMatMat::dense_generic(
float *,
const float,
const float,
const float *
const,
const float *
const,
const float *
const,
const Index,
const Index,
const Index);
114 extern template void ProductMatMat::dense_generic(
double *,
const double,
const double,
const double *
const,
const double *
const,
const double *
const,
const Index,
const Index,
const Index);
116 extern template void ProductMatMat::dsd_generic(
float *,
const float,
const float,
const float *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const float *
const,
const Index,
const Index,
const Index);
117 extern template void ProductMatMat::dsd_generic(
double *,
const double,
const double,
const double *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const double *
const ,
const Index,
const Index,
const Index);
119 extern template void ProductMatMat::dsd_generic(
float *,
const float,
const float,
const float *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const float *
const,
const Index,
const Index,
const Index);
120 extern template void ProductMatMat::dsd_generic(
double *,
const double,
const double,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const double *
const,
const Index,
const Index,
const Index);
128#include <kernel/lafem/arch/product_matmat_generic.hpp>
// Types referenced in this header:
//   Half  (__half)        - Half data type.
//   Index (std::uint64_t) - Index data type.