7#ifndef KERNEL_LAFEM_ARCH_SCALE_ROW_COL_HPP
8#define KERNEL_LAFEM_ARCH_SCALE_ROW_COL_HPP 1
12#include <kernel/backend.hpp>
22 template <
typename DT_,
typename IT_>
23 static void csr(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index columns,
const Index used_elements)
25 csr_generic(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
28 static void csr(
float * r,
const float *
const a,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const float *
const x,
const Index rows,
const Index columns,
const Index used_elements)
30 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
33 static void csr(
double * r,
const double *
const a,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const double *
const x,
const Index rows,
const Index columns,
const Index used_elements)
35 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
38 static void csr(
float * r,
const float *
const a,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const float *
const x,
const Index rows,
const Index columns,
const Index used_elements)
40 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
43 static void csr(
double * r,
const double *
const a,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const double *
const x,
const Index rows,
const Index columns,
const Index used_elements)
45 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
48 template <
typename DT_,
typename IT_>
49 static void csr_generic(DT_ * r,
const DT_ *
const a,
const IT_ *
const ,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
51 template <
typename DT_,
typename IT_>
52 static void csr_cuda(DT_ * r,
const DT_ *
const a,
const IT_ *
const ,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
54 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
55 static void bcsr(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index columns,
const Index used_elements)
57 if constexpr ( (std::is_same<DT_, double>::value || std::is_same<DT_, float>::value)
58 && (std::is_same<IT_, std::uint32_t>::value || std::is_same<IT_, std::uint64_t>::value))
60 BACKEND_SKELETON_VOID_T2(bh_, bw_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
64 bcsr_generic<bh_, bw_>(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
68 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
69 static void bcsr_generic(DT_ * r,
const DT_ *
const a,
const IT_ *
const ,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
71 template<
typename DT_,
typename IT_>
72 static void bcsr_cuda_intern(DT_*,
const DT_*
const,
const IT_*
const,
const IT_*
const,
const DT_*
const,
const Index,
const Index,
const Index,
const int,
const int);
74 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
75 static void bcsr_cuda(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index cols,
const Index used_el)
77 bcsr_cuda_intern(r, a, col_ind, row_ptr, x, rows, cols, used_el, bh_, bw_);
83 extern template void ScaleRows::csr_generic(
float *,
const float *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const float *
const,
const Index,
const Index,
const Index);
84 extern template void ScaleRows::csr_generic(
double *,
const double *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const double *
const,
const Index,
const Index,
const Index);
85 extern template void ScaleRows::csr_generic(
float *,
const float *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const float *
const,
const Index,
const Index,
const Index);
86 extern template void ScaleRows::csr_generic(
double *,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const double *
const,
const Index,
const Index,
const Index);
94 template <
typename DT_,
typename IT_>
95 static void csr(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index columns,
const Index used_elements)
97 csr_generic(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
100 static void csr(
float * r,
const float *
const a,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const float *
const x,
const Index rows,
const Index columns,
const Index used_elements)
102 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
105 static void csr(
double * r,
const double *
const a,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const double *
const x,
const Index rows,
const Index columns,
const Index used_elements)
107 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
110 static void csr(
float * r,
const float *
const a,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const float *
const x,
const Index rows,
const Index columns,
const Index used_elements)
112 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
115 static void csr(
double * r,
const double *
const a,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const double *
const x,
const Index rows,
const Index columns,
const Index used_elements)
117 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
120 template <
typename DT_,
typename IT_>
121 static void csr_generic(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
123 template <
typename DT_,
typename IT_>
124 static void csr_cuda(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
127 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
128 static void bcsr(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index columns,
const Index used_elements)
130 if constexpr ( (std::is_same<DT_, double>::value || std::is_same<DT_, float>::value)
131 && (std::is_same<IT_, std::uint32_t>::value || std::is_same<IT_, std::uint64_t>::value))
133 BACKEND_SKELETON_VOID_T2(bh_, bw_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
137 bcsr_generic<bh_, bw_>(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
141 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
142 static void bcsr_generic(DT_ * r,
const DT_ *
const a,
const IT_ *
const ,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index,
const Index);
144 template<
typename DT_,
typename IT_>
145 static void bcsr_cuda_intern(DT_*,
const DT_*
const,
const IT_*
const,
const IT_*
const,
const DT_*
const,
const Index,
const Index,
const Index,
const int,
const int);
147 template <
int bh_,
int bw_,
typename DT_,
typename IT_>
148 static void bcsr_cuda(DT_ * r,
const DT_ *
const a,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const DT_ *
const x,
const Index rows,
const Index cols,
const Index used_el)
150 bcsr_cuda_intern(r, a, col_ind, row_ptr, x, rows, cols, used_el, bh_, bw_);
155 extern template void ScaleCols::csr_generic(
float *,
const float *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const float *
const,
const Index,
const Index,
const Index);
156 extern template void ScaleCols::csr_generic(
double *,
const double *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const double *
const,
const Index,
const Index,
const Index);
157 extern template void ScaleCols::csr_generic(
float *,
const float *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const float *
const,
const Index,
const Index,
const Index);
158 extern template void ScaleCols::csr_generic(
double *,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const double *
const,
const Index,
const Index,
const Index);
165#include <kernel/lafem/arch/scale_row_col_generic.hpp>
std::uint64_t Index
Index data type.