7#ifndef KERNEL_LAFEM_ARCH_APPLY_HPP
8#define KERNEL_LAFEM_ARCH_APPLY_HPP 1
13#include <kernel/backend.hpp>
14#include <kernel/lafem/arch/product_matmat.hpp>
15#include <kernel/util/half.hpp>
/// Generic CSR apply entry point for any DT_/IT_ pair that has no dedicated
/// non-template overload.
/// Forwards every argument unchanged to csr_generic; no backend macro is used here.
/// NOTE(review): the parameter names suggest r = a * A * x + b * y with A described by
/// (val, col_ind, row_ptr) — confirm against the csr_generic implementation.
27 template <
typename DT_,
typename IT_>
28 static void csr(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
29 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
30 const Index used_elements,
const bool transposed)
32 csr_generic(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed);
35#ifdef FEAT_HAVE_HALFMATH
/// Half / std::uint64_t overload of the CSR apply (only declared when FEAT_HAVE_HALFMATH is defined).
/// Hands csr_cuda, csr_generic, csr_generic to BACKEND_SKELETON_VOID; unlike the
/// float/double 64-bit overloads, csr_mkl is not among the candidates.
/// NOTE(review): BACKEND_SKELETON_VOID is defined outside this view; presumably it selects
/// one of the three candidates by preferred backend — confirm in kernel/backend.hpp.
36 static void csr(
Half * r,
const Half a,
const Half *
const x,
const Half b,
const Half *
const y,
const Half *
const val,
37 const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
38 const Index used_elements,
const bool transposed)
40 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
/// float / std::uint64_t overload of the CSR apply.
/// Passes csr_cuda, csr_mkl and csr_generic as the candidate implementations to
/// BACKEND_SKELETON_VOID, followed by the unchanged argument list.
/// NOTE(review): the macro is defined outside this view; the candidate order looks like
/// (CUDA, MKL, generic) — confirm in kernel/backend.hpp.
44 static void csr(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
45 const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
46 const Index used_elements,
const bool transposed)
48 BACKEND_SKELETON_VOID(csr_cuda, csr_mkl, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
/// double / std::uint64_t overload; uses the same three candidates as the float overload above.
51 static void csr(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
52 const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
53 const Index used_elements,
const bool transposed)
55 BACKEND_SKELETON_VOID(csr_cuda, csr_mkl, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
58#ifdef FEAT_HAVE_HALFMATH
/// Half / std::uint32_t overload of the CSR apply (only declared when FEAT_HAVE_HALFMATH is defined).
/// Candidates handed to BACKEND_SKELETON_VOID: csr_cuda, csr_generic, csr_generic.
/// NOTE(review): macro semantics are not visible here — confirm in kernel/backend.hpp.
59 static void csr(
Half * r,
const Half a,
const Half *
const x,
const Half b,
const Half *
const y,
const Half *
const val,
60 const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
61 const Index used_elements,
const bool transposed)
63 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
/// float / std::uint32_t overload of the CSR apply.
/// csr_generic is passed in the slot where the 64-bit overloads pass csr_mkl; the csr_mkl
/// declarations in this file only accept `const Index *` index arrays.
67 static void csr(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
68 const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
69 const Index used_elements,
const bool transposed)
71 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
/// double / std::uint32_t overload; same candidates as the float overload above.
74 static void csr(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
75 const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
76 const Index used_elements,
const bool transposed)
78 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
/// CSCR apply entry point.
/// Only this template exists in the visible part of the file (no type-specific overloads);
/// it forwards all arguments unchanged to cscr_generic.
/// Compared to csr it additionally takes the row_numbers array and the used_rows count.
81 template <
typename DT_,
typename IT_>
82 static void cscr(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
83 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const IT_ *
const row_numbers,
const Index used_rows,
const Index rows,
const Index columns,
84 const Index used_elements,
const bool transposed)
86 cscr_generic(r, a, x, b, y, val, col_ind, row_ptr, row_numbers, used_rows, rows, columns, used_elements, transposed);
/// Generic BCSR apply entry point, parameterised on the compile-time block dimensions.
/// Forwards to bcsr_generic with all four template arguments spelled out explicitly.
89 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
90 static void bcsr(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
91 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
92 const Index used_elements)
94 bcsr_generic<BlockHeight_, BlockWidth_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
/// Transposed BCSR apply entry point.
/// Always forwards to bcsr_transposed_generic; no backend macro is involved, and no
/// type-specific overloads of bcsr_transposed are visible in this file.
97 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
98 static void bcsr_transposed(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
99 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
100 const Index used_elements)
102 bcsr_transposed_generic<BlockHeight_, BlockWidth_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
/// float / std::uint64_t overload of the BCSR apply.
/// Two BACKEND_SKELETON_VOID_T2 dispatches are listed: one that offers bcsr_mkl, guarded by
/// BlockHeight_ == BlockWidth_, and one that offers bcsr_generic in the same slot.
/// NOTE(review): the embedded line numbering skips one line between the two dispatches;
/// presumably that line is an `else`, making the second dispatch the non-square path — confirm
/// against the original file before relying on this.
105 template <
int BlockHeight_,
int BlockW
idth_>
106 static void bcsr(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
107 const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
108 const Index used_elements)
// bcsr_mkl asserts square blocks, hence the guard on the block dimensions.
110 if (BlockHeight_ == BlockWidth_)
111 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_mkl, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
113 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// double / std::uint64_t overload of the BCSR apply; same structure as the float overload above,
/// including the same caveat about the line missing between the two dispatches.
116 template <
int BlockHeight_,
int BlockW
idth_>
117 static void bcsr(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
118 const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
119 const Index used_elements)
121 if (BlockHeight_ == BlockWidth_)
122 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_mkl, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
124 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// float / std::uint32_t overload of the BCSR apply.
/// A single dispatch with candidates bcsr_cuda, bcsr_generic, bcsr_generic; bcsr_mkl is never
/// offered for 32-bit indices (its non-template overloads take `const Index *` index arrays).
127 template <
int BlockHeight_,
int BlockW
idth_>
128 static void bcsr(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
129 const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
130 const Index used_elements)
132 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// double / std::uint32_t overload of the BCSR apply; same candidates as the float overload above.
135 template <
int BlockHeight_,
int BlockW
idth_>
136 static void bcsr(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
137 const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
138 const Index used_elements)
140 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// Generic CSRSB apply entry point, parameterised on a single compile-time BlockSize_.
/// Forwards all arguments to csrsb_generic with the template arguments given explicitly.
143 template <
int BlockSize_,
typename DT_,
typename IT_>
144 static void csrsb(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
146 csrsb_generic<BlockSize_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
/// Type-specific CSRSB apply overloads (double/float x std::uint64_t/std::uint32_t).
/// All four hand the same candidates — csrsb_cuda, csrsb_generic, csrsb_generic — to
/// BACKEND_SKELETON_VOID_T1 together with BlockSize_; no MKL candidate is offered.
/// NOTE(review): BACKEND_SKELETON_VOID_T1 is defined outside this view — confirm its
/// selection logic in kernel/backend.hpp.
/// double / std::uint64_t
149 template <
int BlockSize_>
150 static void csrsb(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
152 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// double / std::uint32_t
155 template <
int BlockSize_>
156 static void csrsb(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
158 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// float / std::uint64_t
161 template <
int BlockSize_>
162 static void csrsb(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
const std::uint64_t *
const col_ind,
const std::uint64_t *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
164 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// float / std::uint32_t
167 template <
int BlockSize_>
168 static void csrsb(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
const std::uint32_t *
const col_ind,
const std::uint32_t *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
170 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
/// Generic banded-matrix apply entry point.
/// Forwards all arguments unchanged to banded_generic.
/// NOTE(review): names suggest r = alpha * A * x + beta * y with A stored by diagonals
/// (val, offsets, num_of_offsets) — confirm against banded_generic.
173 template <
typename DT_,
typename IT_>
174 static void banded(DT_ * r,
const DT_ alpha,
const DT_ *
const x,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
176 banded_generic(r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns);
/// Transposed banded-matrix apply entry point.
/// Forwards all arguments unchanged to banded_transposed_generic; no type-specific
/// overloads or backend dispatch for the transposed variant are visible in this file.
179 template <
typename DT_,
typename IT_>
180 static void banded_transposed(DT_ * r,
const DT_ alpha,
const DT_ *
const x,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
182 banded_transposed_generic(r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns);
/// Type-specific banded apply overloads (float/double x std::uint64_t/std::uint32_t).
/// All four hand banded_cuda, banded_generic, banded_generic to BACKEND_SKELETON_VOID;
/// no MKL candidate is offered for the banded format.
/// NOTE(review): macro semantics are defined outside this view — confirm in kernel/backend.hpp.
/// float / std::uint64_t
185 static void banded(
float * r,
const float alpha,
const float *
const x,
const float beta,
const float *
const y,
const float *
const val,
const std::uint64_t *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
187 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
/// double / std::uint64_t
190 static void banded(
double * r,
const double alpha,
const double *
const x,
const double beta,
const double *
const y,
const double *
const val,
const std::uint64_t *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
192 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
/// float / std::uint32_t
195 static void banded(
float * r,
const float alpha,
const float *
const x,
const float beta,
const float *
const y,
const float *
const val,
const std::uint32_t *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
197 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
/// double / std::uint32_t
200 static void banded(
double * r,
const double alpha,
const double *
const x,
const double beta,
const double *
const y,
const double *
const val,
const std::uint32_t *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns)
202 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
/// Generic dense-matrix apply entry point.
/// Forwards all arguments unchanged to dense_generic.
/// Note the parameter order differs from the sparse variants: alpha and beta come first,
/// followed by y, val and x.
205 template <
typename DT_>
206 static void dense(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const DT_ *
const x,
const Index rows,
const Index columns)
208 dense_generic(r, alpha, beta, y, val, x, rows, columns);
/// Transposed dense-matrix apply entry point.
/// Forwards all arguments unchanged to dense_transposed_generic; no type-specific overloads
/// of the transposed variant are visible in this file.
211 template <
typename DT_>
212 static void dense_transposed(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const DT_ *
const x,
const Index rows,
const Index columns)
214 dense_transposed_generic(r, alpha, beta, y, val, x, rows, columns);
217#ifdef FEAT_HAVE_HALFMATH
/// Half overload of the dense apply (only declared when FEAT_HAVE_HALFMATH is defined).
/// Two calls are visible: ProductMatMat::dense_cuda and dense_generic.
/// NOTE(review): the embedded line numbering skips several lines around both calls, so the
/// control flow that chooses between them is not visible in this extract — presumably a
/// CUDA-availability / preferred-backend switch; confirm against the original file.
218 static void dense(
Half * r,
const Half alpha,
const Half beta,
const Half *
const y,
const Half *
const val,
const Half *
const x,
const Index rows,
const Index columns)
// Reuses the matrix-matrix product kernel; note the argument order (val, x, y) and the
// literal 1 passed between rows and columns.
225 ProductMatMat::dense_cuda(r, alpha, beta, val, x, y, rows, 1, columns);
229 dense_generic(r, alpha, beta, y, val, x, rows, columns);
/// float overload of the dense apply.
/// Hands dense_cuda, dense_mkl and dense_generic to BACKEND_SKELETON_VOID together with the
/// unchanged argument list.
/// NOTE(review): macro semantics are defined outside this view — confirm in kernel/backend.hpp.
234 static void dense(
float * r,
const float alpha,
const float beta,
const float *
const y,
const float *
const val,
const float *
const x,
const Index rows,
const Index columns)
236 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, y, val, x, rows, columns)
/// double overload of the dense apply; same three candidates as the float overload above.
239 static void dense(
double * r,
const double alpha,
const double beta,
const double *
const y,
const double *
const val,
const double *
const x,
const Index rows,
const Index columns)
241 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, y, val, x, rows, columns)
/// Declaration of the generic CSR apply kernel; the definition is not in this part of the file.
/// The last three parameters (two Index values and a bool) are unnamed here; the csr()
/// callers pass columns, used_elements and transposed in those positions.
245 template <
typename DT_,
typename IT_>
246 static void csr_generic(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
247 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index,
const Index,
const bool);
/// Declaration of the generic CSCR apply kernel.
/// NOTE(review): this declaration is cut off in the extract — the parameter list stops after
/// used_rows and the embedded numbering skips the following lines. The cscr() caller passes
/// rows, columns, used_elements and transposed after used_rows; confirm the tail of the
/// signature against the original file.
249 template <
typename DT_,
typename IT_>
250 static void cscr_generic(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
251 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const IT_ *
const row_numbers,
const Index used_rows,
/// Declaration of the generic BCSR apply kernel, templated on the block dimensions.
/// The last two Index parameters are unnamed; bcsr() passes columns and used_elements there.
254 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
255 static void bcsr_generic(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
256 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index,
const Index);
/// Declaration of the generic transposed BCSR apply kernel; same parameter list as bcsr_generic.
258 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
259 static void bcsr_transposed_generic(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
260 const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index,
const Index);
/// Declaration of the generic CSRSB apply kernel, templated on BlockSize_.
/// The last two Index parameters are unnamed; csrsb() passes columns and used_elements there.
262 template <
int BlockSize_,
typename DT_,
typename IT_>
263 static void csrsb_generic(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index,
const Index);
/// Declaration of the generic banded apply kernel; all parameters are named and match banded().
265 template <
typename DT_,
typename IT_>
266 static void banded_generic(DT_ * r,
const DT_ alpha,
const DT_ *
const x,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns);
/// Declaration of the generic transposed banded apply kernel; same parameter list as banded_generic.
268 template <
typename DT_,
typename IT_>
269 static void banded_transposed_generic(DT_ * r,
const DT_ alpha,
const DT_ *
const x,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns);
/// Declaration of the generic dense apply kernel.
/// The fourth parameter is named rhs here; dense() passes its y argument in that position.
271 template <
typename DT_>
272 static void dense_generic(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const rhs,
const DT_ *
const val,
const DT_ *
const x,
const Index rows,
const Index columns);
/// Declaration of the generic transposed dense apply kernel; same parameter list as dense_generic.
274 template <
typename DT_>
275 static void dense_transposed_generic(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const rhs,
const DT_ *
const val,
const DT_ *
const x,
const Index rows,
const Index columns);
/// Declaration of the MKL CSR apply kernel for float.
/// Index arrays are typed `const Index *`, so only the Index-width csr() overloads can pass
/// this function to the backend macro. The trailing Index and bool parameters are unnamed.
277 static void csr_mkl(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const bool);
/// Declaration of the MKL CSR apply kernel for double; same parameter list as the float version.
278 static void csr_mkl(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const bool);
/// Template wrapper that adapts the compile-time block dimensions to the non-template
/// bcsr_mkl overloads, which take the block size as a trailing runtime int.
/// Asserts (via XASSERTM) that the block is square, then passes BlockHeight_ as the block size.
280 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
281 static void bcsr_mkl(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
283 XASSERTM(BlockHeight_ == BlockWidth_,
"MKL only supports square blocks!");
// The extra trailing argument selects the 12-parameter non-template overload.
284 bcsr_mkl(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_);
/// Declaration of the MKL BCSR apply kernel for float.
/// Takes `const Index *` index arrays and the block size as a runtime int; the Index
/// parameter before blocksize is unnamed (the template wrapper passes used_elements there).
287 static void bcsr_mkl(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const int blocksize);
/// Declaration of the MKL BCSR apply kernel for double; same parameter list as the float version.
288 static void bcsr_mkl(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const int blocksize);
290 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
291 static void bcsr_transposed_mkl(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
293 XASSERTM(BlockHeight_ == BlockWidth_,
"MKL only supports square blocks!");
294 bcsr_mkl(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_);
/// Declaration of the MKL transposed BCSR apply kernel for float.
/// Same parameter list as the non-transposed bcsr_mkl: `const Index *` index arrays, an
/// unnamed Index, and the block size as a trailing runtime int.
297 static void bcsr_transposed_mkl(
float * r,
const float a,
const float *
const x,
const float b,
const float *
const y,
const float *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const int blocksize);
/// Declaration of the MKL transposed BCSR apply kernel for double; same parameter list as the float version.
298 static void bcsr_transposed_mkl(
double * r,
const double a,
const double *
const x,
const double b,
const double *
const y,
const double *
const val,
const Index *
const col_ind,
const Index *
const row_ptr,
const Index rows,
const Index columns,
const Index,
const int blocksize);
/// Declaration of the MKL dense apply kernel for float; parameter order matches dense().
300 static void dense_mkl(
float * r,
const float alpha,
const float beta,
const float *
const y,
const float *
const val,
const float *
const x,
const Index rows,
const Index columns);
/// Declaration of the MKL dense apply kernel for double; same parameter list as the float version.
301 static void dense_mkl(
double * r,
const double alpha,
const double beta,
const double *
const y,
const double *
const val,
const double *
const x,
const Index rows,
const Index columns);
/// Declaration of the CUDA CSR apply kernel wrapper, templated on data and index type.
/// Only declared here; the definition is not part of this view.
303 template <
typename DT_,
typename IT_>
304 static void csr_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements,
const bool transposed);
/// Template wrapper for the CUDA BCSR apply.
/// Asserts (via XASSERTM) that BlockHeight_ is below 10, then forwards to bcsr_wrapper_cuda
/// with both block dimensions passed as trailing runtime ints.
/// Only BlockHeight_ is checked here; BlockWidth_ is forwarded unchecked.
306 template <
int BlockHeight_,
int BlockW
idth_,
typename DT_,
typename IT_>
307 static void bcsr_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements)
309 XASSERTM(BlockHeight_ < 10,
"The generic cuda bcsr kernel does not support BlockHeight greather than 9!");
310 bcsr_wrapper_cuda(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_, BlockWidth_);
/// Declaration of the CUDA BCSR wrapper that receives the block dimensions as runtime ints.
/// Called from bcsr_cuda; the definition is not part of this view.
313 template <
typename DT_,
typename IT_>
314 static void bcsr_wrapper_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements,
const int BlockHeight,
const int BlockWidth);
/// Declaration of the internal CUDA BCSR kernel entry taking a single runtime BlockSize.
/// Not called from any code visible in this file.
/// NOTE(review): presumably the square-block path used by bcsr_wrapper_cuda — confirm in the
/// CUDA implementation file.
316 template <
typename DT_,
typename IT_>
317 static void bcsr_intern_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements,
const int BlockSize);
/// Overload of bcsr_intern_cuda taking separate runtime BlockHeight and BlockWidth values.
319 template <
typename DT_,
typename IT_>
320 static void bcsr_intern_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements,
const int BlockHeight,
const int BlockWidth);
/// Declaration of the CUDA CSRSB apply kernel wrapper, templated on BlockSize_, data and index type.
/// Passed as the first candidate by every type-specific csrsb() overload.
322 template <
int BlockSize_,
typename DT_,
typename IT_>
323 static void csrsb_cuda(DT_ * r,
const DT_ a,
const DT_ *
const x,
const DT_ b,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows,
const Index columns,
const Index used_elements);
/// Declaration of the CUDA banded apply kernel wrapper; parameter list matches banded().
325 template <
typename DT_,
typename IT_>
326 static void banded_cuda(DT_ * r,
const DT_ alpha,
const DT_ *
const x,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const IT_ *
const offsets,
const Index num_of_offsets,
const Index rows,
const Index columns);
/// Declaration of the CUDA dense apply kernel wrapper; parameter list matches dense().
/// Used as the first candidate by the float and double dense() overloads; the Half overload
/// calls ProductMatMat::dense_cuda instead.
328 template <
typename DT_>
329 static void dense_cuda(DT_ * r,
const DT_ alpha,
const DT_ beta,
const DT_ *
const y,
const DT_ *
const val,
const DT_ *
const x,
const Index rows,
const Index columns);
// Explicit instantiation declarations of Apply::csr_generic for
// {float, double} x {std::uint64_t, std::uint32_t}. `extern template` tells including
// translation units not to instantiate these combinations implicitly; the matching
// explicit instantiation definitions must be provided elsewhere.
// float / std::uint64_t
333 extern template void Apply::csr_generic(
float *,
const float,
const float *
const,
const float,
const float *
const,
const float *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index,
const bool);
// float / std::uint32_t
334 extern template void Apply::csr_generic(
float *,
const float,
const float *
const,
const float,
const float *
const,
const float *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index,
const bool);
// double / std::uint64_t
335 extern template void Apply::csr_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index,
const bool);
// double / std::uint32_t
336 extern template void Apply::csr_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index,
const bool);
338 extern template void Apply::cscr_generic(
float *,
const float,
const float *
const,
const float,
const float *
const,
const float *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index,
const Index,
const bool);
339 extern template void Apply::cscr_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index,
const Index,
const bool);
340 extern template void Apply::cscr_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index,
const Index,
const bool);
341 extern template void Apply::cscr_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index,
const Index,
const bool);
// Explicit instantiation declarations of Apply::banded_generic for
// {float, double} x {std::uint64_t, std::uint32_t}; the matching explicit instantiation
// definitions must be provided elsewhere.
// float / std::uint64_t
343 extern template void Apply::banded_generic(
float *,
const float,
const float *
const,
const float,
const float *
const,
const float *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index);
// float / std::uint32_t
344 extern template void Apply::banded_generic(
float *,
const float,
const float *
const,
const float,
const float *
const,
const float *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index);
// double / std::uint64_t
345 extern template void Apply::banded_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint64_t *
const,
const Index,
const Index,
const Index);
// double / std::uint32_t
346 extern template void Apply::banded_generic(
double *,
const double,
const double *
const,
const double,
const double *
const,
const double *
const,
const std::uint32_t *
const,
const Index,
const Index,
const Index);
// Explicit instantiation declarations of Apply::dense_generic for float and double;
// the matching explicit instantiation definitions must be provided elsewhere.
// float
348 extern template void Apply::dense_generic(
float *,
const float,
const float,
const float *
const,
const float *
const,
const float *
const,
const Index,
const Index);
// double
349 extern template void Apply::dense_generic(
double *,
const double,
const double,
const double *
const,
const double *
const,
const double *
const,
const Index,
const Index);
357#include <kernel/lafem/arch/apply_generic.hpp>
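// Glossary of symbols referenced above (tooltip text from the documentation listing; not code):
//   XASSERTM(expr, msg)                            - Assertion macro definition with custom message.
//   static PreferredBackend get_preferred_backend() - Get the current preferred backend.
//   Half  (__half)                                 - Half data type.
//   Index (std::uint64_t)                          - Index data type.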