FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
apply.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7#ifndef KERNEL_LAFEM_ARCH_APPLY_HPP
8#define KERNEL_LAFEM_ARCH_APPLY_HPP 1
9
10// includes, FEAT
13#include <kernel/backend.hpp>
14#include <kernel/lafem/arch/product_matmat.hpp>
15#include <kernel/util/half.hpp>
16
17#include <typeinfo>
18
19namespace FEAT
20{
21 namespace LAFEM
22 {
23 namespace Arch
24 {
25 struct Apply
26 {
27 template <typename DT_, typename IT_>
28 static void csr(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
29 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns,
30 const Index used_elements, const bool transposed)
31 {
32 csr_generic(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed);
33 }
34
35#ifdef FEAT_HAVE_HALFMATH
36 static void csr(Half * r, const Half a, const Half * const x, const Half b, const Half * const y, const Half * const val,
37 const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns,
38 const Index used_elements, const bool transposed)
39 {
40 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
41 }
42#endif
43
44 static void csr(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val,
45 const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns,
46 const Index used_elements, const bool transposed)
47 {
48 BACKEND_SKELETON_VOID(csr_cuda, csr_mkl, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
49 }
50
51 static void csr(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val,
52 const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns,
53 const Index used_elements, const bool transposed)
54 {
55 BACKEND_SKELETON_VOID(csr_cuda, csr_mkl, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
56 }
57
58#ifdef FEAT_HAVE_HALFMATH
59 static void csr(Half * r, const Half a, const Half * const x, const Half b, const Half * const y, const Half * const val,
60 const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns,
61 const Index used_elements, const bool transposed)
62 {
63 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
64 }
65#endif
66
67 static void csr(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val,
68 const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns,
69 const Index used_elements, const bool transposed)
70 {
71 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
72 }
73
74 static void csr(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val,
75 const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns,
76 const Index used_elements, const bool transposed)
77 {
78 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, transposed)
79 }
80
81 template <typename DT_, typename IT_>
82 static void cscr(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
83 const IT_ * const col_ind, const IT_ * const row_ptr, const IT_ * const row_numbers, const Index used_rows, const Index rows, const Index columns,
84 const Index used_elements, const bool transposed)
85 {
86 cscr_generic(r, a, x, b, y, val, col_ind, row_ptr, row_numbers, used_rows, rows, columns, used_elements, transposed);
87 }
88
89 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
90 static void bcsr(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
91 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns,
92 const Index used_elements)
93 {
94 bcsr_generic<BlockHeight_, BlockWidth_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
95 }
96
97 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
98 static void bcsr_transposed(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
99 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns,
100 const Index used_elements)
101 {
102 bcsr_transposed_generic<BlockHeight_, BlockWidth_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
103 }
104
105 template <int BlockHeight_, int BlockWidth_>
106 static void bcsr(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val,
107 const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns,
108 const Index used_elements)
109 {
110 if (BlockHeight_ == BlockWidth_)
111 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_mkl, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
112 else
113 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
114 }
115
116 template <int BlockHeight_, int BlockWidth_>
117 static void bcsr(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val,
118 const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns,
119 const Index used_elements)
120 {
121 if (BlockHeight_ == BlockWidth_)
122 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_mkl, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
123 else
124 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
125 }
126
127 template <int BlockHeight_, int BlockWidth_>
128 static void bcsr(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val,
129 const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns,
130 const Index used_elements)
131 {
132 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
133 }
134
135 template <int BlockHeight_, int BlockWidth_>
136 static void bcsr(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val,
137 const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns,
138 const Index used_elements)
139 {
140 BACKEND_SKELETON_VOID_T2(BlockHeight_, BlockWidth_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements)
141 }
142
143 template <int BlockSize_, typename DT_, typename IT_>
144 static void csrsb(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements)
145 {
146 csrsb_generic<BlockSize_, DT_, IT_>(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
147 }
148
149 template <int BlockSize_>
150 static void csrsb(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns, const Index used_elements)
151 {
152 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
153 }
154
155 template <int BlockSize_>
156 static void csrsb(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns, const Index used_elements)
157 {
158 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
159 }
160
161 template <int BlockSize_>
162 static void csrsb(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const Index rows, const Index columns, const Index used_elements)
163 {
164 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
165 }
166
167 template <int BlockSize_>
168 static void csrsb(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const Index rows, const Index columns, const Index used_elements)
169 {
170 BACKEND_SKELETON_VOID_T1(BlockSize_, csrsb_cuda, csrsb_generic, csrsb_generic, r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements);
171 }
172
173 template <typename DT_, typename IT_>
174 static void banded(DT_ * r, const DT_ alpha, const DT_ * const x, const DT_ beta, const DT_ * const y, const DT_ * const val, const IT_ * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
175 {
176 banded_generic(r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns);
177 }
178
179 template <typename DT_, typename IT_>
180 static void banded_transposed(DT_ * r, const DT_ alpha, const DT_ * const x, const DT_ beta, const DT_ * const y, const DT_ * const val, const IT_ * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
181 {
182 banded_transposed_generic(r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns);
183 }
184
185 static void banded(float * r, const float alpha, const float * const x, const float beta, const float * const y, const float * const val, const std::uint64_t * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
186 {
187 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
188 }
189
190 static void banded(double * r, const double alpha, const double * const x, const double beta, const double * const y, const double * const val, const std::uint64_t * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
191 {
192 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
193 }
194
195 static void banded(float * r, const float alpha, const float * const x, const float beta, const float * const y, const float * const val, const std::uint32_t * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
196 {
197 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
198 }
199
200 static void banded(double * r, const double alpha, const double * const x, const double beta, const double * const y, const double * const val, const std::uint32_t * const offsets, const Index num_of_offsets, const Index rows, const Index columns)
201 {
202 BACKEND_SKELETON_VOID(banded_cuda, banded_generic, banded_generic, r, alpha, x, beta, y, val, offsets, num_of_offsets, rows, columns)
203 }
204
205 template <typename DT_>
206 static void dense(DT_ * r, const DT_ alpha, const DT_ beta, const DT_ * const y, const DT_ * const val, const DT_ * const x, const Index rows, const Index columns)
207 {
208 dense_generic(r, alpha, beta, y, val, x, rows, columns);
209 }
210
211 template <typename DT_>
212 static void dense_transposed(DT_ * r, const DT_ alpha, const DT_ beta, const DT_ * const y, const DT_ * const val, const DT_ * const x, const Index rows, const Index columns)
213 {
214 dense_transposed_generic(r, alpha, beta, y, val, x, rows, columns);
215 }
216
217#ifdef FEAT_HAVE_HALFMATH
218 static void dense(Half * r, const Half alpha, const Half beta, const Half * const y, const Half * const val, const Half * const x, const Index rows, const Index columns)
219 {
221 {
222 //no cuda half implementation exists, thus we use the gemm version
224 //ProductMatMat::dense_cuda(r, alpha, beta, x, val, y, rows, 1, columns);
225 ProductMatMat::dense_cuda(r, alpha, beta, val, x, y, rows, 1, columns);
226 break;
228 default:
229 dense_generic(r, alpha, beta, y, val, x, rows, columns);
230 }
231 }
232#endif
233
234 static void dense(float * r, const float alpha, const float beta, const float * const y, const float * const val, const float * const x, const Index rows, const Index columns)
235 {
236 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, y, val, x, rows, columns)
237 }
238
239 static void dense(double * r, const double alpha, const double beta, const double * const y, const double * const val, const double * const x, const Index rows, const Index columns)
240 {
241 BACKEND_SKELETON_VOID(dense_cuda, dense_mkl, dense_generic, r, alpha, beta, y, val, x, rows, columns)
242 }
243
244
245 template <typename DT_, typename IT_>
246 static void csr_generic(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
247 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index, const Index, const bool);
248
249 template <typename DT_, typename IT_>
250 static void cscr_generic(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
251 const IT_ * const col_ind, const IT_ * const row_ptr, const IT_ * const row_numbers, const Index used_rows,
252 const Index rows, const Index, const Index, const bool);
253
254 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
255 static void bcsr_generic(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
256 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index, const Index);
257
258 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
259 static void bcsr_transposed_generic(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val,
260 const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index, const Index);
261
262 template <int BlockSize_, typename DT_, typename IT_>
263 static void csrsb_generic(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index, const Index);
264
265 template <typename DT_, typename IT_>
266 static void banded_generic(DT_ * r, const DT_ alpha, const DT_ * const x, const DT_ beta, const DT_ * const y, const DT_ * const val, const IT_ * const offsets, const Index num_of_offsets, const Index rows, const Index columns);
267
268 template <typename DT_, typename IT_>
269 static void banded_transposed_generic(DT_ * r, const DT_ alpha, const DT_ * const x, const DT_ beta, const DT_ * const y, const DT_ * const val, const IT_ * const offsets, const Index num_of_offsets, const Index rows, const Index columns);
270
271 template <typename DT_>
272 static void dense_generic(DT_ * r, const DT_ alpha, const DT_ beta, const DT_ * const rhs, const DT_ * const val, const DT_ * const x, const Index rows, const Index columns);
273
274 template <typename DT_>
275 static void dense_transposed_generic(DT_ * r, const DT_ alpha, const DT_ beta, const DT_ * const rhs, const DT_ * const val, const DT_ * const x, const Index rows, const Index columns);
276
277 static void csr_mkl(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const bool);
278 static void csr_mkl(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const bool);
279
280 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
281 static void bcsr_mkl(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements)
282 {
283 XASSERTM(BlockHeight_ == BlockWidth_, "MKL only supports square blocks!");
284 bcsr_mkl(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_);
285 }
286
287 static void bcsr_mkl(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const int blocksize);
288 static void bcsr_mkl(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const int blocksize);
289
290 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
291 static void bcsr_transposed_mkl(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements)
292 {
293 XASSERTM(BlockHeight_ == BlockWidth_, "MKL only supports square blocks!");
294 bcsr_mkl(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_);
295 }
296
297 static void bcsr_transposed_mkl(float * r, const float a, const float * const x, const float b, const float * const y, const float * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const int blocksize);
298 static void bcsr_transposed_mkl(double * r, const double a, const double * const x, const double b, const double * const y, const double * const val, const Index * const col_ind, const Index * const row_ptr, const Index rows, const Index columns, const Index, const int blocksize);
299
300 static void dense_mkl(float * r, const float alpha, const float beta, const float * const y, const float * const val, const float * const x, const Index rows, const Index columns);
301 static void dense_mkl(double * r, const double alpha, const double beta, const double * const y, const double * const val, const double * const x, const Index rows, const Index columns);
302
303 template <typename DT_, typename IT_>
304 static void csr_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements, const bool transposed);
305
306 template <int BlockHeight_, int BlockWidth_, typename DT_, typename IT_>
307 static void bcsr_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements)
308 {
309 XASSERTM(BlockHeight_ < 10, "The generic cuda bcsr kernel does not support BlockHeight greather than 9!");
310 bcsr_wrapper_cuda(r, a, x, b, y, val, col_ind, row_ptr, rows, columns, used_elements, BlockHeight_, BlockWidth_);
311 }
312
313 template <typename DT_, typename IT_>
314 static void bcsr_wrapper_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements, const int BlockHeight, const int BlockWidth);
315
316 template <typename DT_, typename IT_>
317 static void bcsr_intern_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements, const int BlockSize);
318
319 template <typename DT_, typename IT_>
320 static void bcsr_intern_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements, const int BlockHeight, const int BlockWidth);
321
322 template <int BlockSize_, typename DT_, typename IT_>
323 static void csrsb_cuda(DT_ * r, const DT_ a, const DT_ * const x, const DT_ b, const DT_ * const y, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows, const Index columns, const Index used_elements);
324
325 template <typename DT_, typename IT_>
326 static void banded_cuda(DT_ * r, const DT_ alpha, const DT_ * const x, const DT_ beta, const DT_ * const y, const DT_ * const val, const IT_ * const offsets, const Index num_of_offsets, const Index rows, const Index columns);
327
328 template <typename DT_>
329 static void dense_cuda(DT_ * r, const DT_ alpha, const DT_ beta, const DT_ * const y, const DT_ * const val, const DT_ * const x, const Index rows, const Index columns);
330 };
331
#ifdef FEAT_EICKT
// Explicit instantiation declarations: when FEAT_EICKT is defined, the listed
// specializations are not implicitly instantiated in including translation
// units; a matching explicit instantiation definition must exist elsewhere.

// csr_generic: float/double x uint64/uint32
extern template void Apply::csr_generic(float *, const float, const float * const, const float, const float * const, const float * const, const std::uint64_t * const, const std::uint64_t * const, const Index, const Index, const Index, const bool);
extern template void Apply::csr_generic(float *, const float, const float * const, const float, const float * const, const float * const, const std::uint32_t * const, const std::uint32_t * const, const Index, const Index, const Index, const bool);
extern template void Apply::csr_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint64_t * const, const std::uint64_t * const, const Index, const Index, const Index, const bool);
extern template void Apply::csr_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint32_t * const, const std::uint32_t * const, const Index, const Index, const Index, const bool);

// cscr_generic: float/uint64, double/uint64, double/uint32
// NOTE(review): the double/uint32 declaration used to appear twice; the
// redundant copy is removed. The other families list float/uint32 as well,
// so the duplicate was probably meant to be float/uint32. It is not added
// here because an extern template without a matching explicit instantiation
// definition would break linking -- confirm against the instantiating .cpp.
extern template void Apply::cscr_generic(float *, const float, const float * const, const float, const float * const, const float * const, const std::uint64_t * const, const std::uint64_t * const, const std::uint64_t * const, const Index, const Index, const Index, const Index, const bool);
extern template void Apply::cscr_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint64_t * const, const std::uint64_t * const, const std::uint64_t * const, const Index, const Index, const Index, const Index, const bool);
extern template void Apply::cscr_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint32_t * const, const std::uint32_t * const, const std::uint32_t * const, const Index, const Index, const Index, const Index, const bool);

// banded_generic: float/double x uint64/uint32
extern template void Apply::banded_generic(float *, const float, const float * const, const float, const float * const, const float * const, const std::uint64_t * const, const Index, const Index, const Index);
extern template void Apply::banded_generic(float *, const float, const float * const, const float, const float * const, const float * const, const std::uint32_t * const, const Index, const Index, const Index);
extern template void Apply::banded_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint64_t * const, const Index, const Index, const Index);
extern template void Apply::banded_generic(double *, const double, const double * const, const double, const double * const, const double * const, const std::uint32_t * const, const Index, const Index, const Index);

// dense_generic: float, double
extern template void Apply::dense_generic(float *, const float, const float, const float * const, const float * const, const float * const, const Index, const Index);
extern template void Apply::dense_generic(double *, const double, const double, const double * const, const double * const, const double * const, const Index, const Index);
#endif
351
352 } // namespace Arch
353 } // namespace LAFEM
354} // namespace FEAT
355
356#ifndef __CUDACC__
357#include <kernel/lafem/arch/apply_generic.hpp>
358#endif
359#endif // KERNEL_LAFEM_ARCH_APPLY_HPP
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
static PreferredBackend get_preferred_backend()
get current preferred backend
Definition: backend.cpp:27
FEAT namespace.
Definition: adjactor.hpp:12
__half Half
Half data type.
Definition: half.hpp:25
std::uint64_t Index
Index data type.