FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
row_norm.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7#ifndef KERNEL_LAFEM_ARCH_ROW_NORM_HPP
8#define KERNEL_LAFEM_ARCH_ROW_NORM_HPP 1
9
10// includes, FEAT
12#include <kernel/backend.hpp>
13
14
15namespace FEAT
16{
17 namespace LAFEM
18 {
19 namespace Arch
20 {
// RowNorm: stateless collection of static entry points that compute per-row norm
// quantities of a sparse matrix handed in as raw arrays (val, col_ind, row_ptr, rows).
// The result is written through 'row_norms', the only non-const pointer argument
// (presumably one entry per scalar row -- confirm in row_norm_generic.hpp).
//
// Every operation (norm2, norm2sqr, scaled_norm2sqr; each as csr_* and bcsr_*) is
// declared in the same three layers:
//  1. a template overload for arbitrary DT_/IT_ that calls the *_generic_* function directly;
//  2. four non-template overloads (float/double values x 64-bit/32-bit indices) that expand
//     BACKEND_SKELETON_VOID with the *_cuda_* function first and the *_generic_* function
//     as second and third argument, followed by the forwarded call arguments;
//  3. declarations of the *_generic_* and *_cuda_* templates, which have no body here and
//     must be defined elsewhere.
// For an exact float/double + uint64/uint32 argument match the non-template overloads are
// preferred by overload resolution over the template overload.
//
// NOTE(review): BACKEND_SKELETON_VOID is not defined in this file (presumably it comes from
// kernel/backend.hpp and picks one of the three named functions depending on the active
// backend) -- confirm there before changing the argument order.
21 struct RowNorm
22 {
24 // row norm2
// csr_norm2, generic DT_/IT_ fallback: forwards straight to csr_generic_norm2.
26 template <typename DT_, typename IT_>
27 static void csr_norm2(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
28 const IT_* const row_ptr, const Index rows)
29 {
30 csr_generic_norm2(row_norms, val, col_ind, row_ptr, rows);
31 }
32
// csr_norm2 for float values / 64-bit indices: dispatched through BACKEND_SKELETON_VOID.
33 static void csr_norm2(float* row_norms, const float* const val, const std::uint64_t* const col_ind,
34 const std::uint64_t* const row_ptr, const Index rows)
35 {
36 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
37 }
38
// csr_norm2 for double values / 64-bit indices.
39 static void csr_norm2(double* row_norms, const double* const val, const std::uint64_t* const col_ind,
40 const std::uint64_t* const row_ptr, const Index rows)
41 {
42 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
43 }
44
// csr_norm2 for float values / 32-bit indices.
45 static void csr_norm2(float* row_norms, const float* const val, const std::uint32_t* const col_ind,
46 const std::uint32_t* const row_ptr, const Index rows)
47 {
48 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
49 }
50
// csr_norm2 for double values / 32-bit indices.
51 static void csr_norm2(double* row_norms, const double* const val, const std::uint32_t* const col_ind,
52 const std::uint32_t* const row_ptr, const Index rows)
53 {
54 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
55 }
56
// Declaration only; the definition lives outside this struct.
57 template <typename DT_, typename IT_>
58 static void csr_generic_norm2(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
59 const IT_* const row_ptr, const Index rows);
60
// bcsr_norm2, generic DT_/IT_ fallback: same as csr_norm2 but additionally forwards the
// run-time block dimensions BlockHeight and BlockWidth to bcsr_generic_norm2.
61 template <typename DT_, typename IT_>
62 static void bcsr_norm2(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
63 const IT_* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
64 {
65 bcsr_generic_norm2(row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
66 }
67
// bcsr_norm2 for float values / 64-bit indices.
68 static void bcsr_norm2(float* row_norms, const float* const val, const std::uint64_t* const col_ind,
69 const std::uint64_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
70 {
71 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
72 }
73
// bcsr_norm2 for double values / 64-bit indices.
74 static void bcsr_norm2(double* row_norms, const double* const val, const std::uint64_t* const col_ind,
75 const std::uint64_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
76 {
77 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
78 }
79
// bcsr_norm2 for float values / 32-bit indices.
80 static void bcsr_norm2(float* row_norms, const float* const val, const std::uint32_t* const col_ind,
81 const std::uint32_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
82 {
83 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
84 }
85
// bcsr_norm2 for double values / 32-bit indices.
86 static void bcsr_norm2(double* row_norms, const double* const val, const std::uint32_t* const col_ind,
87 const std::uint32_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
88 {
89 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
90 }
91
// Declaration only; the definition lives outside this struct.
92 template <typename DT_, typename IT_>
93 static void bcsr_generic_norm2(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
94 const IT_ * const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth);
95
// CUDA counterparts named by the BACKEND_SKELETON_VOID calls above; declaration only.
96 template <typename DT_, typename IT_>
97 static void csr_cuda_norm2(DT_ * row_norms, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows);
98 template <typename DT_, typename IT_>
99 static void bcsr_cuda_norm2(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
100 const IT_* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth);
101
103 // row norm2sqr
// Same overload layout as the norm2 family above, for the *_norm2sqr functions
// (by name the squared variant -- confirm against the generic implementation).
// csr_norm2sqr, generic DT_/IT_ fallback: forwards straight to csr_generic_norm2sqr.
105 template <typename DT_, typename IT_>
106 static void csr_norm2sqr(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
107 const IT_* const row_ptr, const Index rows)
108 {
109 csr_generic_norm2sqr(row_norms, val, col_ind, row_ptr, rows);
110 }
111
// csr_norm2sqr for float values / 64-bit indices.
112 static void csr_norm2sqr(float* row_norms, const float* const val, const std::uint64_t* const col_ind,
113 const std::uint64_t* const row_ptr, const Index rows)
114 {
115 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
116 }
117
// csr_norm2sqr for double values / 64-bit indices.
118 static void csr_norm2sqr(double* row_norms, const double* const val, const std::uint64_t* const col_ind,
119 const std::uint64_t* const row_ptr, const Index rows)
120 {
121 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
122 }
123
// csr_norm2sqr for float values / 32-bit indices.
124 static void csr_norm2sqr(float* row_norms, const float* const val, const std::uint32_t* const col_ind,
125 const std::uint32_t* const row_ptr, const Index rows)
126 {
127 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
128 }
129
// csr_norm2sqr for double values / 32-bit indices.
130 static void csr_norm2sqr(double* row_norms, const double* const val, const std::uint32_t* const col_ind,
131 const std::uint32_t* const row_ptr, const Index rows)
132 {
133 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
134 }
135
// Declaration only; the definition lives outside this struct.
136 template <typename DT_, typename IT_>
137 static void csr_generic_norm2sqr(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
138 const IT_* const row_ptr, const Index rows);
139
// bcsr_norm2sqr, generic DT_/IT_ fallback: forwards to bcsr_generic_norm2sqr including
// the run-time block dimensions.
140 template <typename DT_, typename IT_>
141 static void bcsr_norm2sqr(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
142 const IT_* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
143 {
144 bcsr_generic_norm2sqr(row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
145 }
146
// bcsr_norm2sqr for float values / 64-bit indices.
147 static void bcsr_norm2sqr(float* row_norms, const float* const val, const std::uint64_t* const col_ind,
148 const std::uint64_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
149 {
150 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
151 }
152
// bcsr_norm2sqr for double values / 64-bit indices.
153 static void bcsr_norm2sqr(double* row_norms, const double* const val, const std::uint64_t* const col_ind,
154 const std::uint64_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
155 {
156 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
157 }
158
// bcsr_norm2sqr for float values / 32-bit indices.
159 static void bcsr_norm2sqr(float* row_norms, const float* const val, const std::uint32_t* const col_ind,
160 const std::uint32_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
161 {
162 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
163 }
164
// bcsr_norm2sqr for double values / 32-bit indices.
165 static void bcsr_norm2sqr(double* row_norms, const double* const val, const std::uint32_t* const col_ind,
166 const std::uint32_t* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth)
167 {
168 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
169 }
170
// Declaration only; the definition lives outside this struct.
171 template <typename DT_, typename IT_>
172 static void bcsr_generic_norm2sqr(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
173 const IT_ * const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth);
174
// CUDA counterparts named by the BACKEND_SKELETON_VOID calls above; declaration only.
175 template <typename DT_, typename IT_>
176 static void csr_cuda_norm2sqr(DT_ * row_norms, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows);
177 template <typename DT_, typename IT_>
178 static void bcsr_cuda_norm2sqr(DT_* row_norms, const DT_* const val, const IT_* const col_ind,
179 const IT_* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth);
180
182 // scaled row norm2sqr
// Same overload layout again, with one extra read-only array 'scal' of the value type
// passed between row_norms and val.
// NOTE(review): how 'scal' enters the result (and its required length) is not visible
// here -- check the generic implementation.
// csr_scaled_norm2sqr, generic DT_/IT_ fallback: forwards to csr_generic_scaled_norm2sqr.
184 template <typename DT_, typename IT_>
185 static void csr_scaled_norm2sqr(DT_* row_norms, const DT_* const scal, const DT_* const val,
186 const IT_* const col_ind, const IT_* const row_ptr, const Index rows)
187 {
188 csr_generic_scaled_norm2sqr(row_norms, scal, val, col_ind, row_ptr, rows);
189 }
190
// csr_scaled_norm2sqr for float values / 64-bit indices.
191 static void csr_scaled_norm2sqr(float* row_norms, const float* const scal, const float* const val,
192 const std::uint64_t* const col_ind, const std::uint64_t* const row_ptr, const Index rows)
193 {
194 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
195 }
196
// csr_scaled_norm2sqr for double values / 64-bit indices.
197 static void csr_scaled_norm2sqr(double* row_norms, const double* const scal, const double* const val,
198 const std::uint64_t* const col_ind, const std::uint64_t* const row_ptr, const Index rows)
199 {
200 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
201 }
202
// csr_scaled_norm2sqr for float values / 32-bit indices.
203 static void csr_scaled_norm2sqr(float* row_norms, const float* const scal, const float* const val,
204 const std::uint32_t* const col_ind, const std::uint32_t* const row_ptr, const Index rows)
205 {
206 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
207 }
208
// csr_scaled_norm2sqr for double values / 32-bit indices.
209 static void csr_scaled_norm2sqr(double* row_norms, const double* const scal, const double* const val,
210 const std::uint32_t* const col_ind, const std::uint32_t* const row_ptr, const Index rows)
211 {
212 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
213 }
214
// Declaration only; the definition lives outside this struct.
215 template <typename DT_, typename IT_>
216 static void csr_generic_scaled_norm2sqr(DT_* row_norms, const DT_* const scal, const DT_* const val,
217 const IT_* const col_ind, const IT_* const row_ptr, const Index rows);
218
// bcsr_scaled_norm2sqr, generic DT_/IT_ fallback: forwards to bcsr_generic_scaled_norm2sqr
// including the run-time block dimensions.
219 template <typename DT_, typename IT_>
220 static void bcsr_scaled_norm2sqr(DT_* row_norms, const DT_* const scal, const DT_* const val,
221 const IT_* const col_ind, const IT_* const row_ptr, const Index rows,
222 const int BlockHeight, const int BlockWidth)
223 {
224 bcsr_generic_scaled_norm2sqr(row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
225 }
226
// bcsr_scaled_norm2sqr for float values / 64-bit indices.
227 static void bcsr_scaled_norm2sqr(float* row_norms, const float* const scal, const float* const val,
228 const std::uint64_t* const col_ind, const std::uint64_t* const row_ptr, const Index rows,
229 const int BlockHeight, const int BlockWidth)
230 {
231 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
232 }
233
// bcsr_scaled_norm2sqr for double values / 64-bit indices.
234 static void bcsr_scaled_norm2sqr(double* row_norms, const double* const scal, const double* const val,
235 const std::uint64_t* const col_ind, const std::uint64_t* const row_ptr, const Index rows,
236 const int BlockHeight, const int BlockWidth)
237 {
238 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
239 }
240
// bcsr_scaled_norm2sqr for float values / 32-bit indices.
241 static void bcsr_scaled_norm2sqr(float* row_norms, const float* const scal, const float* const val,
242 const std::uint32_t* const col_ind, const std::uint32_t* const row_ptr, const Index rows,
243 const int BlockHeight, const int BlockWidth)
244 {
245 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
246 }
247
// bcsr_scaled_norm2sqr for double values / 32-bit indices.
248 static void bcsr_scaled_norm2sqr(double* row_norms, const double* const scal, const double* const val,
249 const std::uint32_t* const col_ind, const std::uint32_t* const row_ptr, const Index rows,
250 const int BlockHeight, const int BlockWidth)
251 {
252 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
253 }
254
// Declaration only; the definition lives outside this struct.
255 template <typename DT_, typename IT_>
256 static void bcsr_generic_scaled_norm2sqr(DT_* row_norms, const DT_* const scal, const DT_* const val,
257 const IT_* const col_ind, const IT_* const row_ptr, const Index rows,
258 const int BlockHeight, const int BlockWidth);
259
260
// CUDA counterparts named by the BACKEND_SKELETON_VOID calls above; declaration only.
261 template <typename DT_, typename IT_>
262 static void csr_cuda_scaled_norm2sqr(DT_ * row_norms, const DT_ * const scal, const DT_ * const val, const IT_ * const col_ind, const IT_ * const row_ptr, const Index rows);
263 template <typename DT_, typename IT_>
264 static void bcsr_cuda_scaled_norm2sqr(DT_* row_norms, const DT_ * const scal, const DT_* const val, const IT_* const col_ind,
265 const IT_* const row_ptr, const Index rows, const int BlockHeight, const int BlockWidth);
266 };
267
// Explicit instantiation declarations, compiled only when FEAT_EICKT is defined.
// Each 'extern template' line suppresses implicit instantiation of the named
// RowNorm::*_generic_* member template for DT_ = float or double with IT_ = Index
// in every translation unit that includes this header.
// NOTE(review): the matching explicit instantiation definitions must exist in exactly
// one source file elsewhere in the project, otherwise linking fails -- confirm where.
// Only the Index index type is covered; other index types are still instantiated implicitly.
268#ifdef FEAT_EICKT
// csr_generic_norm2: float and double
269 extern template void RowNorm::csr_generic_norm2(float*,
270 const float* const, const Index* const, const Index * const, const Index);
271 extern template void RowNorm::csr_generic_norm2(double*,
272 const double* const, const Index* const, const Index* const, const Index);
273
// bcsr_generic_norm2: float and double (trailing ints are BlockHeight, BlockWidth)
274 extern template void RowNorm::bcsr_generic_norm2(float*,
275 const float* const, const Index* const, const Index* const, const Index, const int, const int);
276 extern template void RowNorm::bcsr_generic_norm2(double*,
277 const double* const, const Index* const, const Index* const, const Index, const int, const int);
278
// csr_generic_norm2sqr: float and double
279 extern template void RowNorm::csr_generic_norm2sqr(float*,
280 const float* const, const Index* const, const Index * const, const Index);
281 extern template void RowNorm::csr_generic_norm2sqr(double*,
282 const double* const, const Index* const, const Index* const, const Index);
283
// bcsr_generic_norm2sqr: float and double
284 extern template void RowNorm::bcsr_generic_norm2sqr(float*,
285 const float* const, const Index* const, const Index* const, const Index, const int, const int);
286 extern template void RowNorm::bcsr_generic_norm2sqr(double*,
287 const double* const, const Index* const, const Index* const, const Index, const int, const int);
288
// csr_generic_scaled_norm2sqr: float and double (second pointer is the 'scal' array)
289 extern template void RowNorm::csr_generic_scaled_norm2sqr(float*, const float* const,
290 const float* const, const Index* const, const Index * const, const Index);
291 extern template void RowNorm::csr_generic_scaled_norm2sqr(double*, const double* const,
292 const double* const, const Index* const, const Index* const, const Index);
293
// bcsr_generic_scaled_norm2sqr: float and double
294 extern template void RowNorm::bcsr_generic_scaled_norm2sqr(float*, const float* const,
295 const float* const, const Index* const, const Index* const, const Index, const int, const int);
296 extern template void RowNorm::bcsr_generic_scaled_norm2sqr(double*, const double* const,
297 const double* const, const Index* const, const Index* const, const Index, const int, const int);
298#endif
299
300
301 } // namespace Arch
302 } // namespace LAFEM
303} // namespace FEAT
304
305#ifndef __CUDACC__
306#include <kernel/lafem/arch/row_norm_generic.hpp>
307#endif
308#endif // KERNEL_LAFEM_ARCH_ROW_NORM_HPP
FEAT Kernel base header.
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.