7#ifndef KERNEL_LAFEM_ARCH_ROW_NORM_HPP
8#define KERNEL_LAFEM_ARCH_ROW_NORM_HPP 1
12#include <kernel/backend.hpp>
26 template <
typename DT_,
typename IT_>
27 static void csr_norm2(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
28 const IT_*
const row_ptr,
const Index rows)
30 csr_generic_norm2(row_norms, val, col_ind, row_ptr, rows);
33 static void csr_norm2(
float* row_norms,
const float*
const val,
const std::uint64_t*
const col_ind,
34 const std::uint64_t*
const row_ptr,
const Index rows)
36 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
39 static void csr_norm2(
double* row_norms,
const double*
const val,
const std::uint64_t*
const col_ind,
40 const std::uint64_t*
const row_ptr,
const Index rows)
42 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
45 static void csr_norm2(
float* row_norms,
const float*
const val,
const std::uint32_t*
const col_ind,
46 const std::uint32_t*
const row_ptr,
const Index rows)
48 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
51 static void csr_norm2(
double* row_norms,
const double*
const val,
const std::uint32_t*
const col_ind,
52 const std::uint32_t*
const row_ptr,
const Index rows)
54 BACKEND_SKELETON_VOID(csr_cuda_norm2, csr_generic_norm2, csr_generic_norm2, row_norms, val, col_ind, row_ptr, rows)
57 template <
typename DT_,
typename IT_>
58 static void csr_generic_norm2(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
59 const IT_*
const row_ptr,
const Index rows);
61 template <
typename DT_,
typename IT_>
62 static void bcsr_norm2(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
63 const IT_*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
65 bcsr_generic_norm2(row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
68 static void bcsr_norm2(
float* row_norms,
const float*
const val,
const std::uint64_t*
const col_ind,
69 const std::uint64_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
71 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
74 static void bcsr_norm2(
double* row_norms,
const double*
const val,
const std::uint64_t*
const col_ind,
75 const std::uint64_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
77 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
80 static void bcsr_norm2(
float* row_norms,
const float*
const val,
const std::uint32_t*
const col_ind,
81 const std::uint32_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
83 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
86 static void bcsr_norm2(
double* row_norms,
const double*
const val,
const std::uint32_t*
const col_ind,
87 const std::uint32_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
89 BACKEND_SKELETON_VOID(bcsr_cuda_norm2, bcsr_generic_norm2, bcsr_generic_norm2, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
92 template <
typename DT_,
typename IT_>
93 static void bcsr_generic_norm2(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
94 const IT_ *
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth);
96 template <
typename DT_,
typename IT_>
97 static void csr_cuda_norm2(DT_ * row_norms,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows);
98 template <
typename DT_,
typename IT_>
99 static void bcsr_cuda_norm2(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
100 const IT_*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth);
105 template <
typename DT_,
typename IT_>
106 static void csr_norm2sqr(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
107 const IT_*
const row_ptr,
const Index rows)
109 csr_generic_norm2sqr(row_norms, val, col_ind, row_ptr, rows);
112 static void csr_norm2sqr(
float* row_norms,
const float*
const val,
const std::uint64_t*
const col_ind,
113 const std::uint64_t*
const row_ptr,
const Index rows)
115 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
118 static void csr_norm2sqr(
double* row_norms,
const double*
const val,
const std::uint64_t*
const col_ind,
119 const std::uint64_t*
const row_ptr,
const Index rows)
121 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
124 static void csr_norm2sqr(
float* row_norms,
const float*
const val,
const std::uint32_t*
const col_ind,
125 const std::uint32_t*
const row_ptr,
const Index rows)
127 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
130 static void csr_norm2sqr(
double* row_norms,
const double*
const val,
const std::uint32_t*
const col_ind,
131 const std::uint32_t*
const row_ptr,
const Index rows)
133 BACKEND_SKELETON_VOID(csr_cuda_norm2sqr, csr_generic_norm2sqr, csr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows)
136 template <
typename DT_,
typename IT_>
137 static void csr_generic_norm2sqr(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
138 const IT_*
const row_ptr,
const Index rows);
140 template <
typename DT_,
typename IT_>
141 static void bcsr_norm2sqr(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
142 const IT_*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
144 bcsr_generic_norm2sqr(row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
147 static void bcsr_norm2sqr(
float* row_norms,
const float*
const val,
const std::uint64_t*
const col_ind,
148 const std::uint64_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
150 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
153 static void bcsr_norm2sqr(
double* row_norms,
const double*
const val,
const std::uint64_t*
const col_ind,
154 const std::uint64_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
156 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
159 static void bcsr_norm2sqr(
float* row_norms,
const float*
const val,
const std::uint32_t*
const col_ind,
160 const std::uint32_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
162 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
165 static void bcsr_norm2sqr(
double* row_norms,
const double*
const val,
const std::uint32_t*
const col_ind,
166 const std::uint32_t*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth)
168 BACKEND_SKELETON_VOID(bcsr_cuda_norm2sqr, bcsr_generic_norm2sqr, bcsr_generic_norm2sqr, row_norms, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
171 template <
typename DT_,
typename IT_>
172 static void bcsr_generic_norm2sqr(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
173 const IT_ *
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth);
175 template <
typename DT_,
typename IT_>
176 static void csr_cuda_norm2sqr(DT_ * row_norms,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows);
177 template <
typename DT_,
typename IT_>
178 static void bcsr_cuda_norm2sqr(DT_* row_norms,
const DT_*
const val,
const IT_*
const col_ind,
179 const IT_*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth);
184 template <
typename DT_,
typename IT_>
185 static void csr_scaled_norm2sqr(DT_* row_norms,
const DT_*
const scal,
const DT_*
const val,
186 const IT_*
const col_ind,
const IT_*
const row_ptr,
const Index rows)
188 csr_generic_scaled_norm2sqr(row_norms, scal, val, col_ind, row_ptr, rows);
191 static void csr_scaled_norm2sqr(
float* row_norms,
const float*
const scal,
const float*
const val,
192 const std::uint64_t*
const col_ind,
const std::uint64_t*
const row_ptr,
const Index rows)
194 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
197 static void csr_scaled_norm2sqr(
double* row_norms,
const double*
const scal,
const double*
const val,
198 const std::uint64_t*
const col_ind,
const std::uint64_t*
const row_ptr,
const Index rows)
200 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
203 static void csr_scaled_norm2sqr(
float* row_norms,
const float*
const scal,
const float*
const val,
204 const std::uint32_t*
const col_ind,
const std::uint32_t*
const row_ptr,
const Index rows)
206 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
209 static void csr_scaled_norm2sqr(
double* row_norms,
const double*
const scal,
const double*
const val,
210 const std::uint32_t*
const col_ind,
const std::uint32_t*
const row_ptr,
const Index rows)
212 BACKEND_SKELETON_VOID(csr_cuda_scaled_norm2sqr, csr_generic_scaled_norm2sqr, csr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows)
215 template <
typename DT_,
typename IT_>
216 static void csr_generic_scaled_norm2sqr(DT_* row_norms,
const DT_*
const scal,
const DT_*
const val,
217 const IT_*
const col_ind,
const IT_*
const row_ptr,
const Index rows);
219 template <
typename DT_,
typename IT_>
220 static void bcsr_scaled_norm2sqr(DT_* row_norms,
const DT_*
const scal,
const DT_*
const val,
221 const IT_*
const col_ind,
const IT_*
const row_ptr,
const Index rows,
222 const int BlockHeight,
const int BlockWidth)
224 bcsr_generic_scaled_norm2sqr(row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth);
227 static void bcsr_scaled_norm2sqr(
float* row_norms,
const float*
const scal,
const float*
const val,
228 const std::uint64_t*
const col_ind,
const std::uint64_t*
const row_ptr,
const Index rows,
229 const int BlockHeight,
const int BlockWidth)
231 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
234 static void bcsr_scaled_norm2sqr(
double* row_norms,
const double*
const scal,
const double*
const val,
235 const std::uint64_t*
const col_ind,
const std::uint64_t*
const row_ptr,
const Index rows,
236 const int BlockHeight,
const int BlockWidth)
238 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
241 static void bcsr_scaled_norm2sqr(
float* row_norms,
const float*
const scal,
const float*
const val,
242 const std::uint32_t*
const col_ind,
const std::uint32_t*
const row_ptr,
const Index rows,
243 const int BlockHeight,
const int BlockWidth)
245 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
248 static void bcsr_scaled_norm2sqr(
double* row_norms,
const double*
const scal,
const double*
const val,
249 const std::uint32_t*
const col_ind,
const std::uint32_t*
const row_ptr,
const Index rows,
250 const int BlockHeight,
const int BlockWidth)
252 BACKEND_SKELETON_VOID(bcsr_cuda_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, bcsr_generic_scaled_norm2sqr, row_norms, scal, val, col_ind, row_ptr, rows, BlockHeight, BlockWidth)
255 template <
typename DT_,
typename IT_>
256 static void bcsr_generic_scaled_norm2sqr(DT_* row_norms,
const DT_*
const scal,
const DT_*
const val,
257 const IT_*
const col_ind,
const IT_*
const row_ptr,
const Index rows,
258 const int BlockHeight,
const int BlockWidth);
261 template <
typename DT_,
typename IT_>
262 static void csr_cuda_scaled_norm2sqr(DT_ * row_norms,
const DT_ *
const scal,
const DT_ *
const val,
const IT_ *
const col_ind,
const IT_ *
const row_ptr,
const Index rows);
263 template <
typename DT_,
typename IT_>
264 static void bcsr_cuda_scaled_norm2sqr(DT_* row_norms,
const DT_ *
const scal,
const DT_*
const val,
const IT_*
const col_ind,
265 const IT_*
const row_ptr,
const Index rows,
const int BlockHeight,
const int BlockWidth);
269 extern template void RowNorm::csr_generic_norm2(
float*,
270 const float*
const,
const Index*
const,
const Index *
const,
const Index);
271 extern template void RowNorm::csr_generic_norm2(
double*,
272 const double*
const,
const Index*
const,
const Index*
const,
const Index);
274 extern template void RowNorm::bcsr_generic_norm2(
float*,
275 const float*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
276 extern template void RowNorm::bcsr_generic_norm2(
double*,
277 const double*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
279 extern template void RowNorm::csr_generic_norm2sqr(
float*,
280 const float*
const,
const Index*
const,
const Index *
const,
const Index);
281 extern template void RowNorm::csr_generic_norm2sqr(
double*,
282 const double*
const,
const Index*
const,
const Index*
const,
const Index);
284 extern template void RowNorm::bcsr_generic_norm2sqr(
float*,
285 const float*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
286 extern template void RowNorm::bcsr_generic_norm2sqr(
double*,
287 const double*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
289 extern template void RowNorm::csr_generic_scaled_norm2sqr(
float*,
const float*
const,
290 const float*
const,
const Index*
const,
const Index *
const,
const Index);
291 extern template void RowNorm::csr_generic_scaled_norm2sqr(
double*,
const double*
const,
292 const double*
const,
const Index*
const,
const Index*
const,
const Index);
294 extern template void RowNorm::bcsr_generic_scaled_norm2sqr(
float*,
const float*
const,
295 const float*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
296 extern template void RowNorm::bcsr_generic_scaled_norm2sqr(
double*,
const double*
const,
297 const double*
const,
const Index*
const,
const Index*
const,
const Index,
const int,
const int);
306#include <kernel/lafem/arch/row_norm_generic.hpp>
// Note: Index is the index data type (std::uint64_t); it is declared outside this file.