FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
scale_row_col.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7#ifndef KERNEL_LAFEM_ARCH_SCALE_ROW_COL_HPP
8#define KERNEL_LAFEM_ARCH_SCALE_ROW_COL_HPP 1
9
10// includes, FEAT
12#include <kernel/backend.hpp>
13
14namespace FEAT
15{
16 namespace LAFEM
17 {
18 namespace Arch
19 {
20 struct ScaleRows
21 {
22 template <typename DT_, typename IT_>
23 static void csr(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index columns, const Index used_elements)
24 {
25 csr_generic(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
26 }
27
28 static void csr(float * r, const float * const a, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const float * const x, const Index rows, const Index columns, const Index used_elements)
29 {
30 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
31 }
32
33 static void csr(double * r, const double * const a, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const double * const x, const Index rows, const Index columns, const Index used_elements)
34 {
35 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
36 }
37
38 static void csr(float * r, const float * const a, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const float * const x, const Index rows, const Index columns, const Index used_elements)
39 {
40 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
41 }
42
43 static void csr(double * r, const double * const a, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const double * const x, const Index rows, const Index columns, const Index used_elements)
44 {
45 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
46 }
47
48 template <typename DT_, typename IT_>
49 static void csr_generic(DT_ * r, const DT_ * const a, const IT_ * const /*col_ind*/, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
50
51 template <typename DT_, typename IT_>
52 static void csr_cuda(DT_ * r, const DT_ * const a, const IT_ * const /*col_ind*/, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
53
54 template <int bh_, int bw_, typename DT_, typename IT_>
55 static void bcsr(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index columns, const Index used_elements)
56 {
57 if constexpr ( (std::is_same<DT_, double>::value || std::is_same<DT_, float>::value)
58 && (std::is_same<IT_, std::uint32_t>::value || std::is_same<IT_, std::uint64_t>::value))
59 {
60 BACKEND_SKELETON_VOID_T2(bh_, bw_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
61 }
62 else
63 {
64 bcsr_generic<bh_, bw_>(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
65 }
66 }
67
68 template <int bh_, int bw_, typename DT_, typename IT_>
69 static void bcsr_generic(DT_ * r, const DT_ * const a, const IT_ * const /*col_ind*/, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
70
71 template<typename DT_, typename IT_>
72 static void bcsr_cuda_intern(DT_*, const DT_* const, const IT_* const, const IT_* const, const DT_* const, const Index, const Index, const Index, const int, const int);
73
74 template <int bh_, int bw_, typename DT_, typename IT_>
75 static void bcsr_cuda(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index cols, const Index used_el)
76 {
77 bcsr_cuda_intern(r, a, col_ind, row_ptr, x, rows, cols, used_el, bh_, bw_);
78 }
79
80 };
81
// Explicit instantiation declarations, active only when FEAT_EICKT is defined: the four
// float/double x uint64/uint32 variants of ScaleRows::csr_generic are not instantiated
// implicitly through this header; a matching explicit instantiation definition has to be
// supplied by another translation unit (not visible in this file).
82#ifdef FEAT_EICKT
83 extern template void ScaleRows::csr_generic(float *, const float * const, const std::uint64_t * const, const std::uint64_t * const, const float * const, const Index, const Index, const Index);
84 extern template void ScaleRows::csr_generic(double *, const double * const, const std::uint64_t * const, const std::uint64_t * const, const double * const, const Index, const Index, const Index);
85 extern template void ScaleRows::csr_generic(float *, const float * const, const std::uint32_t * const, const std::uint32_t * const, const float * const, const Index, const Index, const Index);
86 extern template void ScaleRows::csr_generic(double *, const double * const, const std::uint32_t * const, const std::uint32_t * const, const double * const, const Index, const Index, const Index);
87#endif
88
89
90 // ***********************************************
91
92 struct ScaleCols
93 {
94 template <typename DT_, typename IT_>
95 static void csr(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index columns, const Index used_elements)
96 {
97 csr_generic(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
98 }
99
100 static void csr(float * r, const float * const a, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const float * const x, const Index rows, const Index columns, const Index used_elements)
101 {
102 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
103 }
104
105 static void csr(double * r, const double * const a, const std::uint64_t * const col_ind, const std::uint64_t * const row_ptr, const double * const x, const Index rows, const Index columns, const Index used_elements)
106 {
107 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
108 }
109
110 static void csr(float * r, const float * const a, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const float * const x, const Index rows, const Index columns, const Index used_elements)
111 {
112 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
113 }
114
115 static void csr(double * r, const double * const a, const std::uint32_t * const col_ind, const std::uint32_t * const row_ptr, const double * const x, const Index rows, const Index columns, const Index used_elements)
116 {
117 BACKEND_SKELETON_VOID(csr_cuda, csr_generic, csr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
118 }
119
120 template <typename DT_, typename IT_>
121 static void csr_generic(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
122
123 template <typename DT_, typename IT_>
124 static void csr_cuda(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
125
126
127 template <int bh_, int bw_, typename DT_, typename IT_>
128 static void bcsr(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index columns, const Index used_elements)
129 {
130 if constexpr ( (std::is_same<DT_, double>::value || std::is_same<DT_, float>::value)
131 && (std::is_same<IT_, std::uint32_t>::value || std::is_same<IT_, std::uint64_t>::value))
132 {
133 BACKEND_SKELETON_VOID_T2(bh_, bw_, bcsr_cuda, bcsr_generic, bcsr_generic, r, a, col_ind, row_ptr, x, rows, columns, used_elements)
134 }
135 else
136 {
137 bcsr_generic<bh_, bw_>(r, a, col_ind, row_ptr, x, rows, columns, used_elements);
138 }
139 }
140
141 template <int bh_, int bw_, typename DT_, typename IT_>
142 static void bcsr_generic(DT_ * r, const DT_ * const a, const IT_ * const /*col_ind*/, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index, const Index);
143
144 template<typename DT_, typename IT_>
145 static void bcsr_cuda_intern(DT_*, const DT_* const, const IT_* const, const IT_* const, const DT_* const, const Index, const Index, const Index, const int, const int);
146
147 template <int bh_, int bw_, typename DT_, typename IT_>
148 static void bcsr_cuda(DT_ * r, const DT_ * const a, const IT_ * const col_ind, const IT_ * const row_ptr, const DT_ * const x, const Index rows, const Index cols, const Index used_el)
149 {
150 bcsr_cuda_intern(r, a, col_ind, row_ptr, x, rows, cols, used_el, bh_, bw_);
151 }
152 };
153
// Explicit instantiation declarations, active only when FEAT_EICKT is defined: the four
// float/double x uint64/uint32 variants of ScaleCols::csr_generic are not instantiated
// implicitly through this header; a matching explicit instantiation definition has to be
// supplied by another translation unit (not visible in this file).
154#ifdef FEAT_EICKT
155 extern template void ScaleCols::csr_generic(float *, const float * const, const std::uint64_t * const, const std::uint64_t * const, const float * const, const Index, const Index, const Index);
156 extern template void ScaleCols::csr_generic(double *, const double * const, const std::uint64_t * const, const std::uint64_t * const, const double * const, const Index, const Index, const Index);
157 extern template void ScaleCols::csr_generic(float *, const float * const, const std::uint32_t * const, const std::uint32_t * const, const float * const, const Index, const Index, const Index);
158 extern template void ScaleCols::csr_generic(double *, const double * const, const std::uint32_t * const, const std::uint32_t * const, const double * const, const Index, const Index, const Index);
159#endif
160 } // namespace Arch
161 } // namespace LAFEM
162} // namespace FEAT
163
164#ifndef __CUDACC__
165#include <kernel/lafem/arch/scale_row_col_generic.hpp>
166#endif
167#endif // KERNEL_LAFEM_ARCH_SCALE_ROW_COL_HPP
FEAT Kernel base header.
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.