FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
unit_filter_blocked.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7#ifndef KERNEL_LAFEM_ARCH_UNIT_FILTER_BLOCKED_HPP
8#define KERNEL_LAFEM_ARCH_UNIT_FILTER_BLOCKED_HPP 1
9
10// includes, FEAT
12#include <kernel/backend.hpp>
13
15namespace FEAT
16{
17 namespace LAFEM
18 {
19 namespace Arch
20 {
21 struct UnitFilterBlocked
22 {
23 template <typename DT_, typename IT_>
24 static void filter_rhs(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
25 {
26 filter_rhs_generic(v, block_size, sv_elements, sv_indices, ue, ign_nans);
27 }
28
29 template <typename IT_>
30 static void filter_rhs(double * v, int block_size, const double * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
31 {
32 BACKEND_SKELETON_VOID(filter_rhs_cuda, filter_rhs_generic, filter_rhs_generic, v, block_size, sv_elements, sv_indices, ue, ign_nans)
33 }
34
35 template <typename IT_>
36 static void filter_rhs(float * v, int block_size, const float * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
37 {
38 BACKEND_SKELETON_VOID(filter_rhs_cuda, filter_rhs_generic, filter_rhs_generic, v, block_size, sv_elements, sv_indices, ue, ign_nans)
39 }
40
41 template <typename DT_, typename IT_>
42 static void filter_def(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
43 {
44 filter_def_generic(v, block_size, sv_elements, sv_indices, ue, ign_nans);
45 }
46
47 template <typename IT_>
48 static void filter_def(double * v, int block_size, const double * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
49 {
50 BACKEND_SKELETON_VOID(filter_def_cuda, filter_def_generic, filter_def_generic, v, block_size, sv_elements, sv_indices, ue, ign_nans)
51 }
52
53 template <typename IT_>
54 static void filter_def(float * v, int block_size, const float * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
55 {
56 BACKEND_SKELETON_VOID(filter_def_cuda, filter_def_generic, filter_def_generic, v, block_size, sv_elements, sv_indices, ue, ign_nans)
57 }
58
59 template<typename DT_, typename IT_>
60 static void filter_unit_mat(DT_* mat, const IT_* const row_ptr, const IT_* const col_idx, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
61 {
62 filter_unit_mat_generic(mat, row_ptr, col_idx, block_height, block_width, sv_elements, sv_indices, ue, ign_nans);
63 }
64
65 template<typename IT_>
66 static void filter_unit_mat(double* mat, const IT_* const row_ptr, const IT_* const col_idx, int block_height, int block_width, const double * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
67 {
68 BACKEND_SKELETON_VOID(filter_unit_mat_cuda, filter_unit_mat_generic, filter_unit_mat_generic, mat, row_ptr, col_idx, block_height, block_width, sv_elements, sv_indices, ue, ign_nans)
69 }
70
71 template<typename IT_>
72 static void filter_unit_mat(float* mat, const IT_* const row_ptr, const IT_* const col_idx, int block_height, int block_width, const float * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
73 {
74 BACKEND_SKELETON_VOID(filter_unit_mat_cuda, filter_unit_mat_generic, filter_unit_mat_generic, mat, row_ptr, col_idx, block_height, block_width, sv_elements, sv_indices, ue, ign_nans)
75 }
76
77 template<typename DT_, typename IT_>
78 static void filter_offdiag_row_mat(DT_* mat, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
79 {
80 filter_offdiag_row_mat_generic(mat, row_ptr, block_height, block_width, sv_elements, sv_indices, ue, ign_nans);
81 }
82
83 template<typename IT_>
84 static void filter_offdiag_row_mat(double* mat, const IT_* const row_ptr, int block_height, int block_width, const double * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
85 {
86 BACKEND_SKELETON_VOID(filter_offdiag_row_mat_cuda, filter_offdiag_row_mat_generic, filter_offdiag_row_mat_generic, mat, row_ptr, block_height, block_width, sv_elements, sv_indices, ue, ign_nans)
87 }
88
89 template<typename IT_>
90 static void filter_offdiag_row_mat(float* mat, const IT_* const row_ptr, int block_height, int block_width, const float * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans)
91 {
92 BACKEND_SKELETON_VOID(filter_offdiag_row_mat_cuda, filter_offdiag_row_mat_generic, filter_offdiag_row_mat_generic, mat, row_ptr, block_height, block_width, sv_elements, sv_indices, ue, ign_nans)
93 }
94
95 template<typename DT_, typename IT_>
96 static void filter_weak_matrix_rows(DT_* mat_a, const DT_* const mat_m, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue)
97 {
98 filter_weak_matrix_rows_generic(mat_a, mat_m, row_ptr, block_height, block_width, sv_elements, sv_indices, ue);
99 }
100
101 template<typename IT_>
102 static void filter_weak_matrix_rows(double* mat_a, const double* const mat_m, const IT_* const row_ptr, int block_height, int block_width, const double * const sv_elements, const IT_ * const sv_indices, const Index ue)
103 {
104 BACKEND_SKELETON_VOID(filter_weak_matrix_rows_cuda, filter_weak_matrix_rows_generic, filter_weak_matrix_rows_generic, mat_a, mat_m, row_ptr, block_height, block_width, sv_elements, sv_indices, ue)
105 }
106
107 template<typename IT_>
108 static void filter_weak_matrix_rows(float* mat_a, const float* const mat_m, const IT_* const row_ptr, int block_height, int block_width, const float * const sv_elements, const IT_ * const sv_indices, const Index ue)
109 {
110 BACKEND_SKELETON_VOID(filter_weak_matrix_rows_cuda, filter_weak_matrix_rows_generic, filter_weak_matrix_rows_generic, mat_a, mat_m, row_ptr, block_height, block_width, sv_elements, sv_indices, ue)
111 }
112
113 template <typename DT_, typename IT_>
114 static void filter_rhs_generic(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
115
116 template <typename DT_, typename IT_>
117 static void filter_def_generic(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
118
119 template <typename DT_, typename IT_>
120 static void filter_rhs_cuda(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
121
122 template <typename DT_, typename IT_>
123 static void filter_def_cuda(DT_ * v, int block_size, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
124
125 template<typename DT_, typename IT_>
126 static void filter_unit_mat_generic(DT_* mat, const IT_* const row_ptr, const IT_* const col_idx, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
127
128 template<typename DT_, typename IT_>
129 static void filter_unit_mat_cuda(DT_* mat, const IT_* const row_ptr, const IT_* const col_idx, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
130
131 template<typename DT_, typename IT_>
132 static void filter_offdiag_row_mat_generic(DT_* mat, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
133
134 template<typename DT_, typename IT_>
135 static void filter_offdiag_row_mat_cuda(DT_* mat, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue, bool ign_nans);
136
137 template<typename DT_, typename IT_>
138 static void filter_weak_matrix_rows_generic(DT_* mat_a, const DT_* const mat_m, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue);
139
140 template<typename DT_, typename IT_>
141 static void filter_weak_matrix_rows_cuda(DT_* mat_a, const DT_* const mat_m, const IT_* const row_ptr, int block_height, int block_width, const DT_ * const sv_elements, const IT_ * const sv_indices, const Index ue);
142 };
143
// Explicit instantiation declarations for the generic kernels.
// With FEAT_EICKT defined, the {float, double} x {std::uint64_t, std::uint32_t}
// variants below are not implicitly instantiated by including this header;
// they are instantiated in unit_filter_blocked_generic.cpp and linked in via
// the shared library.
#ifdef FEAT_EICKT
// --- filter_rhs_generic ---
extern template void UnitFilterBlocked::filter_rhs_generic<float, std::uint64_t>(
  float* v, int block_size,
  const float* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_rhs_generic<double, std::uint64_t>(
  double* v, int block_size,
  const double* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_rhs_generic<float, std::uint32_t>(
  float* v, int block_size,
  const float* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_rhs_generic<double, std::uint32_t>(
  double* v, int block_size,
  const double* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);

// --- filter_def_generic ---
extern template void UnitFilterBlocked::filter_def_generic<float, std::uint64_t>(
  float* v, int block_size,
  const float* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_def_generic<double, std::uint64_t>(
  double* v, int block_size,
  const double* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_def_generic<float, std::uint32_t>(
  float* v, int block_size,
  const float* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_def_generic<double, std::uint32_t>(
  double* v, int block_size,
  const double* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);

// --- filter_unit_mat_generic ---
extern template void UnitFilterBlocked::filter_unit_mat_generic<float, std::uint64_t>(
  float* mat, const std::uint64_t* const row_ptr, const std::uint64_t* const col_idx,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_unit_mat_generic<double, std::uint64_t>(
  double* mat, const std::uint64_t* const row_ptr, const std::uint64_t* const col_idx,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_unit_mat_generic<float, std::uint32_t>(
  float* mat, const std::uint32_t* const row_ptr, const std::uint32_t* const col_idx,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_unit_mat_generic<double, std::uint32_t>(
  double* mat, const std::uint32_t* const row_ptr, const std::uint32_t* const col_idx,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);

// --- filter_offdiag_row_mat_generic ---
extern template void UnitFilterBlocked::filter_offdiag_row_mat_generic<float, std::uint64_t>(
  float* mat, const std::uint64_t* const row_ptr,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_offdiag_row_mat_generic<double, std::uint64_t>(
  double* mat, const std::uint64_t* const row_ptr,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_offdiag_row_mat_generic<float, std::uint32_t>(
  float* mat, const std::uint32_t* const row_ptr,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);
extern template void UnitFilterBlocked::filter_offdiag_row_mat_generic<double, std::uint32_t>(
  double* mat, const std::uint32_t* const row_ptr,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue, bool ign_nans);

// --- filter_weak_matrix_rows_generic ---
extern template void UnitFilterBlocked::filter_weak_matrix_rows_generic<float, std::uint64_t>(
  float* mat_a, const float* const mat_m, const std::uint64_t* const row_ptr,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue);
extern template void UnitFilterBlocked::filter_weak_matrix_rows_generic<double, std::uint64_t>(
  double* mat_a, const double* const mat_m, const std::uint64_t* const row_ptr,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint64_t* const sv_indices,
  const Index ue);
extern template void UnitFilterBlocked::filter_weak_matrix_rows_generic<float, std::uint32_t>(
  float* mat_a, const float* const mat_m, const std::uint32_t* const row_ptr,
  int block_height, int block_width,
  const float* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue);
extern template void UnitFilterBlocked::filter_weak_matrix_rows_generic<double, std::uint32_t>(
  double* mat_a, const double* const mat_m, const std::uint32_t* const row_ptr,
  int block_height, int block_width,
  const double* const sv_elements, const std::uint32_t* const sv_indices,
  const Index ue);
#endif
172
173 } // namespace Arch
174 } // namespace LAFEM
175} // namespace FEAT
176
178#ifndef __CUDACC__
179#include <kernel/lafem/arch/unit_filter_blocked_generic.hpp>
180#endif
181#endif // KERNEL_LAFEM_ARCH_UNIT_FILTER_BLOCKED_HPP
FEAT Kernel base header.
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.