FEAT 3
Finite Element Analysis Toolbox
voxel_amavanka.cpp
#include <kernel/adjacency/coloring.hpp>
#include <kernel/solver/amavanka_base.hpp>
#include <kernel/solver/voxel_amavanka.hpp>

#include <algorithm>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

namespace FEAT
{
  namespace Solver
  {
    namespace Kernel
    {
      template<typename DT_, typename IT_, int n_, bool skip_singular_>
      void assemble_unscaled_vanka_host(const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
        const Adjacency::Graph** macro_dofs, int* macro_mask,
        const int* coloring_map, Index color_size,
        Index stride, DT_ eps)
      {
        typedef DT_ DataType;
        // typedef IT_ IndexType;
        if constexpr(skip_singular_)
        {
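          // Note (assumption): FEAT_PRAGMA_OMP(x) presumably expands to '#pragma omp x' in
          // OpenMP-enabled builds and to nothing otherwise, so the block below is executed by a
          // team of threads, each holding its own local scratch arrays.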
          FEAT_PRAGMA_OMP(parallel)
          {
            // allocate arrays for local matrix
            std::unique_ptr<DataType[]> local(new DataType[stride*stride]);
            std::fill(local.get(), local.get() + stride*stride, DataType(0));
            std::unique_ptr<DataType[]> local_t(new DataType[stride*stride]);
            std::fill(local_t.get(), local_t.get() + stride*stride, DataType(0));
            std::unique_ptr<Index[]> pivot(new Index[stride]);
            std::fill(pivot.get(), pivot.get() + stride, Index(0));
            FEAT_PRAGMA_OMP(for)
            for(int k = 0; k < int(color_size); ++k)
            {
              const int imacro = coloring_map[k];
              const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride, Index(imacro), macro_dofs,
                Index(0), Index(0), Index(0), Index(0));
              // The approach used for checking the regularity of the local matrix is to test whether
              //
              //   || I - A*A^{-1} ||_F^2 < eps
              //
              // We could try to analyse the pivots returned by the invert_matrix function instead,
              // but unfortunately that approach sometimes leads to false positives.

              // make a backup if checking for singularity
              for(Index i(0); i < nrc.first; ++i)
                for(Index j(0); j < nrc.second; ++j)
                  local_t[i*stride+j] = local[i*stride+j];

              // invert matrix
              Math::invert_matrix(nrc.first, stride, local.get(), pivot.get());

              // compute (squared) Frobenius norm of (I - A*A^{-1})
              DataType norm = DataType(0);
              for(Index i(0); i < nrc.first; ++i)
              {
                for(Index j(0); j < nrc.first; ++j)
                {
                  DataType xij = DataType(i == j ? 1 : 0);
                  for(Index l(0); l < nrc.first; ++l)
                    xij -= local_t[i*stride+l] * local[l*stride+j]; // A_il * (A^{-1})_lj
                  norm += xij * xij;
                }
              }

              // is the matrix block singular?
              // Note: we check for !(norm < eps) instead of (norm >= eps),
              // because the latter one evaluates to false if norm is NaN,
              // which would result in a false negative
              const bool singular = !(norm < eps);

              // set macro regularity mask
              macro_mask[imacro] = (singular ? 0 : 1);

              // scatter local matrix
              if(!singular)
              {
                Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride, Index(imacro), macro_dofs,
                  Index(0), Index(0), Index(0), Index(0));
              }

              // reformat (zero-fill) the local matrix for the next macro -- TODO: is this necessary?
              for(Index i(0); i < nrc.first; ++i)
                for(Index j(0); j < nrc.second; ++j)
                  local[i*stride+j] = DataType(0);
            }
          }
        }
        else
        {
          // eps is only needed for the singularity check above
          (void)eps;
          FEAT_PRAGMA_OMP(parallel)
          {
            // allocate arrays for local matrix
            std::unique_ptr<DataType[]> local(new DataType[stride*stride]);
            std::fill(local.get(), local.get() + stride*stride, DataType(0));
            std::unique_ptr<Index[]> pivot(new Index[stride]);
            std::fill(pivot.get(), pivot.get() + stride, Index(0));
            FEAT_PRAGMA_OMP(for)
            for(int k = 0; k < int(color_size); ++k)
            {
              const int imacro = coloring_map[k];
              const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride, Index(imacro), macro_dofs,
                Index(0), Index(0), Index(0), Index(0));

              // invert matrix
              Math::invert_matrix(nrc.first, stride, local.get(), pivot.get());

              // scatter local matrix
              Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride, Index(imacro), macro_dofs,
                Index(0), Index(0), Index(0), Index(0));

              // reformat (zero-fill) the local matrix for the next macro -- TODO: is this necessary?
              for(Index i(0); i < nrc.first; ++i)
                for(Index j(0); j < nrc.second; ++j)
                  local[i*stride+j] = DataType(0);
            }
          }
        }
      }

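      // Illustrative sketch (not used by the kernels above): the singularity test from
      // assemble_unscaled_vanka_host, extracted as a stand-alone helper for documentation purposes;
      // the helper name is ours, purely for illustration. 'a_backup' holds a copy of the local macro
      // matrix A and 'a_inv' the output of Math::invert_matrix, both stored row-major with the given
      // stride. The NaN-safe comparison !(norm < eps) also flags blocks whose computed "inverse"
      // contains NaNs.
      template<typename DT_>
      inline bool local_macro_is_singular(const DT_* a_backup, const DT_* a_inv, Index n, Index stride, DT_ eps)
      {
        DT_ norm = DT_(0);
        for(Index i(0); i < n; ++i)
        {
          for(Index j(0); j < n; ++j)
          {
            // entry (i,j) of I - A*A^{-1}
            DT_ xij = DT_(i == j ? 1 : 0);
            for(Index l(0); l < n; ++l)
              xij -= a_backup[i*stride+l] * a_inv[l*stride+j];
            norm += xij * xij;
          }
        }
        return !(norm < eps);
      }
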
      template<typename DT_, typename IT_, int n_, bool skip_singular_>
      void scale_vanka_rows_host(Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, const DT_ omega,
        const Adjacency::Graph** graphs_dof_macros, const int* _m_mask)
      {
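        // Loop over all n_ x n_ meta-blocks of the Vanka matrix; for each block, an OpenMP loop over
        // its native rows delegates to Intern::VoxelAmaVankaCore::scale_row (amavanka_base.hpp),
        // which applies the damping factor omega row-wise using the dof->macro adjacency graph.
        // When skip_singular_ is set, the macro mask is forwarded so that rows belonging to singular
        // macros can be handled there.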
        for(int i = 0; i < n_; ++i)
        {
          const Index* row_dom_ptr = graphs_dof_macros[i]->get_domain_ptr();
          const Index* row_img_idx = graphs_dof_macros[i]->get_image_idx();
          const int hw = vanka_wrap.blocksizes[Index(1) + Math::min(Index(i), Index(1))];
          const int num_rows = int(vanka_wrap.tensor_counts[2*i]);
          for(int j = 0; j < n_; ++j)
          {
            DT_* vals = vanka_wrap.data_arrays[i*n_ + j];
            const IT_* row_ptr = vanka_wrap.row_arrays[i*n_+j];
            const IT_* col_idx = vanka_wrap.col_arrays[i*n_+j];
            const int hb = vanka_wrap.blocksizes[j + 1];
            const bool meta_diag = (i == j);
            // now do the actual inner OpenMP loop over each row of the sub-matrix
            FEAT_PRAGMA_OMP(parallel for)
            for(int row = 0; row < num_rows; ++row)
            {
              // careful: num_rows is counted in native elements, not raw elements
              Intern::VoxelAmaVankaCore::scale_row<DT_, IT_, skip_singular_>(vals, omega, row_ptr, col_idx, row_dom_ptr, row_img_idx, hw, hb, Index(row), _m_mask, meta_diag);
            }
          }
        }
      }
    }

    namespace Arch
    {
      template<typename DT_, typename IT_, int n_>
      void assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, const std::vector<Adjacency::Graph>& macro_dofs,
        const std::vector<Adjacency::Graph>& dof_macros, std::vector<int>& macro_mask, const Adjacency::ColoringDataHandler& coloring_data,
        Index stride, DT_ omega, DT_ eps, bool skip_singular)
      {
        // extract the internal graph arrays
        const Adjacency::Graph* graphs_dof_macros[n_];
        const Adjacency::Graph* graphs_macro_dofs[n_];
        for(int i = 0; i < n_; ++i)
        {
          graphs_dof_macros[i] = &(dof_macros.at(std::size_t(i)));
          graphs_macro_dofs[i] = &(macro_dofs.at(std::size_t(i)));
        }
        // the macro mask is only required if singular macros are to be skipped
        int* _m_mask = macro_mask.data();
        if(!skip_singular)
          _m_mask = nullptr;
        auto& coloring_map = coloring_data.get_coloring_maps();
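        // The coloring is assumed to partition the macros so that macros of the same color do not
        // share any DOFs; each color block is then processed by one kernel call whose internal
        // OpenMP loop can gather, invert and scatter the local macro matrices without write
        // conflicts, while the loop over the color blocks themselves remains sequential.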
        for(Index k = 0; k < coloring_map.size(); ++k)
        {
          if(skip_singular)
            Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, true>
              (mat_wrap, vanka_wrap, graphs_macro_dofs,
               _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
          else
            Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, false>
              (mat_wrap, vanka_wrap, graphs_macro_dofs,
               _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
        }
        if(skip_singular)
          Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, true>(vanka_wrap, omega, graphs_dof_macros, _m_mask);
        else
          Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, false>(vanka_wrap, omega, graphs_dof_macros, _m_mask);
      }
    } // namespace Arch
  } // namespace Solver
} // namespace FEAT

// explicitly instantiate the templates
using namespace FEAT;
using namespace FEAT::Solver;

template void Arch::assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&, const std::vector<Adjacency::Graph>&,
  const std::vector<Adjacency::Graph>&, std::vector<int>&, const Adjacency::ColoringDataHandler&, Index, double, double, bool);
template void Arch::assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&, const std::vector<Adjacency::Graph>&,
  const std::vector<Adjacency::Graph>&, std::vector<int>&, const Adjacency::ColoringDataHandler&, Index, double, double, bool);
template void Arch::assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&, const std::vector<Adjacency::Graph>&,
  const std::vector<Adjacency::Graph>&, std::vector<int>&, const Adjacency::ColoringDataHandler&, Index, float, float, bool);
template void Arch::assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&, const std::vector<Adjacency::Graph>&,
  const std::vector<Adjacency::Graph>&, std::vector<int>&, const Adjacency::ColoringDataHandler&, Index, float, float, bool);