2#include <kernel/adjacency/coloring.hpp>
3#include <kernel/solver/amavanka_base.hpp>
4#include <kernel/solver/voxel_amavanka.hpp>
// NOTE(review): this chunk is garbled by extraction -- original source line
// numbers are fused into the code text, braces are missing, and several
// statements are truncated (e.g. the matrix inversion between gather and the
// norm check, and the accumulation into `norm`, are not visible here).
// Comments below only describe what the visible code demonstrates.
13 template<
typename DT_,
typename IT_,
int n_,
bool skip_singular_>
// Assembles the unscaled (not yet omega-weighted) Vanka blocks on the host
// for all macros of one color.
// \param mat_wrap     CSR block wrapper of the system matrix (read only).
// \param vanka_wrap   CSR block wrapper of the Vanka matrix (accumulated into).
// \param coloring_map array of macro indices forming one independent color,
//                     which is what makes the OpenMP parallel region safe.
// \param color_size   number of macros in coloring_map.
// \param stride       row stride of the dense stride*stride scratch buffers.
// \param eps          singularity threshold (used only when skip_singular_).
// NOTE(review): the parameter list is visibly truncated -- the body also uses
// `macro_dofs` and `macro_mask`, which must be parameters in the original file.
14 void assemble_unscaled_vanka_host(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
15 Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
17 const int* coloring_map,
Index color_size,
18 Index stride, DT_ eps)
// Branch that additionally detects (near-)singular local macro matrices and
// records the result in macro_mask.
22 if constexpr(skip_singular_)
24 FEAT_PRAGMA_OMP(parallel)
// Per-thread dense scratch buffer for the gathered local macro matrix.
27 std::unique_ptr<DataType[]> local(
new DataType[stride*stride]);
28 std::fill(local.get(), local.get() + stride*stride, DataType(0));
// Per-thread copy of the gathered matrix, kept so the original entries are
// still available after `local` is modified (presumably inverted in a line
// missing from this chunk -- TODO confirm against the full file).
29 std::unique_ptr<DataType[]> local_t(
new DataType[stride*stride]);
30 std::fill(local_t.get(), local_t.get() + stride*stride, DataType(0));
// Pivot array, presumably for the (not visible) invert_matrix call.
31 std::unique_ptr<Index[]> pivot(
new Index[stride]);
32 std::fill(pivot.get(), pivot.get() + stride,
Index(0));
// Loop over all macros of this color; iterations are independent.
34 for(
int k = 0; k < int(color_size); ++k)
36 const int imacro = coloring_map[k];
// Gather the macro's coupled entries into the dense buffer; the returned
// pair holds the local (row, column) counts actually filled in.
37 const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride,
Index(imacro), macro_dofs,
// Save a copy of the gathered matrix before it is overwritten.
47 for(
Index i(0); i < nrc.first; ++i)
48 for(
Index j(0); j < nrc.second; ++j)
49 local_t[i*stride+j] = local[i*stride+j];
// Check the quality of the inversion: xij is an entry of (I - A*A^{-1})
// (or (I - A^{-1}*A) -- orientation cannot be confirmed from this chunk);
// the reduction of xij into `norm` is missing from this extraction.
55 DataType norm = DataType(0);
56 for(
Index i(0); i < nrc.first; ++i)
58 for(
Index j(0); j < nrc.first; ++j)
60 DataType xij = DataType(i == j ? 1 : 0);
61 for(
Index l(0); l < nrc.first; ++l)
62 xij -= local_t[i*stride+l] * local[l*stride+j];
// Written as !(norm < eps) rather than (norm >= eps) so that a NaN norm
// (e.g. from inverting a singular matrix) also classifies as singular.
71 const bool singular = !(norm <
eps);
// Record whether this macro's local system was usable.
74 macro_mask[imacro] = (singular ? 0 : 1);
// Scatter the local result back into the global Vanka matrix.
79 Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride,
Index(imacro), macro_dofs,
// Reset only the entries actually used, ready for the next macro.
84 for(
Index i(0); i < nrc.first; ++i)
85 for(
Index j(0); j < nrc.second; ++j)
86 local[i*stride+j] = DataType(0);
// Fast path without singularity tracking: no local_t copy and no norm check.
93 FEAT_PRAGMA_OMP(parallel)
100 std::unique_ptr<DataType[]> local(
new DataType[stride*stride]);
101 std::fill(local.get(), local.get() + stride*stride, DataType(0));
102 std::unique_ptr<Index[]> pivot(
new Index[stride]);
103 std::fill(pivot.get(), pivot.get() + stride,
Index(0));
105 for(
int k = 0; k < int(color_size); ++k)
107 const int imacro = coloring_map[k];
// Gather, (invert -- not visible here), and scatter back, as above.
108 const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride,
Index(imacro), macro_dofs,
114 Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride,
Index(imacro), macro_dofs,
// Clear the used portion of the scratch buffer for the next macro.
118 for(
Index i(0); i < nrc.first; ++i)
119 for(
Index j(0); j < nrc.second; ++j)
120 local[i*stride+j] = DataType(0);
// NOTE(review): truncated by extraction -- the body uses `row_dom_ptr`,
// `row_img_idx`, `hw` and `_m_mask`, whose declarations/parameters are not
// visible in this chunk.
126 template<
typename DT_,
typename IT_,
int n_,
bool skip_singular_>
// Applies the relaxation factor omega to the rows of every CSR block of the
// assembled Vanka matrix, delegating the per-row work to
// VoxelAmaVankaCore::scale_row.
// \param vanka_wrap  CSR block wrapper of the Vanka matrix (modified in place).
// \param omega       relaxation/damping factor applied to each row.
127 void scale_vanka_rows_host(Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
const DT_ omega,
// Iterate over the n_ x n_ grid of matrix blocks; i is the block-row.
130 for(
int i = 0; i < n_; ++i)
// tensor_counts[2*i] holds the row count of block-row i -- TODO confirm
// against the CSRTupleMatrixWrapper definition.
135 const int num_rows = int(vanka_wrap.tensor_counts[2*i]);
136 for(
int j = 0; j < n_; ++j)
// Raw CSR arrays of block (i, j).
138 DT_* vals = vanka_wrap.data_arrays[i*n_ + j];
139 const IT_* row_ptr = vanka_wrap.row_arrays[i*n_+j];
140 const IT_* col_idx = vanka_wrap.col_arrays[i*n_+j];
// Block width taken from the column index j (+1 offset into blocksizes).
141 const int hb = vanka_wrap.blocksizes[j +1];
// True on the diagonal meta-blocks (i == j); scale_row treats these
// differently -- exact semantics not visible here.
142 const bool meta_diag = (i == j);
// Rows are independent, so the row loop is parallelized.
144 FEAT_PRAGMA_OMP(parallel
for)
145 for(
int row = 0; row < num_rows; ++row)
148 Intern::VoxelAmaVankaCore::scale_row<DT_, IT_, skip_singular_>(vals, omega, row_ptr, col_idx, row_dom_ptr, row_img_idx, hw, hb,
Index(row), _m_mask, meta_diag);
// NOTE(review): truncated by extraction -- `dof_macros`, `macro_mask`,
// `coloring_data`, `coloring_map`, `graphs_dof_macros` and `graphs_macro_dofs`
// are used below but their declarations are not all visible in this chunk.
158 template<
typename DT_,
typename IT_,
int n_>
// Host-side driver: assembles the full AmaVanka preconditioner matrix by
// dispatching the color-wise unscaled assembly kernel and then the row
// scaling kernel, each compiled for both values of the skip-singular flag.
// \param mat_wrap      CSR block wrapper of the system matrix (read only).
// \param vanka_wrap    CSR block wrapper of the Vanka matrix (written to).
// \param macro_dofs    adjacency graphs mapping macros to their dofs.
// \param stride        dense scratch-buffer stride for the kernels.
// \param omega         relaxation factor forwarded to the scaling kernel.
// \param eps           singularity threshold forwarded to the assembly kernel.
// \param skip_singular selects the singularity-tracking code path at runtime.
159 void assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
160 Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
const std::vector<Adjacency::Graph>& macro_dofs,
162 Index stride, DT_ omega, DT_ eps,
bool skip_singular)
// Collect raw graph pointers so the kernels can take plain arrays.
167 for(
int i = 0; i < n_; ++i)
169 graphs_dof_macros[i] = &(dof_macros.at(std::size_t(i)));
170 graphs_macro_dofs[i] = &(macro_dofs.at(std::size_t(i)));
172 int* _m_mask = macro_mask.data();
// Process the macros color by color; macros within one color are
// independent, so each kernel call can parallelize internally.
176 for(
Index k = 0; k < coloring_map.size(); ++k)
// skip_singular selects between the two template instantiations (the
// surrounding if/else is not visible in this extraction).
179 Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, true>
180 (mat_wrap, vanka_wrap, graphs_macro_dofs,
181 _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
183 Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, false>
184 (mat_wrap, vanka_wrap, graphs_macro_dofs,
185 _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
// Finally apply the omega scaling to all rows, again specialized on the
// skip-singular flag.
188 Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, true>(vanka_wrap, omega, graphs_dof_macros,_m_mask);
190 Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, false>(vanka_wrap, omega, graphs_dof_macros, _m_mask);
// Explicit template instantiations of assemble_vanka_host for all supported
// data/index type combinations (double/float x uint64/uint32, n_ = 2).
// NOTE(review): the trailing argument types and semicolons of each
// instantiation are cut off in this extraction.
203template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&,
const std::vector<Adjacency::Graph>&,
205template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&,
const std::vector<Adjacency::Graph>&,
207template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&,
const std::vector<Adjacency::Graph>&,
209template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&,
const std::vector<Adjacency::Graph>&,
Glossary of symbols referenced above (extraction residue from hover documentation):
- Data handler for inverse coloring data.
- std::vector<int*>& get_coloring_maps() — retrieves the color maps.
- Adjacency graph implementation.
- Index* get_domain_ptr() — returns the domain pointer array.
- Index* get_image_idx() — returns the image node index array.
- T_ min(T_ a, T_ b) — returns the minimum of two values.
- DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[]) — inverts a matrix in place and returns its determinant.
- T_ eps() — returns the machine precision constant for a floating-point data type.
- std::uint64_t Index — the index data type.