2#include <kernel/adjacency/coloring.hpp> 
    3#include <kernel/solver/amavanka_base.hpp> 
    4#include <kernel/solver/voxel_amavanka.hpp> 
   13      template<
typename DT_, 
typename IT_, 
int n_, 
bool skip_singular_>
 
   14      void assemble_unscaled_vanka_host(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
 
   15        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
 
   17        const int* coloring_map, 
Index color_size,
 
   18        Index stride, DT_ eps)
 
   22        if constexpr(skip_singular_)
 
   24          FEAT_PRAGMA_OMP(parallel)
 
   27            std::unique_ptr<DataType[]> local(
new DataType[stride*stride]);
 
   28            std::fill(local.get(), local.get() + stride*stride, DataType(0));
 
   29            std::unique_ptr<DataType[]> local_t(
new DataType[stride*stride]);
 
   30            std::fill(local_t.get(), local_t.get() + stride*stride, DataType(0));
 
   31            std::unique_ptr<Index[]> pivot(
new Index[stride]);
 
   32            std::fill(pivot.get(), pivot.get() + stride, 
Index(0));
 
   34            for(
int k = 0; k < int(color_size); ++k)
 
   36              const int imacro = coloring_map[k];
 
   37              const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride, 
Index(imacro), macro_dofs,
 
   47              for(
Index i(0); i < nrc.first; ++i)
 
   48                for(
Index j(0); j < nrc.second; ++j)
 
   49                  local_t[i*stride+j] = local[i*stride+j];
 
   55              DataType norm = DataType(0);
 
   56              for(
Index i(0); i < nrc.first; ++i)
 
   58                for(
Index j(0); j < nrc.first; ++j)
 
   60                  DataType xij = DataType(i == j ? 1 : 0);
 
   61                  for(
Index l(0); l < nrc.first; ++l)
 
   62                    xij -= local_t[i*stride+l] * local[l*stride+j]; 
 
   71              const bool singular = !(norm < 
eps);
 
   74              macro_mask[imacro] = (singular ? 0 : 1);
 
   79                Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride, 
Index(imacro), macro_dofs,
 
   84              for(
Index i(0); i < nrc.first; ++i)
 
   85                for(
Index j(0); j < nrc.second; ++j)
 
   86                  local[i*stride+j] = DataType(0);
 
   93          FEAT_PRAGMA_OMP(parallel)
 
  100            std::unique_ptr<DataType[]> local(
new DataType[stride*stride]);
 
  101            std::fill(local.get(), local.get() + stride*stride, DataType(0));
 
  102            std::unique_ptr<Index[]> pivot(
new Index[stride]);
 
  103            std::fill(pivot.get(), pivot.get() + stride, 
Index(0));
 
  105            for(
int k = 0; k < int(color_size); ++k)
 
  107              const int imacro = coloring_map[k];
 
  108              const std::pair<Index, Index> nrc = Intern::VoxelAmaVankaCore::gather(mat_wrap, local.get(), stride, 
Index(imacro), macro_dofs,
 
  114              Intern::VoxelAmaVankaCore::scatter_add(vanka_wrap, local.get(), stride, 
Index(imacro), macro_dofs,
 
  118              for(
Index i(0); i < nrc.first; ++i)
 
  119                for(
Index j(0); j < nrc.second; ++j)
 
  120                  local[i*stride+j] = DataType(0);
 
  126      template<
typename DT_, 
typename  IT_, 
int n_, 
bool skip_singular_>
 
  127      void scale_vanka_rows_host(Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, 
const DT_ omega,
 
  130        for(
int i = 0; i < n_; ++i)
 
  135          const int num_rows = int(vanka_wrap.tensor_counts[2*i]);
 
  136          for(
int j = 0; j < n_; ++j)
 
  138            DT_* vals = vanka_wrap.data_arrays[i*n_ + j];
 
  139            const IT_* row_ptr = vanka_wrap.row_arrays[i*n_+j];
 
  140            const IT_* col_idx = vanka_wrap.col_arrays[i*n_+j];
 
  141            const int hb = vanka_wrap.blocksizes[j +1];
 
  142            const bool meta_diag = (i == j);
 
  144            FEAT_PRAGMA_OMP(parallel 
for)
 
  145            for(
int row = 0; row < num_rows; ++row)
 
  148              Intern::VoxelAmaVankaCore::scale_row<DT_, IT_, skip_singular_>(vals, omega, row_ptr, col_idx, row_dom_ptr, row_img_idx, hw, hb, 
Index(row), _m_mask, meta_diag);
 
  158      template<
typename DT_, 
typename IT_, 
int n_>
 
  159      void assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
 
  160        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, 
const std::vector<Adjacency::Graph>& macro_dofs,
 
  162        Index stride, DT_ omega, DT_ eps, 
bool skip_singular)
 
  167        for(
int i = 0; i < n_; ++i)
 
  169          graphs_dof_macros[i] = &(dof_macros.at(std::size_t(i)));
 
  170          graphs_macro_dofs[i] = &(macro_dofs.at(std::size_t(i)));
 
  172        int* _m_mask = macro_mask.data();
 
  176        for(
Index k = 0; k < coloring_map.size(); ++k)
 
  179            Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, true>
 
  180                                            (mat_wrap, vanka_wrap, graphs_macro_dofs,
 
  181                                             _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
 
  183            Solver::Kernel::template assemble_unscaled_vanka_host<DT_, IT_, n_, false>
 
  184                                            (mat_wrap, vanka_wrap, graphs_macro_dofs,
 
  185                                             _m_mask, coloring_map[k], coloring_data.get_color_size(k), stride, eps);
 
  188          Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, true>(vanka_wrap, omega, graphs_dof_macros,_m_mask);
 
  190          Solver::Kernel::template scale_vanka_rows_host<DT_, IT_, n_, false>(vanka_wrap, omega, graphs_dof_macros, _m_mask);
 
  203template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint64_t, 2>&, 
const std::vector<Adjacency::Graph>&,
 
  205template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<double, std::uint32_t, 2>&, 
const std::vector<Adjacency::Graph>&,
 
  207template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint64_t, 2>&, 
const std::vector<Adjacency::Graph>&,
 
  209template void Arch::assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&, Intern::CSRTupleMatrixWrapper<float, std::uint32_t, 2>&, 
const std::vector<Adjacency::Graph>&,
 
Datahandler for inverse coloring data.
std::vector< int * > & get_coloring_maps()
Retrieve the color maps.
Adjacency Graph implementation.
Index * get_domain_ptr()
Returns the domain pointer array.
Index * get_image_idx()
Returns the image node index array.
T_ min(T_ a, T_ b)
Returns the minimum of two values.
DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[])
Inverts a matrix and returns its determinant.
T_ eps()
Returns the machine precision constant for a floating-point data type.
std::uint64_t Index
Index data type.