10#include <kernel/backend.hpp>
11#include <kernel/adjacency/coloring.hpp>
12#include <kernel/lafem/null_matrix.hpp>
13#include <kernel/lafem/sparse_matrix_bcsr.hpp>
14#include <kernel/lafem/sparse_matrix_csr.hpp>
15#include <kernel/lafem/saddle_point_matrix.hpp>
16#include <kernel/lafem/tuple_matrix.hpp>
17#include <kernel/global/matrix.hpp>
18#include <kernel/solver/base.hpp>
19#include <kernel/util/stop_watch.hpp>
20#include <kernel/solver/amavanka.hpp>
23#include <kernel/util/cuda_util.hpp>
35 enum VankaAssemblyPolicy
38 oneThreadperBlock = 0,
54 anisotropicMacros = 0,
105 template<
typename DT_,
typename IT_,
int n_>
106 void assemble_vanka_host(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
107 Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
const std::vector<Adjacency::Graph>& macro_dofs,
108 const std::vector<Adjacency::Graph>& dof_macros, std::vector<int>& macro_mask,
const Adjacency::ColoringDataHandler& coloring_data,
109 Index stride, DT_ omega, DT_ eps,
bool skip_singular);
184 template<
typename DT_,
typename IT_,
int n_>
185 void assemble_vanka_device(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
186 Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
const std::vector<Index*>& d_macro_dofs,
187 const std::vector<Index*>& d_dof_macros,
int* d_macro_mask,
const std::vector<Index>& max_degree_dofs,
188 const std::vector<Index>& max_degree_macros,
const Adjacency::ColoringDataHandler& coloring_data,
189 Index num_macros,
Index stride, DT_ omega, DT_ eps,
bool skip_singular,
bool uniform_macros);
260 template<
typename DT_,
typename IT_,
int n_>
261 void assemble_vanka_device_batched(
const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
262 Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap,
const std::vector<Index*>& d_macro_dofs,
263 const std::vector<Index*>& d_dof_macros,
int* d_macro_mask,
const std::vector<Index>& max_degree_dofs,
264 const std::vector<Index>& max_degree_macros,
const Adjacency::ColoringDataHandler& coloring_data,
265 Index num_macros,
Index stride,
Index actual_matrix_size, DT_ omega,
bool skip_singular);
299 template<
typename Matrix_,
301 FEAT::Intern::VankaAssemblyPolicy pol_threading_ = FEAT::Intern::VankaAssemblyPolicy::batchedAssembly,
302 FEAT::Intern::VankaMacroPolicy macro_type_ = FEAT::Intern::VankaMacroPolicy::uniformMacros>
321 typedef Intern::CSRTupleMatrixWrapper<DataType, IndexType, Intern::AmaVankaMatrixHelper<Matrix_>::num_blocks>
MatrixWrapper;
323 typedef Intern::CSRTupleMatrixWrapper<DataType, IndexType, Intern::AmaVankaMatrixHelper<VankaMatrixType>::num_blocks>
VankaWrapper;
339 _max_degree_macros.resize(this->
_macro_dofs.size());
342 if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
346 _max_degree_macros[i] = this->_dof_macros[i].degree();
356 #ifdef FEAT_HAVE_CUDA
358 _d_dof_macros.resize(this->_dof_macros.size());
359 for(std::size_t i = 0; i < this->
_macro_dofs.size(); ++i)
362 if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
374 if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
380 const Index loc_size = dom_ptr[k+1] - dom_ptr[k];
382 std::memcpy(tmp_alias + k*(
_max_degree_dofs[i]+1) + 1, img_ptr + dom_ptr[k], loc_size*
sizeof(
Index));
387 malloc_size = this->_dof_macros[i].get_num_nodes_domain()*(_max_degree_macros[i]+1)*
sizeof(
Index);
388 _d_dof_macros[i] = (
Index*)Util::cuda_malloc_managed(malloc_size);
390 Index* tmp_alias = _d_dof_macros[i];
391 const Index* dom_ptr = this->_dof_macros[i].get_domain_ptr();
392 const Index* img_ptr = this->_dof_macros[i].get_image_idx();
393 for(
Index k = 0; k < this->_dof_macros[i].get_num_nodes_domain(); ++k)
395 const Index loc_size = dom_ptr[k+1] - dom_ptr[k];
396 tmp_alias[k*(_max_degree_macros[i]+1)] = loc_size;
397 std::memcpy(tmp_alias + k*(_max_degree_macros[i]+1) + 1, img_ptr + dom_ptr[k], loc_size*
sizeof(
Index));
398 std::memset(tmp_alias + k*(_max_degree_macros[i]+1) + loc_size + 1, ~
int(0), (_max_degree_macros[i] - loc_size)*
sizeof(
Index));
405 _d_macro_mask = (
int*)Util::cuda_malloc_managed(this->
_macro_mask.size()*
sizeof(int));
415 #ifdef FEAT_HAVE_CUDA
421 Util::cuda_free((
void*)(_d_dof_macros[i]));
423 _d_dof_macros.clear();
449 Arch::assemble_vanka_host(mat_wrap, vanka_wrap, this->
_macro_dofs, this->_dof_macros, this->
_macro_mask, _coloring_data,
453 #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
475 bool uniform_macros = (macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros);
477 if constexpr(pol_threading_ == FEAT::Intern::VankaAssemblyPolicy::oneThreadperBlock)
479 Arch::assemble_vanka_device(mat_wrap, vanka_wrap,
_d_macro_dofs, _d_dof_macros,
_d_macro_mask,
_max_degree_dofs, _max_degree_macros,
_coloring_data, num_macros, stride, this->
_omega, eps, this->
_skip_singular, uniform_macros);
481 else if (pol_threading_ == FEAT::Intern::VankaAssemblyPolicy::batchedAssembly)
483 XASSERTM(uniform_macros,
"Batched assembly only works with uniform macros!");
485 Arch::assemble_vanka_device_batched(mat_wrap, vanka_wrap,
_d_macro_dofs, _d_dof_macros,
_d_macro_mask,
_max_degree_dofs, _max_degree_macros,
_coloring_data, num_macros, stride, stride, this->
_omega, this->
_skip_singular);
514 template<
typename ColoringType_>
516 const ColoringType_& coloring,
518 BaseClass(matrix, filter, omega, num_steps),
525 #ifdef FEAT_HAVE_CUDA
599 template<typename ColoringType_>
602 XASSERTM(color.size() == this->_macro_dofs.front().get_num_nodes_domain(),
"Coloring does not fit macro dofs");
610 return "VoxelAmaVanka";
617 this->watch_init_symbolic.
start();
621 this->watch_init_symbolic.
stop();
631 _max_degree_macros.clear();
638 const DataType eps = Math::eps<DataType>();
640 this->watch_init_numeric.
start();
648 auto matrix_wrapper = Solver::Intern::get_meta_matrix_wrapper(this->
_matrix);
650 auto vanka_wrapper = Solver::Intern::get_meta_matrix_wrapper(this->
_vanka);
653 this->watch_init_numeric.
stop();
681 template<
typename Matrix_,
typename Filter_,
typename ColoringType_,
682 FEAT::Intern::VankaAssemblyPolicy pol_threading_ = FEAT::Intern::VankaAssemblyPolicy::batchedAssembly,
683 FEAT::Intern::VankaMacroPolicy macro_type_ = FEAT::Intern::VankaMacroPolicy::uniformMacros>
684 std::shared_ptr<VoxelAmaVanka<Matrix_, Filter_, pol_threading_, macro_type_>>
new_voxel_amavanka(
const Matrix_& matrix,
const Filter_& filter,
const ColoringType_& coloring,
685 typename Matrix_::DataType omega =
typename Matrix_::DataType(1),
Index num_steps =
Index(1))
687 return std::make_shared<VoxelAmaVanka<Matrix_, Filter_, pol_threading_, macro_type_>>(matrix, filter,coloring, omega, num_steps);
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Datahandler for inverse coloring data.
void fill_color(const std::vector< int > &coloring, int hint=-1)
Fill in the coloring array.
Additive Macro-wise Matrix-based Vanka preconditioner/smoother.
const Matrix_ & _matrix
the system matrix
std::vector< int > _macro_mask
the macro mask
DataType _omega
damping parameter
bool _skip_singular
skip singular macros?
virtual void init_symbolic() override
Performs symbolic factorization.
VankaMatrixType _vanka
the Vanka preconditioner matrix
virtual void done_symbolic() override
Releases the symbolic factorization data.
std::vector< Adjacency::Graph > _macro_dofs
the DOF-macro graphs
virtual void init_numeric()
Numeric initialization method.
Additive Macro-wise Matrix-based Vanka preconditioner/smoother.
bool _allocate_device
flag whether we should allocate additional device pointer
void _init_numeric_generic(const MatrixWrapper &mat_wrap, VankaWrapper &vanka_wrap, Index num_macros, Index stride, DataType eps)
Calls generic numeric kernel.
std::vector< Index > _max_degree_dofs
size data
virtual void done_symbolic() override
Frees symbolic values and device pointers.
Matrix_::DataType DataType
our data type
Intern::CSRTupleMatrixWrapper< DataType, IndexType, Intern::AmaVankaMatrixHelper< Matrix_ >::num_blocks > MatrixWrapper
our matrix data wrapper
void _free_device()
Frees device pointers.
void _init_numeric_cuda(const MatrixWrapper &mat_wrap, VankaWrapper &vanka_wrap, Index num_macros, Index stride, DataType eps)
Calls cuda numeric kernel.
Intern::AmaVankaMatrixHelper< Matrix_ >::VankaMatrix VankaMatrixType
the type of our Vanka matrix
Matrix_::VectorTypeL VectorType
our vector type
Intern::CSRTupleMatrixWrapper< DataType, IndexType, Intern::AmaVankaMatrixHelper< VankaMatrixType >::num_blocks > VankaWrapper
our vanka data wrapper
Solver::AmaVanka< Matrix_, Filter_ > BaseClass
our base-class
virtual void init_symbolic() override
Initializes symbolic values and device pointers.
Adjacency::ColoringDataHandler _coloring_data
coloring
Matrix_::IndexType IndexType
our index type
VoxelAmaVanka(const Matrix_ &matrix, const Filter_ &filter, const ColoringType_ &coloring, const DataType omega=DataType(1), const Index num_steps=Index(1))
Constructor.
void fill_color(const ColoringType_ &color, int hint=-1)
Fills the coloring data.
virtual String name() const override
Returns the name of the solver.
std::vector< Index * > _d_macro_dofs
vector of graph arrays
void _alloc_device()
Allocates device pointers, if required.
int * _d_macro_mask
array of macro mask
virtual void init_numeric() override
Performs numeric factorization.
void _alloc_max_degrees()
Calculate the max degree of our graphs.
void start()
Starts the stop-watch.
void stop()
Stops the stop-watch and increments elapsed time.
String class implementation.
std::shared_ptr< VoxelAmaVanka< Matrix_, Filter_, pol_threading_, macro_type_ > > new_voxel_amavanka(const Matrix_ &matrix, const Filter_ &filter, const ColoringType_ &coloring, typename Matrix_::DataType omega=typename Matrix_::DataType(1), Index num_steps=Index(1))
Creates a new VoxelAmaVanka smoother object.
std::uint64_t Index
Index data type.