voxel_amavanka.hpp
// FEAT3: Finite Element Analysis Toolbox, Version 3
// Copyright (C) 2010 by Stefan Turek & the FEAT group
// FEAT3 is released under the GNU General Public License version 3,
// see the file 'copyright.txt' in the top level directory for details.

#pragma once

#include <kernel/backend.hpp>
#include <kernel/adjacency/coloring.hpp>
#include <kernel/lafem/null_matrix.hpp>
#include <kernel/lafem/sparse_matrix_bcsr.hpp>
#include <kernel/lafem/sparse_matrix_csr.hpp>
#include <kernel/lafem/saddle_point_matrix.hpp>
#include <kernel/lafem/tuple_matrix.hpp>
#include <kernel/global/matrix.hpp>
#include <kernel/solver/base.hpp>
#include <kernel/util/stop_watch.hpp>
#include <kernel/solver/amavanka.hpp>

#ifdef FEAT_HAVE_CUDA
#include <kernel/util/cuda_util.hpp>
#endif
namespace FEAT
{
  namespace Intern
  {
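    /// \brief Determines how the local macro systems are assembled: one thread per macro block or in batches.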
    enum VankaAssemblyPolicy
    {
      oneThreadperBlock = 0,
      batchedAssembly = 1
    };

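    /// \brief Determines whether all macros are assumed to have the same size (uniform) or may differ in size (anisotropic).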
    enum VankaMacroPolicy
    {
      anisotropicMacros = 0,
      uniformMacros = 1
    };
  }
  namespace Solver
  {
    namespace Arch
    {
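      /// \brief Assembles the Vanka matrix on the host from the wrapped system matrix, using the macro/dof graphs and the coloring data.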
      template<typename DT_, typename IT_, int n_>
      void assemble_vanka_host(const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, const std::vector<Adjacency::Graph>& macro_dofs,
        const std::vector<Adjacency::Graph>& dof_macros, std::vector<int>& macro_mask, const Adjacency::ColoringDataHandler& coloring_data,
        Index stride, DT_ omega, DT_ eps, bool skip_singular);

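      /// \brief Assembles the Vanka matrix on the CUDA device; used by the oneThreadperBlock assembly policy.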
      template<typename DT_, typename IT_, int n_>
      void assemble_vanka_device(const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, const std::vector<Index*>& d_macro_dofs,
        const std::vector<Index*>& d_dof_macros, int* d_macro_mask, const std::vector<Index>& max_degree_dofs,
        const std::vector<Index>& max_degree_macros, const Adjacency::ColoringDataHandler& coloring_data,
        Index num_macros, Index stride, DT_ omega, DT_ eps, bool skip_singular, bool uniform_macros);

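      /// \brief Assembles the Vanka matrix on the CUDA device in a batched fashion; requires uniform macros (batchedAssembly policy).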
      template<typename DT_, typename IT_, int n_>
      void assemble_vanka_device_batched(const Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& mat_wrap,
        Intern::CSRTupleMatrixWrapper<DT_, IT_, n_>& vanka_wrap, const std::vector<Index*>& d_macro_dofs,
        const std::vector<Index*>& d_dof_macros, int* d_macro_mask, const std::vector<Index>& max_degree_dofs,
        const std::vector<Index>& max_degree_macros, const Adjacency::ColoringDataHandler& coloring_data,
        Index num_macros, Index stride, Index actual_matrix_size, DT_ omega, bool skip_singular);
    }

#ifndef __CUDACC__
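    /**
     * \brief Additive Macro-wise Matrix-based Vanka preconditioner/smoother with colored (voxel) parallel assembly.
     *
     * This variant of Solver::AmaVanka assembles its local macro systems in parallel on the host or on a CUDA
     * device, using a coloring of the macros; the assembly and macro policies are chosen via template parameters.
     */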
    template<typename Matrix_,
      typename Filter_,
      FEAT::Intern::VankaAssemblyPolicy pol_threading_ = FEAT::Intern::VankaAssemblyPolicy::batchedAssembly,
      FEAT::Intern::VankaMacroPolicy macro_type_ = FEAT::Intern::VankaMacroPolicy::uniformMacros>
    class VoxelAmaVanka :
      public Solver::AmaVanka<Matrix_, Filter_>
    {
    public:
      /// our base-class
      typedef Solver::AmaVanka<Matrix_, Filter_> BaseClass;
      /// our data type
      typedef typename Matrix_::DataType DataType;
      /// our index type
      typedef typename Matrix_::IndexType IndexType;
      /// our vector type
      typedef typename Matrix_::VectorTypeL VectorType;

    protected:
      /// the type of our Vanka matrix
      typedef typename Intern::AmaVankaMatrixHelper<Matrix_>::VankaMatrix VankaMatrixType;
      /// our matrix data wrapper
      typedef Intern::CSRTupleMatrixWrapper<DataType, IndexType, Intern::AmaVankaMatrixHelper<Matrix_>::num_blocks> MatrixWrapper;
      /// our vanka data wrapper
      typedef Intern::CSRTupleMatrixWrapper<DataType, IndexType, Intern::AmaVankaMatrixHelper<VankaMatrixType>::num_blocks> VankaWrapper;
      /// the coloring data handler
      Adjacency::ColoringDataHandler _coloring_data;
      /// vectors of device-side graph arrays
      std::vector<Index*> _d_macro_dofs, _d_dof_macros;
      /// size data
      std::vector<Index> _max_degree_dofs, _max_degree_macros;
      /// device-side array of the macro mask
      int* _d_macro_mask;
      /// flag whether we should allocate additional device pointers
      bool _allocate_device = false;

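      /// \brief Calculates the maximum degrees of the macro-dof and dof-macro graphs.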
      void _alloc_max_degrees()
      {
        _max_degree_dofs.resize(this->_macro_dofs.size());
        _max_degree_macros.resize(this->_macro_dofs.size());
        for(std::size_t i = 0; i < _max_degree_dofs.size(); ++i)
        {
          if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
            _max_degree_dofs[i] = this->_macro_dofs[i].degree(Index(0));
          else
            _max_degree_dofs[i] = this->_macro_dofs[i].degree();
          _max_degree_macros[i] = this->_dof_macros[i].degree();
        }
      }

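      /// \brief Allocates the device pointers, if required.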
      void _alloc_device()
      {
        XASSERTM(_max_degree_dofs.size() == this->_macro_dofs.size(), "call _alloc_max_degrees beforehand");
        if(!_allocate_device)
          return;
        #ifdef FEAT_HAVE_CUDA
        _d_macro_dofs.resize(this->_macro_dofs.size());
        _d_dof_macros.resize(this->_dof_macros.size());
        for(int i = 0; i < int(this->_macro_dofs.size()); ++i)
        {
          Index malloc_size;
          if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
            malloc_size = this->_macro_dofs[i].get_num_nodes_domain() * _max_degree_dofs[i] * sizeof(Index);
          else
            malloc_size = this->_macro_dofs[i].get_num_nodes_domain() * (_max_degree_dofs[i]+1) * sizeof(Index);
          _d_macro_dofs[i] = (Index*)Util::cuda_malloc_managed(malloc_size);
          // prepare tmp array
          {
            Index* tmp_alias = _d_macro_dofs[i];
            const Index* dom_ptr = this->_macro_dofs[i].get_domain_ptr();
            const Index* img_ptr = this->_macro_dofs[i].get_image_idx();
            for(int k = 0; k < int(this->_macro_dofs[i].get_num_nodes_domain()); ++k)
            {
              if constexpr(macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros)
              {
                std::memcpy(tmp_alias + k*_max_degree_dofs[i], img_ptr + dom_ptr[k], _max_degree_dofs[i]*sizeof(Index));
              }
              else
              {
                const Index loc_size = dom_ptr[k+1] - dom_ptr[k];
                tmp_alias[k*(_max_degree_dofs[i]+1)] = loc_size;
                std::memcpy(tmp_alias + k*(_max_degree_dofs[i]+1) + 1, img_ptr + dom_ptr[k], loc_size*sizeof(Index));
                std::memset(tmp_alias + k*(_max_degree_dofs[i]+1) + loc_size + 1, ~int(0), (_max_degree_dofs[i] - loc_size)*sizeof(Index));
              }
            }
          }
          malloc_size = this->_dof_macros[i].get_num_nodes_domain()*(_max_degree_macros[i]+1)*sizeof(Index);
          _d_dof_macros[i] = (Index*)Util::cuda_malloc_managed(malloc_size);
          {
            Index* tmp_alias = _d_dof_macros[i];
            const Index* dom_ptr = this->_dof_macros[i].get_domain_ptr();
            const Index* img_ptr = this->_dof_macros[i].get_image_idx();
            for(int k = 0; k < int(this->_dof_macros[i].get_num_nodes_domain()); ++k)
            {
              const Index loc_size = dom_ptr[k+1] - dom_ptr[k];
              tmp_alias[k*(_max_degree_macros[i]+1)] = loc_size;
              std::memcpy(tmp_alias + k*(_max_degree_macros[i]+1) + 1, img_ptr + dom_ptr[k], loc_size*sizeof(Index));
              std::memset(tmp_alias + k*(_max_degree_macros[i]+1) + loc_size + 1, ~int(0), (_max_degree_macros[i] - loc_size)*sizeof(Index));
            }
          }
        }
        {
          if(this->_skip_singular)
          {
            _d_macro_mask = (int*)Util::cuda_malloc_managed(this->_macro_mask.size()*sizeof(int));
            Util::cuda_set_memory(_d_macro_mask, 0, this->_macro_mask.size());
          }
        }
        #endif
      }

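      /// \brief Frees the device pointers.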
      void _free_device()
      {
        #ifdef FEAT_HAVE_CUDA
        Util::cuda_free(_d_macro_mask);
        _d_macro_mask = nullptr;
        for(int i = 0; i < int(_d_macro_dofs.size()); ++i)
        {
          Util::cuda_free((void*)(_d_macro_dofs[i]));
          Util::cuda_free((void*)(_d_dof_macros[i]));
        }
        _d_dof_macros.clear();
        _d_macro_dofs.clear();
        #endif
      }

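      /// \brief Calls the generic (host) numeric assembly kernel.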
      void _init_numeric_generic(const MatrixWrapper& mat_wrap, VankaWrapper& vanka_wrap, Index DOXY(num_macros), Index stride, DataType eps)
      {
        //call backend
        Arch::assemble_vanka_host(mat_wrap, vanka_wrap, this->_macro_dofs, this->_dof_macros, this->_macro_mask, _coloring_data,
          stride, this->_omega, eps, this->_skip_singular);
      }

      #if defined(FEAT_HAVE_CUDA) || defined(DOXYGEN)
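      /// \brief Calls the CUDA numeric assembly kernel.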
      void _init_numeric_cuda(const MatrixWrapper& mat_wrap, VankaWrapper& vanka_wrap, Index num_macros, Index stride, DataType eps)
      {
        XASSERTM(_allocate_device, "Allocate device disabled!");
        bool uniform_macros = (macro_type_ == FEAT::Intern::VankaMacroPolicy::uniformMacros);
        //call backend
        if constexpr(pol_threading_ == FEAT::Intern::VankaAssemblyPolicy::oneThreadperBlock)
        {
          Arch::assemble_vanka_device(mat_wrap, vanka_wrap, _d_macro_dofs, _d_dof_macros, _d_macro_mask, _max_degree_dofs, _max_degree_macros, _coloring_data, num_macros, stride, this->_omega, eps, this->_skip_singular, uniform_macros);
        }
        else if (pol_threading_ == FEAT::Intern::VankaAssemblyPolicy::batchedAssembly)
        {
          XASSERTM(uniform_macros, "Batched assembly only works with uniform macros!");
          // TODO: stride always actual local matrix size?
          Arch::assemble_vanka_device_batched(mat_wrap, vanka_wrap, _d_macro_dofs, _d_dof_macros, _d_macro_mask, _max_degree_dofs, _max_degree_macros, _coloring_data, num_macros, stride, stride, this->_omega, this->_skip_singular);
        }
      }
      #endif

    public:
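      /**
       * \brief Constructor.
       *
       * \param[in] matrix The system matrix.
       * \param[in] filter The system filter.
       * \param[in] coloring The coloring of the macros.
       * \param[in] omega The damping parameter.
       * \param[in] num_steps The number of smoothing steps to perform.
       */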
      template<typename ColoringType_>
      explicit VoxelAmaVanka(const Matrix_& matrix, const Filter_& filter,
        const ColoringType_& coloring,
        const DataType omega = DataType(1), const Index num_steps = Index(1)) :
        BaseClass(matrix, filter, omega, num_steps),
        _coloring_data(),
        _d_macro_dofs(),
        _d_dof_macros(),
        _d_macro_mask(nullptr)
      {
        _coloring_data.fill_color(coloring);
        #ifdef FEAT_HAVE_CUDA
        _allocate_device = Util::cuda_get_device_count() > 0;
        #endif
      }

      // rule of 5
      VoxelAmaVanka(const VoxelAmaVanka&) = delete;

      VoxelAmaVanka& operator=(const VoxelAmaVanka&) = delete;

      // VoxelAmaVanka(VoxelAmaVanka&& other) noexcept :
      //   BaseClass(std::move(other)),
      //   _coloring_data(std::move(other._coloring_data)),
      //   _d_macro_dofs(std::move(other._d_macro_dofs)),
      //   _d_dof_macros(std::move(other._d_dof_macros)),
      //   _max_degree_dofs(std::move(other._max_degree_dofs)),
      //   _max_degree_macros(std::move(other._max_degree_macros)),
      //   _d_macro_mask(other._d_macro_mask),
      //   _allocate_device(other._allocate_device)
      // {
      //   other._d_macro_dofs.clear();
      //   other._d_dof_macros.clear();
      //   other._max_degree_dofs.clear();
      //   other._max_degree_macros.clear();
      //   other._d_macro_mask = nullptr;
      // }

      // VoxelAmaVanka& operator=(VoxelAmaVanka&& other) noexcept
      // {
      //   if(this == &other)
      //     return *this;
      //   this->_free_device();
      //   BaseClass::operator=(std::move(other));
      //   _coloring_data = std::move(other._coloring_data);
      //   _d_macro_dofs = std::move(other._d_macro_dofs);
      //   _d_dof_macros = std::move(other._d_dof_macros);
      //   _max_degree_dofs = std::move(other._max_degree_dofs);
      //   _max_degree_macros = std::move(other._max_degree_macros);
      //   _d_macro_mask = other._d_macro_mask;
      //   _allocate_device = other._allocate_device;
      //   other._d_macro_dofs.clear();
      //   other._d_dof_macros.clear();
      //   other._max_degree_dofs.clear();
      //   other._max_degree_macros.clear();
      //   other._d_macro_mask = nullptr;
      // }

      VoxelAmaVanka(VoxelAmaVanka&&) noexcept = delete;

      VoxelAmaVanka& operator=(VoxelAmaVanka&&) noexcept = delete;

      // /**
      //  * \brief Sets whether device ptr should be allocated.
      //  *
      //  * \param[in] allocate Should device ptr be allocated?
      //  *
      //  * \warning Setting this to false while having PreferedBackend set to cuda during the assembly
      //  *          will lead to an error.
      //  */
      // void set_allocate_device(bool allocate)
      // {
      //   _allocate_device = allocate;
      // }

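      /**
       * \brief Fills the coloring data.
       *
       * \param[in] color The coloring of the macros.
       * \param[in] hint An optional hint that is passed on to the coloring data handler.
       */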
      template<typename ColoringType_>
      void fill_color(const ColoringType_& color, int hint = -1)
      {
        XASSERTM(color.size() == this->_macro_dofs.front().get_num_nodes_domain(), "Coloring does not fit macro dofs");
        XASSERTM(!_coloring_data.initialized(), "Coloring data already initialized");
        _coloring_data.fill_color(color, hint);
      }

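      /// Returns the name of the solver.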
      virtual String name() const override
      {
        return "VoxelAmaVanka";
      }

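      /// Initializes the symbolic data and allocates the device pointers.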
      virtual void init_symbolic() override
      {
        BaseClass::init_symbolic();
        this->watch_init_symbolic.start();
        _alloc_max_degrees();
        // _alloc_row_helper();
        _alloc_device();
        this->watch_init_symbolic.stop();
      }

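      /// Releases the symbolic data and frees the device pointers.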
      virtual void done_symbolic() override
      {
        _free_device();
        // _accum_row_ctr.clear();
        // _accum_row_index.clear();
        _max_degree_dofs.clear();
        _max_degree_macros.clear();
        BaseClass::done_symbolic();
      }

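      /// Performs the numeric factorization.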
      virtual void init_numeric() override
      {
        const DataType eps = Math::eps<DataType>();
        //call numeric init of BaseSolver, but not of Vanka
        this->watch_init_numeric.start();
        Solver::SolverBase<VectorType>::init_numeric();

        // get maximum macro size
        const Index num_macros = Index(this->_macro_dofs.front().get_num_nodes_domain());
        const Index stride = Intern::AmaVankaCore::calc_stride(this->_vanka, this->_macro_dofs);
        this->_vanka.format();
        //gather matrix wrappers
        auto matrix_wrapper = Solver::Intern::get_meta_matrix_wrapper(this->_matrix);
        // std::cout << matrix_wrapper.print();
        auto vanka_wrapper = Solver::Intern::get_meta_matrix_wrapper(this->_vanka);
        BACKEND_SKELETON_VOID(_init_numeric_cuda, _init_numeric_generic, _init_numeric_generic, matrix_wrapper, vanka_wrapper, num_macros, stride, eps)

        this->watch_init_numeric.stop();
      }

    }; // VoxelAmaVanka

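    /**
     * \brief Creates a new VoxelAmaVanka smoother object.
     *
     * \param[in] matrix The system matrix.
     * \param[in] filter The system filter.
     * \param[in] coloring The coloring of the macros.
     * \param[in] omega The damping parameter.
     * \param[in] num_steps The number of smoothing steps to perform.
     *
     * \returns A shared pointer to a new VoxelAmaVanka object.
     */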
    template<typename Matrix_, typename Filter_, typename ColoringType_,
      FEAT::Intern::VankaAssemblyPolicy pol_threading_ = FEAT::Intern::VankaAssemblyPolicy::batchedAssembly,
      FEAT::Intern::VankaMacroPolicy macro_type_ = FEAT::Intern::VankaMacroPolicy::uniformMacros>
    std::shared_ptr<VoxelAmaVanka<Matrix_, Filter_, pol_threading_, macro_type_>> new_voxel_amavanka(const Matrix_& matrix, const Filter_& filter, const ColoringType_& coloring,
      typename Matrix_::DataType omega = typename Matrix_::DataType(1), Index num_steps = Index(1))
    {
      return std::make_shared<VoxelAmaVanka<Matrix_, Filter_, pol_threading_, macro_type_>>(matrix, filter, coloring, omega, num_steps);
    }
#endif //__CUDACC__

  }
}
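
A minimal usage sketch for the factory function above, assuming the header is included as <kernel/solver/voxel_amavanka.hpp> and that the smoother is driven through the solver interface (init(), apply(), done()) inherited via Solver::AmaVanka; the helper name, damping value and step count below are illustrative only:

#include <kernel/solver/voxel_amavanka.hpp>
#include <vector>

using namespace FEAT;

// Hypothetical helper: builds the smoother from an assembled system matrix,
// filter and macro coloring, then applies one smoothing sweep to a defect.
template<typename Matrix_, typename Filter_>
void apply_voxel_vanka(const Matrix_& matrix, const Filter_& filter,
  const std::vector<int>& coloring,
  typename Matrix_::VectorTypeL& vec_cor, const typename Matrix_::VectorTypeL& vec_def)
{
  // damping 0.7 and 4 smoothing steps; the policy template parameters keep
  // their defaults (batchedAssembly + uniformMacros)
  auto vanka = Solver::new_voxel_amavanka(matrix, filter, coloring,
    typename Matrix_::DataType(0.7), Index(4));

  vanka->init();                  // symbolic + numeric factorization
  vanka->apply(vec_cor, vec_def); // one application of the smoother
  vanka->done();
}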