9#include <kernel/solver/base.hpp> 
   10#include <kernel/lafem/sparse_matrix_csr.hpp> 
   11#include <kernel/lafem/sparse_matrix_bcsr.hpp> 
   23      int cuda_ilu_apply(
double * y, 
const double * x, 
double * csrVal, 
int * csrRowPtr, 
int * csrColInd, 
void * vinfo);
 
   24      void * cuda_ilu_init_symbolic(
int m, 
int nnz, 
double * csrVal, 
int * csrRowPtr, 
int * csrColInd);
 
   25      void cuda_ilu_init_numeric(
double * csrVal, 
int * csrRowPtr, 
int * csrColInd, 
void * vinfo);
 
   26      void cuda_ilu_done_symbolic(
void * vinfo);
 
   28      int cuda_ilub_apply(
double * y, 
const double * x, 
double * csrVal, 
int * csrRowPtr, 
int * csrColInd, 
void * vinfo);
 
   29      void * cuda_ilub_init_symbolic(
int m, 
int nnz, 
double * csrVal, 
int * csrRowPtr, 
int * csrColInd, 
const int blocksize);
 
   30      void cuda_ilub_init_numeric(
double * csrVal, 
int * csrRowPtr, 
int * csrColInd, 
void * vinfo);
 
   31      void cuda_ilub_done_symbolic(
void * vinfo);
 
   44      template<
typename IT_>
 
   51        std::vector<IT_> _row_ptr_l, _col_idx_l;
 
   53        std::vector<IT_> _row_ptr_u, _col_idx_u;
 
   70        IT_ get_nnze_l()
 const 
   72          return _row_ptr_l.empty() ? IT_(0) : _row_ptr_l.back();
 
   76        IT_ get_nnze_u()
 const 
   78          return _row_ptr_u.empty() ? IT_(0) : _row_ptr_u.back();
 
   84          return _n + get_nnze_l() + get_nnze_u();
 
   88        std::size_t bytes_symbolic()
 const 
   90          return sizeof(IT_) * (_row_ptr_l.size() + _row_ptr_u.size() + _col_idx_l.size() + _col_idx_u.size());
 
  105        void set_struct_csr(
const IT_ n, 
const IT_* row_ptr_a, 
const IT_* col_idx_a)
 
  110          const IT_ nzul = (Math::max(row_ptr_a[n], n) - n) / IT_(2);
 
  120          _row_ptr_l.reserve(n+1);
 
  121          _row_ptr_u.reserve(n+1);
 
  122          _col_idx_l.reserve(nzul);
 
  123          _col_idx_u.reserve(nzul);
 
  126          _row_ptr_l.push_back(0);
 
  127          _row_ptr_u.push_back(0);
 
  130          for(IT_ i(0); i < n; ++i)
 
  132            IT_ j = row_ptr_a[i];
 
  133            const IT_ jend = row_ptr_a[i+1];
 
  134            for(; (j < jend) && (col_idx_a[j] < i); ++j)
 
  136              _col_idx_l.push_back(col_idx_a[j]);
 
  138            if(col_idx_a[j] != i)
 
  139              throw InvalidMatrixStructureException();
 
  140            for(++j; j < jend; ++j)
 
  142              _col_idx_u.push_back(col_idx_a[j]);
 
  144            _row_ptr_l.push_back(IT_(_col_idx_l.size()));
 
  145            _row_ptr_u.push_back(IT_(_col_idx_u.size()));
 
  158        template<
typename DT_>
 
  159        void set_struct(
const LAFEM::SparseMatrixCSR<DT_, IT_>& matrix)
 
  161          this->set_struct_csr(IT_(matrix.rows()), matrix.row_ptr(), matrix.col_ind());
 
  170        template<
typename DT_, 
int m_, 
int n_>
 
  171        void set_struct(
const LAFEM::SparseMatrixBCSR<DT_, IT_, m_, n_>& matrix)
 
  173          this->set_struct_csr(IT_(matrix.rows()), matrix.row_ptr(), matrix.col_ind());
 
  190        void factorize_symbolic(
const int p)
 
  196          std::vector<IT_> new_ptr_l, new_idx_l, new_ptr_u, new_idx_u;
 
  199          std::vector<int> new_lvl_l, new_lvl_u;
 
  202          new_ptr_l.push_back(IT_(0));
 
  203          new_ptr_u.push_back(IT_(0));
 
  206          for(IT_ i(0); i < _n; ++i)
 
  209            for(IT_ j(_row_ptr_l[i]); j < _row_ptr_l[i+1]; ++j)
 
  211              new_idx_l.push_back(_col_idx_l[j]);
 
  212              new_lvl_l.push_back(0);
 
  214            for(IT_ j(_row_ptr_u[i]); j < _row_ptr_u[i+1]; ++j)
 
  216              new_idx_u.push_back(_col_idx_u[j]);
 
  217              new_lvl_u.push_back(0);
 
  221            for(IT_ j(new_ptr_l[i]); j < IT_(new_idx_l.size()); ++j)
 
  224              const IT_ cj = new_idx_l[j];
 
  225              const int lj = new_lvl_l[j];
 
  227              IT_ ouj = new_ptr_u[i];
 
  230              for(IT_ k(new_ptr_u[cj]); k < new_ptr_u[cj+1]; ++k)
 
  233                const IT_ ck = new_idx_u[k];
 
  234                const int lk = new_lvl_u[k];
 
  237                const int ll = lj + lk + 1;
 
  243                  olj = _insert(new_idx_l, new_lvl_l, olj, ck, ll);
 
  245                  ouj = _insert(new_idx_u, new_lvl_u, ouj, ck, ll);
 
  250            new_ptr_l.push_back(IT_(new_idx_l.size()));
 
  251            new_ptr_u.push_back(IT_(new_idx_u.size()));
 
  255          _row_ptr_l = std::move(new_ptr_l);
 
  256          _col_idx_l = std::move(new_idx_l);
 
  257          _row_ptr_u = std::move(new_ptr_u);
 
  258          _col_idx_u = std::move(new_idx_u);
 
  289        static IT_ _insert(std::vector<IT_>& idx, std::vector<int>& lvl, IT_ i, 
const IT_ j, 
const int l)
 
  292          IT_ n = IT_(idx.size());
 
  317          idx.push_back(IT_(0));
 
  321          for(IT_ k(n); k > i; --k)
 
  349      template<
typename DT_, 
typename IT_>
 
  350      class ILUCoreScalar :
 
  351        public ILUCoreSymbolic<IT_>
 
  354        typedef ILUCoreSymbolic<IT_> BaseClass;
 
  357        std::vector<DT_> _data_l, _data_u, _data_d;
 
  373          _data_d.resize(this->_n);
 
  374          _data_l.resize(this->_col_idx_l.size());
 
  375          _data_u.resize(this->_col_idx_u.size());
 
  379        std::size_t bytes_numeric()
 const 
  381          return sizeof(DT_) * (_data_l.size() + _data_u.size() + _data_d.size());
 
  385        std::size_t bytes()
 const 
  387          return this->bytes_numeric() + this->bytes_symbolic();
 
  402        void copy_data_csr(
const IT_* row_ptr_a, 
const IT_* col_idx_a, 
const DT_* data_a)
 
  405          for(IT_ i(0); i < this->_n; ++i)
 
  408            IT_ ra = row_ptr_a[i];
 
  409            IT_ xa = row_ptr_a[i+1];
 
  412            for(IT_ j(this->_row_ptr_l[i]); j < this->_row_ptr_l[i+1]; ++j)
 
  414              if(this->_col_idx_l[j] == col_idx_a[ra])
 
  415                _data_l[j] = data_a[ra++];
 
  421            _data_d[i] = data_a[ra++];
 
  424            for(IT_ j(this->_row_ptr_u[i]); j < this->_row_ptr_u[i+1]; ++j)
 
  426              if((ra < xa) && (this->_col_idx_u[j] == col_idx_a[ra]))
 
  427                _data_u[j] = data_a[ra++];
 
  440        void copy_data(
const LAFEM::SparseMatrixCSR<DT_, IT_>& matrix)
 
  442          this->copy_data_csr(matrix.row_ptr(), matrix.col_ind(), matrix.val());
 
  451        void factorize_numeric_il_du()
 
  454          const IT_* rptr_l = this->_row_ptr_l.data();
 
  455          const IT_* rptr_u = this->_row_ptr_u.data();
 
  456          const IT_* cidx_l = (this->_col_idx_l.empty() ? nullptr : this->_col_idx_l.data());
 
  457          const IT_* cidx_u = (this->_col_idx_u.empty() ? nullptr : this->_col_idx_u.data());
 
  458          DT_* data_l = (this->_data_l.empty() ? nullptr : this->_data_l.data());
 
  459          DT_* data_u = (this->_data_u.empty() ? nullptr : this->_data_u.data());
 
  460          DT_* data_d = this->_data_d.data();
 
  463          for(IT_ i(0); i < this->_n; ++i)
 
  466            const IT_ ql = rptr_l[i+1];
 
  467            const IT_ qu = rptr_u[i+1];
 
  470            for(IT_ j(rptr_l[i]); j < rptr_l[i+1]; ++j)
 
  473              const IT_ cj = cidx_l[j];
 
  480              data_l[j] *= data_d[cj];
 
  484              for(; k < rptr_u[cj+1]; ++k)
 
  486                const IT_ ck = cidx_u[k];
 
  489                for(; (pl < ql) && (cidx_l[pl] <= ck); ++pl)
 
  492                    data_l[pl] -= data_l[j] * data_u[k];
 
  497              if((k < rptr_u[cj+1]) && (cidx_u[k] == i))
 
  499                data_d[i] -= data_l[j] * data_u[k];
 
  503              for(; k < rptr_u[cj+1]; ++k)
 
  505                const IT_ ck = cidx_u[k];
 
  506                for(; (pu < qu) && (cidx_u[pu] <= ck); ++pu)
 
  509                    data_u[pu] -= data_l[j] * data_u[k];
 
  515            data_d[i] = DT_(1) / data_d[i];
 
  531        void solve_il(DT_* x, 
const DT_* b)
 const 
  534          const IT_* rptr = this->_row_ptr_l.data();
 
  535          const IT_* cidx = (this->_col_idx_l.empty() ? nullptr : this->_col_idx_l.data());
 
  536          const DT_* data_l = (this->_data_l.empty() ? nullptr : this->_data_l.data());
 
  538          for(IT_ i(0); i < this->_n; ++i)
 
  541            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  543              r -= data_l[j] * x[cidx[j]];
 
  561        void solve_du(DT_* x, 
const DT_* b)
 const 
  563          const IT_* rptr = this->_row_ptr_u.data();
 
  564          const IT_* cidx = (this->_col_idx_u.empty() ? nullptr : this->_col_idx_u.data());
 
  565          const DT_* data_u = (this->_data_u.empty() ? nullptr : this->_data_u.data());
 
  566          const DT_* data_d = this->_data_d.data();
 
  568          for(IT_ i(this->_n); i > IT_(0); )
 
  572            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  574              r -= data_u[j] * x[cidx[j]];
 
  576            x[i] = data_d[i] * r;
 
  592        void solve_ilt(DT_* x, 
const DT_* b)
 const 
  594          const IT_* rptr = this->_row_ptr_l.data();
 
  595          const IT_* cidx = (this->_col_idx_l.empty() ? nullptr : this->_col_idx_l.data());
 
  596          const DT_* data_l = (this->_data_l.empty() ? nullptr : this->_data_l.data());
 
  600            for(IT_ i(0); i < this->_n; ++i)
 
  604          for(IT_ i(this->_n); i > IT_(0); )
 
  607            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  610              x[cidx[j]] -= data_l[j] * x[i];
 
  627        void solve_dut(DT_* x, 
const DT_* b)
 const 
  629          const IT_* rptr = this->_row_ptr_u.data();
 
  630          const IT_* cidx = (this->_col_idx_u.empty() ? nullptr : this->_col_idx_u.data());
 
  631          const DT_* data_u = (this->_data_u.empty() ? nullptr : this->_data_u.data());
 
  632          const DT_* data_d = this->_data_d.data();
 
  636            for(IT_ i(0); i < this->_n; ++i)
 
  640          for(IT_ i(0); i < this->_n; ++i)
 
  643            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  646              x[cidx[j]] -= data_u[j] * x[i];
 
  668      template<
typename DT_, 
typename IT_, 
int dim_>
 
  669      class ILUCoreBlocked :
 
  670        public ILUCoreSymbolic<IT_>
 
  673        typedef ILUCoreSymbolic<IT_> BaseClass;
 
  675        static_assert(dim_ > 0, 
"invalid block dimension");
 
  677        typedef Tiny::Matrix<DT_, dim_, dim_> MatBlock;
 
  678        typedef Tiny::Vector<DT_, dim_> VecBlock;
 
  681        std::vector<MatBlock> _data_l, _data_u, _data_d;
 
  697          _data_d.resize(this->_n);
 
  698          _data_l.resize(this->_col_idx_l.size());
 
  699          _data_u.resize(this->_col_idx_u.size());
 
  703        std::size_t bytes_numeric()
 const 
  705          return sizeof(DT_) * std::size_t(Math::sqr(dim_)) * (_data_l.size() + _data_u.size() + _data_d.size());
 
  709        std::size_t bytes()
 const 
  711          return this->bytes_numeric() + this->bytes_symbolic();
 
  726        void copy_data_bcsr(
const IT_* row_ptr_a, 
const IT_* col_idx_a, 
const MatBlock* data_a)
 
  729          for(IT_ i(0); i < this->_n; ++i)
 
  732            IT_ ra = row_ptr_a[i];
 
  733            IT_ xa = row_ptr_a[i+1];
 
  736            for(IT_ j(this->_row_ptr_l[i]); j < this->_row_ptr_l[i+1]; ++j)
 
  738              if(this->_col_idx_l[j] == col_idx_a[ra])
 
  739                _data_l[j] = data_a[ra++];
 
  745            _data_d[i] = data_a[ra++];
 
  748            for(IT_ j(this->_row_ptr_u[i]); j < this->_row_ptr_u[i+1]; ++j)
 
  750              if((ra < xa) && (this->_col_idx_u[j] == col_idx_a[ra]))
 
  751                _data_u[j] = data_a[ra++];
 
  764        void copy_data(
const LAFEM::SparseMatrixBCSR<DT_, IT_, dim_, dim_>& matrix)
 
  766          this->copy_data_bcsr(matrix.row_ptr(), matrix.col_ind(), matrix.val());
 
  775        void factorize_numeric_il_du()
 
  778          const IT_* rptr_l = this->_row_ptr_l.data();
 
  779          const IT_* rptr_u = this->_row_ptr_u.data();
 
  780          const IT_* cidx_l = (this->_col_idx_l.empty() ? nullptr : this->_col_idx_l.data());
 
  781          const IT_* cidx_u = (this->_col_idx_u.empty() ? nullptr : this->_col_idx_u.data());
 
  782          MatBlock* data_l = (this->_data_l.empty() ? nullptr : this->_data_l.data());
 
  783          MatBlock* data_u = (this->_data_u.empty() ? nullptr : this->_data_u.data());
 
  784          MatBlock* data_d = this->_data_d.data();
 
  787          for(IT_ i(0); i < this->_n; ++i)
 
  790            const IT_ ql = rptr_l[i+1];
 
  791            const IT_ qu = rptr_u[i+1];
 
  794            for(IT_ j(rptr_l[i]); j < rptr_l[i+1]; ++j)
 
  797              const IT_ cj = cidx_l[j];
 
  806                const MatBlock l_ij(data_l[j]);
 
  807                data_l[j].set_mat_mat_mult(data_d[cj], l_ij);
 
  812              for(; k < rptr_u[cj+1]; ++k)
 
  814                const IT_ ck = cidx_u[k];
 
  817                for(; (pl < ql) && (cidx_l[pl] <= ck); ++pl)
 
  821                    data_l[pl].add_mat_mat_mult(data_l[j], data_u[k], -DT_(1));
 
  826              if((k < rptr_u[cj+1]) && (cidx_u[k] == i))
 
  829                data_d[i].add_mat_mat_mult(data_l[j], data_u[k], -DT_(1));
 
  834              for(; k < rptr_u[cj+1]; ++k)
 
  836                const IT_ ck = cidx_u[k];
 
  837                for(; (pu < qu) && (cidx_u[pu] <= ck); ++pu)
 
  841                    data_u[pu].add_mat_mat_mult(data_l[j], data_u[k], -DT_(1));
 
  849              const MatBlock d_ii(data_d[i]);
 
  850              data_d[i].set_inverse(d_ii);
 
  867        void solve_il(VecBlock* x, 
const VecBlock* b)
 const 
  869          const IT_* rptr = this->_row_ptr_l.data();
 
  870          const IT_* cidx = (this->_col_idx_l.empty() ? nullptr : this->_col_idx_l.data());
 
  871          const MatBlock* data_l = (this->_data_l.empty() ? nullptr : this->_data_l.data());
 
  873          for(IT_ i(0); i < this->_n; ++i)
 
  876            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  879              r.add_mat_vec_mult(data_l[j], x[cidx[j]], -DT_(1));
 
  897        void solve_du(VecBlock* x, 
const VecBlock* b)
 const 
  899          const IT_* rptr = this->_row_ptr_u.data();
 
  900          const IT_* cidx = (this->_col_idx_u.empty() ? nullptr : this->_col_idx_u.data());
 
  901          const MatBlock* data_u = (this->_data_u.empty() ? nullptr : this->_data_u.data());
 
  902          const MatBlock* data_d = this->_data_d.data();
 
  904          for(IT_ i(this->_n); i > IT_(0); )
 
  908            for(IT_ j(rptr[i]); j < rptr[i+1]; ++j)
 
  911              r.add_mat_vec_mult(data_u[j], x[cidx[j]], -DT_(1));
 
  914            x[i].set_mat_vec_mult(data_d[i], r);
 
  935    template<
typename Matrix_>
 
  939      typedef typename Matrix_::DataType DataType;
 
  940      typedef typename Matrix_::VectorTypeL VectorType;
 
  942      virtual void set_fill_in_param(
int p) = 0;
 
  944      virtual void init_symbolic() = 0;
 
  946      virtual void done_symbolic() = 0;
 
  948      virtual void init_numeric() = 0;
 
  950      virtual String name() 
const = 0;
 
  952      virtual Status apply(VectorType& vec_cor, 
const VectorType& vec_def) = 0;
 
  965    template<PreferredBackend backend_, 
typename Matrix_, 
typename Filter_>
 
  975    template<
typename DT_, 
typename IT_, 
typename Filter_>
 
  981      typedef DT_ DataType;
 
  982      typedef IT_ IndexType;
 
  983      typedef Filter_ FilterType;
 
  988      const FilterType& _filter;
 
  989      Intern::ILUCoreScalar<DataType, IndexType> _ilu;
 
 1010        const MatrixType& matrix, 
const FilterType& filter) :
 
 1015        auto fill_in_param_p = section->
query(
"fill_in_param");
 
 1016        if(fill_in_param_p.second && !fill_in_param_p.first.parse(_p))
 
 1017          throw ParseError(section_name + 
".fill_in_param", fill_in_param_p.first, 
"a non-negative integer");
 
 1045      virtual void init_symbolic()
 override 
 1049          XABORTM(
"Matrix is not square!");
 
 1053        _ilu.set_struct(_matrix);
 
 1056        _ilu.factorize_symbolic(_p);
 
 1062      virtual void done_symbolic()
 override 
 1067      virtual void init_numeric()
 override 
 1069        _ilu.copy_data(_matrix);
 
 1070        _ilu.factorize_numeric_il_du();
 
 1088        _ilu.solve_il(x, b);
 
 1091        _ilu.solve_du(x, x);
 
 1094        this->_filter.filter_cor(out);
 
 1097        Statistics::add_time_precon(ts_stop.
elapsed(ts_start));
 
 1109    template<
typename DT_, 
typename IT_, 
int dim_, 
typename Filter_>
 
 1111      public ILUPrecondBase<typename LAFEM::SparseMatrixBCSR<DT_, IT_, dim_, dim_>>
 
 1115      typedef DT_ DataType;
 
 1116      typedef IT_ IndexType;
 
 1117      typedef Filter_ FilterType;
 
 1122      const FilterType& _filter;
 
 1123      Intern::ILUCoreBlocked<DataType, IndexType, dim_> _ilu;
 
 1144        const MatrixType& matrix, 
const FilterType& filter) :
 
 1149        auto fill_in_param_p = section->
query(
"fill_in_param");
 
 1150        if(fill_in_param_p.second && !fill_in_param_p.first.parse(_p))
 
 1151          throw ParseError(section_name + 
".fill_in_param", fill_in_param_p.first, 
"a non-negative integer");
 
 1179      virtual void init_symbolic()
 override 
 1183          XABORTM(
"Matrix is not square!");
 
 1187        _ilu.set_struct(_matrix);
 
 1190        _ilu.factorize_symbolic(_p);
 
 1196      virtual void done_symbolic()
 override 
 1201      virtual void init_numeric()
 override 
 1203        _ilu.copy_data(_matrix);
 
 1204        _ilu.factorize_numeric_il_du();
 
 1218        auto* x = out.elements();
 
 1219        const auto* b = in.elements();
 
 1222        _ilu.solve_il(x, b);
 
 1225        _ilu.solve_du(x, x);
 
 1228        this->_filter.filter_cor(out);
 
 1231        Statistics::add_time_precon(ts_stop.
elapsed(ts_start));
 
 1238#ifdef FEAT_HAVE_CUDA 
 1252    template<
typename Filter_>
 
 1254      public ILUPrecondBase<LAFEM::SparseMatrixCSR<double, unsigned int>>
 
 1258      typedef Filter_ FilterType;
 
 1259      typedef typename MatrixType::VectorTypeL VectorType;
 
 1260      typedef typename MatrixType::DataType DataType;
 
 1263      const MatrixType& _matrix;
 
 1264      MatrixType _lu_matrix;
 
 1265      const FilterType& _filter;
 
 1280      explicit ILUPrecondWithBackend(
const MatrixType& matrix, 
const FilterType& filter, 
const int = 0) :
 
 1286      explicit ILUPrecondWithBackend(
const String& section_name, 
const PropertyMap* section,
 
 1287        const MatrixType& matrix, 
const FilterType& filter) :
 
 1292        auto fill_in_param_p = section->query(
"fill_in_param");
 
 1293        if(fill_in_param_p.second)
 
 1295          XASSERTM(std::stoi(fill_in_param_p.first) == 0, 
"For PreferredBackend::cuda, the fill in parameter has to be == 0!");
 
 1302      virtual ~ILUPrecondWithBackend()
 
 1312      virtual void set_fill_in_param(
int p)
 override 
 1314        XASSERTM(p == 0, 
"For PreferredBackend::cuda, the fill in parameter has to be == 0!");
 
 1318      virtual String name()
 const override 
 1323      virtual void init_symbolic()
 override 
 1325        if (_matrix.columns() != _matrix.rows())
 
 1327          XABORTM(
"Matrix is not square!");
 
 1332        cuda_info = Intern::cuda_ilu_init_symbolic(
 
 1333          (
int)_lu_matrix.rows(),
 
 1334          (
int)_lu_matrix.used_elements(),
 
 1336          (
int*)_lu_matrix.row_ptr(),
 
 1337          (
int*)_lu_matrix.col_ind());
 
 1340      virtual void done_symbolic()
 override 
 1342        Intern::cuda_ilu_done_symbolic(cuda_info);
 
 1345      virtual void init_numeric()
 override 
 1347        _lu_matrix.copy(_matrix);
 
 1349        Intern::cuda_ilu_init_numeric(
 
 1351          (
int*)_lu_matrix.row_ptr(),
 
 1352          (
int*)_lu_matrix.col_ind(),
 
 1356      virtual Status apply(VectorType& vec_cor, 
const VectorType& vec_def)
 override 
 1358        XASSERTM(_matrix.rows() == vec_cor.size(), 
"matrix / vector size mismatch!");
 
 1359        XASSERTM(_matrix.rows() == vec_def.size(), 
"matrix / vector size mismatch!");
 
 1363        int status = Intern::cuda_ilu_apply(
 
 1367          (
int*)_lu_matrix.row_ptr(),
 
 1368          (
int*)_lu_matrix.col_ind(),
 
 1371        this->_filter.filter_cor(vec_cor);
 
 1374        Statistics::add_time_precon(ts_stop.elapsed(ts_start));
 
 1467    template<
typename Filter_, 
int blocksize_>
 
 1468    class ILUPrecondWithBackend<
PreferredBackend::
cuda, LAFEM::SparseMatrixBCSR<double, unsigned int, blocksize_, blocksize_>, Filter_> :
 
 1469      public ILUPrecondBase<typename LAFEM::SparseMatrixBCSR<double, unsigned int, blocksize_, blocksize_>>
 
 1472      typedef LAFEM::SparseMatrixBCSR<double, unsigned int, blocksize_, blocksize_> MatrixType;
 
 1473      typedef Filter_ FilterType;
 
 1474      typedef typename MatrixType::VectorTypeL VectorType;
 
 1475      typedef typename MatrixType::DataType DataType;
 
 1478      const MatrixType& _matrix;
 
 1479      MatrixType _lu_matrix;
 
 1480      const FilterType& _filter;
 
 1495      explicit ILUPrecondWithBackend(
const MatrixType& matrix, 
const FilterType& filter, 
const int = 0) :
 
 1517      explicit ILUPrecondWithBackend(
const String& section_name, 
const PropertyMap* section,
 
 1518        const MatrixType& matrix, 
const FilterType& filter) :
 
 1523          auto fill_in_param_p = section->query(
"fill_in_param");
 
 1524          if(fill_in_param_p.second)
 
 1526            XASSERTM(std::stoi(fill_in_param_p.first) == 0, 
"For PreferredBackend::cuda, the fill in parameter has to be == 0!");
 
 1533      virtual ~ILUPrecondWithBackend()
 
 1543      virtual void set_fill_in_param(
int p)
 override 
 1545        XASSERTM(p == 0, 
"For PreferredBackend::cuda, the fill in parameter has to be == 0!");
 
 1549      virtual String name()
 const override 
 1554      virtual void init_symbolic()
 override 
 1556        if (_matrix.columns() != _matrix.rows())
 
 1558          XABORTM(
"Matrix is not square!");
 
 1563        cuda_info = Intern::cuda_ilub_init_symbolic(
 
 1564          (
int)_lu_matrix.rows(),
 
 1565          (
int)_lu_matrix.used_elements(),
 
 1566          _lu_matrix.template val<LAFEM::Perspective::pod>(),
 
 1567          (
int*)_lu_matrix.row_ptr(),
 
 1568          (
int*)_lu_matrix.col_ind(),
 
 1572      virtual void done_symbolic()
 override 
 1574        Intern::cuda_ilub_done_symbolic(cuda_info);
 
 1577      virtual void init_numeric()
 override 
 1579        _lu_matrix.copy(_matrix);
 
 1581        Intern::cuda_ilub_init_numeric(_lu_matrix.template val<LAFEM::Perspective::pod>(), (
int*)_lu_matrix.row_ptr(), (
int*)_lu_matrix.col_ind(), cuda_info);
 
 1584      virtual Status apply(VectorType& vec_cor, 
const VectorType& vec_def)
 override 
 1586        XASSERTM(_matrix.rows() == vec_cor.size(), 
"matrix / vector size mismatch!");
 
 1587        XASSERTM(_matrix.rows() == vec_def.size(), 
"matrix / vector size mismatch!");
 
 1591        int status = Intern::cuda_ilub_apply(
 
 1592          vec_cor.template elements<LAFEM::Perspective::pod>(),
 
 1593          vec_def.template elements<LAFEM::Perspective::pod>(),
 
 1594          _lu_matrix.template val<LAFEM::Perspective::pod>(),
 
 1595          (
int*)_lu_matrix.row_ptr(),
 
 1596          (
int*)_lu_matrix.col_ind(),
 
 1599        this->_filter.filter_cor(vec_cor);
 
 1602        Statistics::add_time_precon(ts_stop.elapsed(ts_start));
 
 1611    template<PreferredBackend backend_, 
typename Matrix_, 
typename Filter_>
 
 1625      virtual Status apply(
typename Matrix_::VectorTypeL &, 
const typename Matrix_::VectorTypeL &)
 override 
 1627          XABORTM(
"not implemented yet!");
 
 1630      virtual String name()
 const override 
 1632          XABORTM(
"not implemented yet!");
 
 1635      virtual void init_symbolic()
 override 
 1637        XABORTM(
"not implemented yet!");
 
 1640      virtual void done_symbolic()
 override 
 1642        XABORTM(
"not implemented yet!");
 
 1645      virtual void set_fill_in_param(
int )
 override 
 1647        XABORTM(
"not implemented yet!");
 
 1650      virtual void init_numeric()
 override 
 1652        XABORTM(
"not implemented yet!");
 
 1667    template<
typename Matrix_, 
typename Filter_>
 
 1671      std::shared_ptr<ILUPrecondBase<Matrix_>> _impl;
 
 1674      typedef Matrix_ MatrixType;
 
 1675      typedef Filter_ FilterType;
 
 1676      typedef typename MatrixType::VectorTypeL VectorType;
 
 1677      typedef typename MatrixType::DataType DataType;
 
 1704            _impl = std::make_shared<ILUPrecondWithBackend<PreferredBackend::cuda, Matrix_, Filter_>>(matrix, filter);
 
 1709            _impl = std::make_shared<ILUPrecondWithBackend<PreferredBackend::generic, Matrix_, Filter_>>(matrix, filter, p);
 
 1738            _impl = std::make_shared<ILUPrecondWithBackend<PreferredBackend::cuda, Matrix_, Filter_>>(section_name, section, matrix, filter);
 
 1743            _impl = std::make_shared<ILUPrecondWithBackend<PreferredBackend::generic, Matrix_, Filter_>>(section_name, section, matrix, filter);
 
 1757        return _impl->name();
 
 1768        _impl->set_fill_in_param(p);
 
 1777      virtual Status apply(VectorType& vec_cor, 
const VectorType& vec_def)
 override 
 1779        return _impl->apply(vec_cor, vec_def);
 
 1784        _impl->init_symbolic();
 
 1789        _impl->done_symbolic();
 
 1794        _impl->init_numeric();
 
 1816    template<
typename Matrix_, 
typename Filter_>
 
 1818      const Matrix_& matrix, 
const Filter_& filter, 
const int p = 0)
 
 1820      return std::make_shared<ILUPrecond<Matrix_, Filter_>>(backend, matrix, filter, p);
 
 1844    template<
typename Matrix_, 
typename Filter_>
 
 1847      const Matrix_& matrix, 
const Filter_& filter)
 
 1849      return std::make_shared<ILUPrecond<Matrix_, Filter_>>(section_name, section, backend, matrix, filter);
 
#define XABORTM(msg)
Abortion macro definition with custom message.
#define XASSERT(expr)
Assertion macro definition.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Index size() const
Returns the containers size.
DT_ * elements()
Get a pointer to the data array.
CSR based blocked sparse matrix.
Intern::BCSRVectorHelper< DT_, IT_, BlockHeight_ >::VectorType VectorTypeL
Compatible L-vector type.
Index rows() const
Retrieve matrix row count.
Index columns() const
Retrieve matrix column count.
Index rows() const
Retrieve matrix row count.
Index columns() const
Retrieve matrix column count.
Class for parser related errors.
A class organizing a tree of key-value pairs.
std::pair< String, bool > query(String key_path) const
Queries a value by its key path.
Inheritances inside ilu_precond.hpp.
ILU(0) and ILU(p) preconditioner implementation.
ILUPrecond(const String §ion_name, const PropertyMap *section, PreferredBackend backend, const MatrixType &matrix, const FilterType &filter)
Constructor using a PropertyMap.
virtual ~ILUPrecond()
Empty virtual destructor.
ILUPrecond(PreferredBackend backend, const MatrixType &matrix, const FilterType &filter, const int p=0)
Constructor.
SolverBase< VectorType > BaseClass
Our base class.
virtual void init_numeric() override
Numeric initialization method.
virtual Status apply(VectorType &vec_cor, const VectorType &vec_def) override
apply the preconditioner
virtual void set_fill_in_param(int p)
Sets the fill-in parameter.
virtual void done_symbolic() override
Symbolic finalization method.
virtual void init_symbolic() override
Symbolic initialization method.
virtual String name() const override
Returns the name of the solver.
ILUPrecondWithBackend(const MatrixType &matrix, const FilterType &filter, const int p=0)
Constructor.
virtual String name() const override
Returns the name of the solver.
virtual ~ILUPrecondWithBackend()
Empty virtual destructor.
virtual Status apply(VectorType &out, const VectorType &in) override
apply the preconditioner
virtual void set_fill_in_param(int p) override
Sets the fill-in parameter.
virtual Status apply(VectorType &out, const VectorType &in) override
apply the preconditioner
virtual void set_fill_in_param(int p) override
Sets the fill-in parameter.
virtual ~ILUPrecondWithBackend()
Empty virtual destructor.
virtual String name() const override
Returns the name of the solver.
ILUPrecondWithBackend(const MatrixType &matrix, const FilterType &filter, const int p=0)
Constructor.
ILU(0) and ILU(p) preconditioner internal implementation.
Polymorphic solver interface.
static void add_flops(Index flops)
Add an amount of flops to the global flop counter.
String class implementation.
double elapsed(const TimeStamp &before) const
Calculates the time elapsed between two time stamps.
Status
Solver status return codes enumeration.
@ success
solving successful (convergence criterion fulfilled)
@ aborted
premature abort (solver aborted due to internal errors or preconditioner failure)
std::shared_ptr< ILUPrecond< Matrix_, Filter_ > > new_ilu_precond(PreferredBackend backend, const Matrix_ &matrix, const Filter_ &filter, const int p=0)
Creates a new ILUPrecond solver object.
PreferredBackend
The backend that shall be used in all compute heavy calculations.