FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
sparse_matrix_csr.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
11#include <kernel/lafem/forward.hpp>
12#include <kernel/lafem/container.hpp>
13#include <kernel/lafem/dense_vector.hpp>
14#include <kernel/lafem/sparse_matrix_banded.hpp>
15#include <kernel/lafem/sparse_matrix_bcsr.hpp>
16#include <kernel/lafem/sparse_layout.hpp>
17#include <kernel/lafem/arch/scale_row_col.hpp>
18#include <kernel/lafem/arch/scale.hpp>
19#include <kernel/lafem/arch/axpy.hpp>
20#include <kernel/lafem/arch/apply.hpp>
21#include <kernel/lafem/arch/norm.hpp>
22#include <kernel/lafem/arch/diagonal.hpp>
23#include <kernel/lafem/arch/lumping.hpp>
24#include <kernel/lafem/arch/row_norm.hpp>
25#include <kernel/adjacency/graph.hpp>
26#include <kernel/adjacency/permutation.hpp>
27#include <kernel/util/statistics.hpp>
28#include <kernel/util/time_stamp.hpp>
29
30#include <fstream>
31
32
33namespace FEAT
34{
35 namespace LAFEM
36 {
58 template <typename DT_, typename IT_ = Index>
59 class SparseMatrixCSR : public Container<DT_, IT_>
60 {
61 public:
68 {
69 public:
71 typedef DT_ DataType;
72 typedef IT_ IndexType;
73
74 private:
75#ifdef DEBUG
76 const IT_ _deadcode;
77#endif
78 Index _num_rows;
79 Index _num_cols;
80 IT_* _row_ptr;
81 IT_* _col_idx;
82 IT_* _col_ptr;
83 DT_* _data;
84
85 public:
86 explicit ScatterAxpy(MatrixType& matrix) :
87#ifdef DEBUG
88 _deadcode(~IT_(0)),
89#endif
90 _num_rows(matrix.rows()),
91 _num_cols(matrix.columns()),
92 _row_ptr(matrix.row_ptr()),
93 _col_idx(matrix.col_ind()),
94 _col_ptr(nullptr),
95 _data(matrix.val())
96 {
97 // allocate column-pointer array
98 _col_ptr = new IT_[_num_cols];
99#ifdef DEBUG
100 for(Index i(0); i < _num_cols; ++i)
101 {
102 _col_ptr[i] = _deadcode;
103 }
104#endif
105 }
106
107 virtual ~ScatterAxpy()
108 {
109 if(_col_ptr != nullptr)
110 {
111 delete [] _col_ptr;
112 }
113 }
114
115 template<typename LocalMatrix_, typename RowMapping_, typename ColMapping_>
116 void operator()(const LocalMatrix_& loc_mat, const RowMapping_& row_map,
117 const ColMapping_& col_map, DT_ alpha = DT_(1))
118 {
119 // loop over all local row entries
120 for(int i(0); i < row_map.get_num_local_dofs(); ++i)
121 {
122 // fetch row index
123 const Index ix = row_map.get_index(i);
124
125 // build column pointer for this row entry contribution
126 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
127 {
128 _col_ptr[_col_idx[k]] = k;
129 }
130
131 // loop over all local column entries
132 for(int j(0); j < col_map.get_num_local_dofs(); ++j)
133 {
134 // fetch column index
135 const Index jx = col_map.get_index(j);
136
137 // ensure that the column pointer is valid for this index
138 ASSERTM(_col_ptr[jx] != _deadcode, "invalid column index");
139
140 // incorporate data into global matrix
141 _data[_col_ptr[jx]] += alpha * loc_mat[i][j];
142
143 // continue with next column entry
144 }
145
146#ifdef DEBUG
147 // reformat column-pointer array
148 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
149 {
150 _col_ptr[_col_idx[k]] = _deadcode;
151 }
152#endif
153 // continue with next row entry
154 }
155 }
156 }; // class ScatterAxpy
157
164 {
165 public:
167 typedef DT_ DataType;
168 typedef IT_ IndexType;
169
170 private:
171#ifdef DEBUG
172 const IT_ _deadcode;
173#endif
174 Index _num_rows;
175 Index _num_cols;
176 const IT_* _row_ptr;
177 const IT_* _col_idx;
178 IT_* _col_ptr;
179 const DT_* _data;
180
181 public:
182 explicit GatherAxpy(const MatrixType& matrix) :
183#ifdef DEBUG
184 _deadcode(~IT_(0)),
185#endif
186 _num_rows(matrix.rows()),
187 _num_cols(matrix.columns()),
188 _row_ptr(matrix.row_ptr()),
189 _col_idx(matrix.col_ind()),
190 _col_ptr(nullptr),
191 _data(matrix.val())
192 {
193 // allocate column-pointer array
194 _col_ptr = new IT_[_num_cols];
195#ifdef DEBUG
196 for(Index i(0); i < _num_cols; ++i)
197 {
198 _col_ptr[i] = _deadcode;
199 }
200#endif
201 }
202
203 virtual ~GatherAxpy()
204 {
205 if(_col_ptr != nullptr)
206 {
207 delete [] _col_ptr;
208 }
209 }
210
211 template<typename LocalMatrix_, typename RowMapping_, typename ColMapping_>
212 void operator()(LocalMatrix_& loc_mat, const RowMapping_& row_map,
213 const ColMapping_& col_map, DT_ alpha = DT_(1))
214 {
215 // loop over all local row entries
216 for(int i(0); i < row_map.get_num_local_dofs(); ++i)
217 {
218 // fetch row index
219 const Index ix = row_map.get_index(i);
220
221 // build column pointer for this row entry contribution
222 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
223 {
224 _col_ptr[_col_idx[k]] = k;
225 }
226
227 // loop over all local column entries
228 for(int j(0); j < col_map.get_num_local_dofs(); ++j)
229 {
230 // fetch column index
231 const Index jx = col_map.get_index(j);
232
233 // ensure that the column pointer is valid for this index
234 ASSERTM(_col_ptr[jx] != _deadcode, "invalid column index");
235
236 // update local matrix data
237 loc_mat[i][j] += alpha * _data[_col_ptr[jx]];
238
239 // continue with next column entry
240 }
241
242#ifdef DEBUG
243 // reformat column-pointer array
244 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
245 {
246 _col_ptr[_col_idx[k]] = _deadcode;
247 }
248#endif
249
250 // continue with next row entry
251 }
252 }
253 }; // class GatherAxpy
254
255 private:
256 Index & _size()
257 {
258 return this->_scalar_index.at(0);
259 }
260
261 Index & _rows()
262 {
263 return this->_scalar_index.at(1);
264 }
265
266 Index & _columns()
267 {
268 return this->_scalar_index.at(2);
269 }
270
271 Index & _used_elements()
272 {
273 return this->_scalar_index.at(3);
274 }
275
276 public:
278 typedef DT_ DataType;
280 typedef IT_ IndexType;
282 typedef DT_ ValueType;
290 typedef const IT_* ImageIterator;
292 template <typename DT2_ = DT_, typename IT2_ = IT_>
294
296 template <typename DataType2_, typename IndexType2_>
298
299 static constexpr bool is_global = false;
300 static constexpr bool is_local = true;
301
307 explicit SparseMatrixCSR() :
308 Container<DT_, IT_> (0)
309 {
310 this->_scalar_index.push_back(0);
311 this->_scalar_index.push_back(0);
312 this->_scalar_index.push_back(0);
313 }
314
326 explicit SparseMatrixCSR(Index rows_in, Index columns_in) :
327 Container<DT_, IT_> (rows_in * columns_in)
328 {
329 this->_scalar_index.push_back(rows_in);
330 this->_scalar_index.push_back(columns_in);
331 this->_scalar_index.push_back(0);
332 }
333
345 explicit SparseMatrixCSR(Index rows_in, Index columns_in, Index used_elements_in) :
346 Container<DT_, IT_> (rows_in * columns_in)
347 {
348 XASSERT(rows_in != Index(0) && columns_in != Index(0));
349
350 this->_scalar_index.push_back(rows_in);
351 this->_scalar_index.push_back(columns_in);
352 this->_scalar_index.push_back(used_elements_in);
353
354 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
355 this->_indices_size.push_back(_used_elements());
356
357 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_rows() + 1));
358 this->_indices_size.push_back(_rows() + 1);
359
360 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
361 this->_elements_size.push_back(_used_elements());
362 }
363
371 explicit SparseMatrixCSR(const SparseLayout<IT_, layout_id> & layout_in) :
372 Container<DT_, IT_> (layout_in._scalar_index.at(0))
373 {
374 this->_indices.assign(layout_in._indices.begin(), layout_in._indices.end());
375 this->_indices_size.assign(layout_in._indices_size.begin(), layout_in._indices_size.end());
376 this->_scalar_index.assign(layout_in._scalar_index.begin(), layout_in._scalar_index.end());
377
378 for (auto i : this->_indices)
380
381 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
382 this->_elements_size.push_back(_used_elements());
383 }
384
392 template <typename MT_>
393 explicit SparseMatrixCSR(const MT_ & other) :
394 Container<DT_, IT_>(other.size())
395 {
396 convert(other);
397 }
398
406 explicit SparseMatrixCSR(const Adjacency::Graph & graph) :
407 SparseMatrixCSR(graph.get_num_nodes_domain(), graph.get_num_nodes_image(), graph.get_num_indices())
408 {
409 const Index num_nnze = graph.get_num_indices();
410 const Index num_rows = graph.get_num_nodes_domain();
411 const Index * dom_ptr(graph.get_domain_ptr());
412 const Index * img_idx(graph.get_image_idx());
413 IT_ * prow_ptr(this->row_ptr());
414 IT_ * pcol_idx(this->col_ind());
415
416 FEAT_PRAGMA_OMP(parallel for)
417 for(Index i = 0; i <= num_rows; ++i)
418 prow_ptr[i] = IT_(dom_ptr[i]);
419
420 FEAT_PRAGMA_OMP(parallel for)
421 for(Index i = 0; i < num_nnze; ++i)
422 pcol_idx[i] = IT_(img_idx[i]);
423 }
424
433 explicit SparseMatrixCSR(FileMode mode, const String& filename) :
434 Container<DT_, IT_>(0)
435 {
436 read_from(mode, filename);
437 }
438
447 explicit SparseMatrixCSR(FileMode mode, std::istream& file) :
448 Container<DT_, IT_>(0)
449 {
450 read_from(mode, file);
451 }
452
465 explicit SparseMatrixCSR(const Index rows_in, const Index columns_in,
466 DenseVector<IT_, IT_> & col_ind_in, DenseVector<DT_, IT_> & val_in, DenseVector<IT_, IT_> & row_ptr_in) :
467 Container<DT_, IT_>(rows_in * columns_in)
468 {
470 XASSERT(col_ind_in.size() > 0);
471 XASSERT(val_in.size() > 0);
472 XASSERT(row_ptr_in.size() > 0);
473
474 this->_scalar_index.push_back(rows_in);
475 this->_scalar_index.push_back(columns_in);
476 this->_scalar_index.push_back(val_in.size());
477
478 this->_elements.push_back(val_in.elements());
479 this->_elements_size.push_back(val_in.size());
480 this->_indices.push_back(col_ind_in.elements());
481 this->_indices_size.push_back(col_ind_in.size());
482 this->_indices.push_back(row_ptr_in.elements());
483 this->_indices_size.push_back(row_ptr_in.size());
484
485 for (Index i(0) ; i < this->_elements.size() ; ++i)
487 for (Index i(0) ; i < this->_indices.size() ; ++i)
489 }
490
498 template <typename DT2_ = DT_, typename IT2_ = IT_>
499 explicit SparseMatrixCSR(std::vector<char> input) :
500 Container<DT_, IT_>(0)
501 {
502 deserialize<DT2_, IT2_>(input);
503 }
504
513 Container<DT_, IT_>(std::forward<SparseMatrixCSR>(other))
514 {
515 }
516
525 {
526 this->move(std::forward<SparseMatrixCSR>(other));
527
528 return *this;
529 }
530
540 {
542 t.clone(*this, clone_mode);
543 return t;
544 }
545
554 template<typename DT2_, typename IT2_>
556 {
557 Container<DT_, IT_>::clone(other, clone_mode);
558 }
559
567 template <typename DT2_, typename IT2_>
569 {
570 this->assign(other);
571 }
572
580 template <typename DT2_, typename IT2_>
582 {
583 this->clear();
584
585 this->_scalar_index.push_back(other.size());
586 this->_scalar_index.push_back(other.rows());
587 this->_scalar_index.push_back(other.columns());
588 this->_scalar_index.push_back(other.used_elements());
589
590 if (other.used_elements() == 0)
591 return;
592
593 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
594 this->_elements_size.push_back(_used_elements());
595 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
596 this->_indices_size.push_back(_used_elements());
597 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_rows() + 1));
598 this->_indices_size.push_back(_rows() + 1);
599
600 DT_ * tval(nullptr);
601 IT_ * tcol_ind(nullptr);
602 IT_ * trow_ptr(nullptr);
603 tval = this->_elements.at(0);
604 tcol_ind = this->_indices.at(0);
605 trow_ptr = this->_indices.at(1);
606
607 trow_ptr[0] = 0;
608
609 const DT_ * cval(other.val());
610 const IT_ * coffsets(other.offsets());
611 const Index cnum_of_offsets(other.num_of_offsets());
612 const Index crows(other.rows());
613
614 // Search first offset of the upper triangular matrix
615 Index k(0);
616 while (k < cnum_of_offsets && coffsets[k] + 1 < crows)
617 {
618 ++k;
619 }
620
621 IT_ ue(0);
622 // iteration over all offsets of the lower triangular matrix
623 for (Index i(k + 1); i > 0;)
624 {
625 --i;
626
627 // iteration over all offsets of the upper triangular matrix
628 for (Index j(cnum_of_offsets + 1); j > 0;)
629 {
630 --j;
631
632 // iteration over all rows which contain the offsets between offset i and offset j
633 const Index start(Math::max(other.start_offset(i),
634 other.end_offset(j) + 1));
635 const Index end (Math::min(other.start_offset(i-1),
636 other.end_offset(j-1) + 1));
637 for (Index l(start); l < end; ++l)
638 {
639 for (Index a(i); a < j; ++a)
640 {
641 tval[ue] = cval[a * crows + l];
642 tcol_ind[ue] = IT_(l + coffsets[a] + 1 - crows);
643 ++ue;
644 }
645 trow_ptr[l + 1] = ue;
646 }
647 }
648 }
649 }
650
658 template <typename DT2_, typename IT2_, int BlockHeight_, int BlockWidth_>
660 {
661 this->clear();
662
663 this->_scalar_index.push_back(other.template rows<Perspective::pod>() * other.template columns<Perspective::pod>());
664 this->_scalar_index.push_back(other.template rows<Perspective::pod>());
665 this->_scalar_index.push_back(other.template columns<Perspective::pod>());
666 this->_scalar_index.push_back(other.template used_elements<Perspective::pod>());
667
668 if (other.template used_elements<Perspective::pod>() == 0)
669 return;
670
671 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
672 this->_elements_size.push_back(_used_elements());
673 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
674 this->_indices_size.push_back(_used_elements());
675 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_rows() + 1));
676 this->_indices_size.push_back(_rows() + 1);
677
678 DT_ * tval(nullptr);
679 IT_ * tcol_ind(nullptr);
680 IT_ * trow_ptr(nullptr);
681 tval = this->_elements.at(0);
682 tcol_ind = this->_indices.at(0);
683 trow_ptr = this->_indices.at(1);
684
685 Index ait(0);
686 trow_ptr[0] = IT_(0);
687 for (Index orow(0) ; orow < other.rows() ; ++orow)
688 {
689 for (int row(0) ; row < BlockHeight_ ; ++row)
690 {
691 for(Index ocol(other.row_ptr()[orow]) ; ocol < other.row_ptr()[orow + 1] ; ++ocol)
692 {
694 for (int col(0) ; col < BlockWidth_ ; ++col)
695 {
696 tval[ait] = tbm(row,col);
697 tcol_ind[ait] = other.col_ind()[ocol] * (IT_)BlockWidth_ + (IT_)col;
698 ++ait;
699 }
700 }
701 trow_ptr[orow * Index(BlockHeight_) + Index(row) + 1] = (IT_)ait;
702 }
703 }
704 }
705
713 template <typename MT_>
714 void convert(const MT_ & a)
715 {
716 XASSERT(a.template used_elements<Perspective::pod>() > 0);
717
718 typename MT_::template ContainerType<DT_, IT_> ta;
719 ta.convert(a);
720
721 const Index arows(ta.template rows<Perspective::pod>());
722 const Index acolumns(ta.template columns<Perspective::pod>());
723 const Index aused_elements(ta.template used_elements<Perspective::pod>());
724
725 DenseVector<DT_, IT_> tval(aused_elements);
726 DenseVector<IT_, IT_> tcol_ind(aused_elements);
727 DenseVector<IT_, IT_> trow_ptr(arows + 1);
728
729 DT_ * pval(tval.elements());
730 IT_ * pcol_ind(tcol_ind.elements());
731 IT_ * prow_ptr(trow_ptr.elements());
732
733 for (Index i(0); i < arows; ++i)
734 {
735 prow_ptr[i + 1] = IT_(a.get_length_of_line(i));
736 }
737
738 prow_ptr[0] = IT_(0);
739
740 for (Index i(1); i < arows + 1; ++i)
741 {
742 prow_ptr[i] += prow_ptr[i - 1];
743 }
744
745 for (Index i(0); i < arows; ++i)
746 {
747 ta.set_line(i, pval + prow_ptr[i], pcol_ind + prow_ptr[i], 0);
748 }
749
750 this->move(SparseMatrixCSR<DT_, IT_>(arows, acolumns, tcol_ind, tval, trow_ptr));
751 }
752
760 void convert(const Adjacency::Graph & graph)
761 {
762 this->move(SparseMatrixCSR(graph));
763 }
764
775 template <typename MT_>
776 void convert_reverse(MT_ & a) const
777 {
778 const Index arows(this->template rows<Perspective::pod>());
779
780 const DT_ * pval(this->val());
781 const IT_ * prow_ptr(this->row_ptr());
782
783 for (Index i(0); i < arows; ++i)
784 {
785 const DT_ * temp(pval + prow_ptr[i]);
786 a.set_line_reverse(i, temp);
787 }
788 }
789
798 {
799 for (Index i(0) ; i < this->_elements.size() ; ++i)
801 for (Index i(0) ; i < this->_indices.size() ; ++i)
803
804 this->_elements.clear();
805 this->_indices.clear();
806 this->_elements_size.clear();
807 this->_indices_size.clear();
808 this->_scalar_index.clear();
809
810 this->_indices.assign(layout_in._indices.begin(), layout_in._indices.end());
811 this->_indices_size.assign(layout_in._indices_size.begin(), layout_in._indices_size.end());
812 this->_scalar_index.assign(layout_in._scalar_index.begin(), layout_in._scalar_index.end());
813
814 for (auto i : this->_indices)
816
817 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
818 this->_elements_size.push_back(_used_elements());
819
820 return *this;
821 }
822
830 template <typename DT2_ = DT_, typename IT2_ = IT_>
831 void deserialize(std::vector<char> input)
832 {
833 this->template _deserialize<DT2_, IT2_>(FileMode::fm_csr, input);
834 }
835
846 template <typename DT2_ = DT_, typename IT2_ = IT_>
847 std::vector<char> serialize(const LAFEM::SerialConfig& config = SerialConfig()) const
848 {
849 return this->template _serialize<DT2_, IT2_>(FileMode::fm_csr, config);
850 }
851
858 void read_from(FileMode mode, const String& filename)
859 {
860 std::ios_base::openmode bin = std::ifstream::in | std::ifstream::binary;
861 if(mode == FileMode::fm_mtx)
862 bin = std::ifstream::in;
863 std::ifstream file(filename.c_str(), bin);
864 if (! file.is_open())
865 XABORTM("Unable to open Matrix file " + filename);
866 read_from(mode, file);
867 file.close();
868 }
869
876 void read_from(FileMode mode, std::istream& file)
877 {
878 switch(mode)
879 {
880 case FileMode::fm_mtx:
881 {
882 this->clear();
883 this->_scalar_index.push_back(0);
884 this->_scalar_index.push_back(0);
885 this->_scalar_index.push_back(0);
886 this->_scalar_index.push_back(0);
887
888 std::map<IT_, std::map<IT_, DT_> > entries; // map<row, map<column, value> >
889
890 Index ue(0);
891 String line;
892 std::getline(file, line);
893 const bool general((line.find("%%MatrixMarket matrix coordinate real general") != String::npos) ? true : false);
894 const bool symmetric((line.find("%%MatrixMarket matrix coordinate real symmetric") != String::npos) ? true : false);
895
896 if (symmetric == false && general == false)
897 {
898 XABORTM("Input-file is not a compatible mtx-file");
899 }
900
901 while(!file.eof())
902 {
903 std::getline(file,line);
904 if (file.eof())
905 XABORTM("Input-file is empty");
906
907 String::size_type begin(line.find_first_not_of(" "));
908 if (line.at(begin) != '%')
909 break;
910 }
911 {
912 String::size_type begin(line.find_first_not_of(" "));
913 line.erase(0, begin);
914 String::size_type end(line.find_first_of(" "));
915 String srow(line, 0, end);
916 Index row((Index)atol(srow.c_str()));
917 line.erase(0, end);
918
919 begin = line.find_first_not_of(" ");
920 line.erase(0, begin);
921 end = line.find_first_of(" ");
922 String scol(line, 0, end);
923 Index col((Index)atol(scol.c_str()));
924 line.erase(0, end);
925 _rows() = row;
926 _columns() = col;
927 _size() = this->rows() * this->columns();
928 }
929
930 while(!file.eof())
931 {
932 std::getline(file, line);
933 if (file.eof())
934 break;
935
936 String::size_type begin(line.find_first_not_of(" "));
937 line.erase(0, begin);
938 String::size_type end(line.find_first_of(" "));
939 String srow(line, 0, end);
940 IT_ row((IT_)atol(srow.c_str()));
941 --row;
942 line.erase(0, end);
943
944 begin = line.find_first_not_of(" ");
945 line.erase(0, begin);
946 end = line.find_first_of(" ");
947 String scol(line, 0, end);
948 IT_ col((IT_)atol(scol.c_str()));
949 --col;
950 line.erase(0, end);
951
952 begin = line.find_first_not_of(" ");
953 line.erase(0, begin);
954 end = line.find_first_of(" ");
955 String sval(line, 0, end);
956 DT_ tval((DT_)atof(sval.c_str()));
957
958 entries[IT_(row)].insert(std::pair<IT_, DT_>(col, tval));
959 ++ue;
960 if (symmetric == true && row != col)
961 {
962 entries[IT_(col)].insert(std::pair<IT_, DT_>(row, tval));
963 ++ue;
964 }
965 }
966 _size() = this->rows() * this->columns();
967 _used_elements() = ue;
968
969 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(_used_elements()));
970 this->_elements_size.push_back(_used_elements());
971 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
972 this->_indices_size.push_back(_used_elements());
973 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(rows() + 1));
974 this->_indices_size.push_back(rows() + 1);
975
976 DT_ * tval = this->val();
977 IT_ * tcol_ind = this->col_ind();
978 IT_ * trow_ptr = this->row_ptr();
979
980 IT_ idx(0);
981 Index row_idx(0);
982 for (auto row : entries)
983 {
984 trow_ptr[row_idx] = idx;
985 for (auto col : row.second )
986 {
987 tcol_ind[idx] = col.first;
988 tval[idx] = col.second;
989 ++idx;
990 }
991 row.second.clear();
992 ++row_idx;
993 }
994 trow_ptr[rows()] = IT_(ue);
995 entries.clear();
996
997 break;
998 }
999 case FileMode::fm_csr:
1001 this->template _deserialize<double, std::uint64_t>(FileMode::fm_csr, file);
1002 break;
1003 default:
1004 XABORTM("Filemode not supported!");
1005 }
1006 }
1007
1018 void write_out(FileMode mode, const String& filename, bool symmetric = false) const
1019 {
1020 std::ios_base::openmode bin = std::ofstream::out | std::ofstream::binary;
1021 if(mode == FileMode::fm_mtx)
1022 bin = std::ofstream::out;
1023 std::ofstream file;
1024 char* buff = nullptr;
1025 if(mode == FileMode::fm_mtx)
1026 {
1027 buff = new char[LAFEM::FileOutStreamBufferSize];
1028 file.rdbuf()->pubsetbuf(buff, LAFEM::FileOutStreamBufferSize);
1029 }
1030 file.open(filename.c_str(), bin);
1031 if(! file.is_open())
1032 XABORTM("Unable to open Matrix file " + filename);
1033 write_out(mode, file, symmetric);
1034 file.close();
1035 delete[] buff;
1036 }
1037
1048 void write_out(FileMode mode, std::ostream& file, bool symmetric = false) const
1049 {
1050 switch(mode)
1051 {
1052 case FileMode::fm_csr:
1054 this->template _serialize<double, std::uint64_t>(FileMode::fm_csr, file);
1055 break;
1056 case FileMode::fm_mtx:
1057 {
1058 if (symmetric)
1059 {
1060 file << "%%MatrixMarket matrix coordinate real symmetric\n";
1061 std::vector<IT_> rowv;
1062 std::vector<IT_> colv;
1063 std::vector<DT_> valv;
1064 for (Index row(0) ; row < rows() ; ++row)
1065 {
1066 const IT_ end(this->row_ptr()[row + 1]);
1067 for (IT_ i(this->row_ptr()[row]) ; i < end ; ++i)
1068 {
1069 const IT_ col(this->col_ind()[i]);
1070 if (row >= col)
1071 {
1072 rowv.push_back(IT_(row + 1));
1073 colv.push_back(col + 1);
1074 valv.push_back(this->val()[i]);
1075 }
1076 }
1077 }
1078 file << this->rows() << " " << this->columns() << " " << valv.size() << "\n";
1079 for (Index i(0) ; i < valv.size() ; ++i)
1080 {
1081 file << rowv.at(i) << " " << colv.at(i) << " " << stringify_fp_sci(valv.at(i)) << "\n";
1082 }
1083 }
1084 else
1085 {
1086 file << "%%MatrixMarket matrix coordinate real general\n";
1087 file << this->rows() << " " << this->columns() << " " << this->used_elements() << "\n";
1088
1089 // const int max_size = 3u*20u*3000u*(this->used_elements()/this->rows() + 1);
1090 // const int stop_point = max_size - 400u;
1091
1092 // char* buffer = new char[max_size];
1093 // int buffer_ptr = 0;
1094
1095 for (Index row(0) ; row < rows() ; ++row)
1096 {
1097 const IT_ end(this->row_ptr()[row + 1]);
1098 for (IT_ i(this->row_ptr()[row]) ; i < end ; ++i)
1099 {
1100 // const String tmp = stringify(row+1) + " " + stringify(this->col_ind()[i] + 1) + " " + stringify_fp_sci(this->val()[i]) + "\n";
1101 // const auto* data = tmp.data();
1102 // std::memcpy(buffer+buffer_ptr, data, tmp.size());
1103 // buffer_ptr += tmp.size();
1104 // if(buffer_ptr > stop_point)
1105 // {
1106 // file.write(buffer, buffer_ptr+1);
1107 // buffer_ptr = 0;
1108 // }
1109 file << row + 1 << " " << this->col_ind()[i] + 1 << " " << stringify_fp_sci(this->val()[i]) << "\n";
1110 }
1111 }
1112 // if(buffer_ptr > 0)
1113 // {
1114 // file.write(buffer, buffer_ptr+1);
1115 // }
1116
1117 }
1118 break;
1119 }
1120 default:
1121 XABORTM("Filemode not supported!");
1122 }
1123 }
1124
1133 DT_ operator()(Index row, Index col) const
1134 {
1135 ASSERT(row < rows());
1136 ASSERT(col < columns());
1137
1138 MemoryPool::synchronize();
1139
1140 const IT_ * const trow_ptr(this->row_ptr());
1141 const IT_ * const tcol_ind(this->col_ind());
1142
1143 for (Index i(trow_ptr[row]) ; i < trow_ptr[row+1] ; ++i)
1144 {
1145 if (Index(tcol_ind[i]) == col)
1146 return this->val()[i];
1147 if (Index(tcol_ind[i]) > col)
1148 return DT_(0.);
1149 }
1150 return DT_(0.);
1151 }
1152
1159 {
1161 }
1162
1168 template <Perspective = Perspective::native>
1169 Index rows() const
1170 {
1171 return this->_scalar_index.at(1);
1172 }
1173
1179 template <Perspective = Perspective::native>
1181 {
1182 return this->_scalar_index.at(2);
1183 }
1184
1190 template <Perspective = Perspective::native>
1192 {
1193 return this->_scalar_index.at(3);
1194 }
1195
1196
1202 IT_ * col_ind()
1203 {
1204 if (this->_indices.size() == 0)
1205 return nullptr;
1206
1207 return this->_indices.at(0);
1208 }
1209
1210 IT_ const * col_ind() const
1211 {
1212 if (this->_indices.size() == 0)
1213 return nullptr;
1214
1215 return this->_indices.at(0);
1216 }
1217
1223 DT_ * val()
1224 {
1225 if (this->_elements.size() == 0)
1226 return nullptr;
1227
1228 return this->_elements.at(0);
1229 }
1230
1231 DT_ const * val() const
1232 {
1233 if (this->_elements.size() == 0)
1234 return nullptr;
1235
1236 return this->_elements.at(0);
1237 }
1238
1244 IT_ * row_ptr()
1245 {
1246 if (this->_indices.size() == 0)
1247 return nullptr;
1248
1249 return this->_indices.at(1);
1250 }
1251
1252 IT_ const * row_ptr() const
1253 {
1254 if (this->_indices.size() == 0)
1255 return nullptr;
1256
1257 return this->_indices.at(1);
1258 }
1259
1260
1267 void bandwidth_row(Index & bandw, Index & bandw_i) const
1268 {
1269 bandw = 0;
1270 bandw_i = 0;
1271 for (Index row(0) ; row < rows() ; ++row)
1272 {
1273 if (this->row_ptr()[row+1] == this->row_ptr()[row])
1274 continue;
1275
1276 Index temp = this->col_ind()[this->row_ptr()[row+1]-1] - this->col_ind()[this->row_ptr()[row]] + 1;
1277 if(temp > bandw)
1278 {
1279 bandw = temp;
1280 bandw_i = row;
1281 }
1282 }
1283 }
1284
1291 void bandwidth_column(Index & bandw, Index & bandw_i) const
1292 {
1294 temp.transpose(*this);
1295 temp.bandwidth_row(bandw, bandw_i);
1296 }
1297
1306 void radius_row(Index & radius, Index & radius_i) const
1307 {
1308 radius = 0;
1309 radius_i = 0;
1310
1311 for (Index row(0) ; row < rows() ; ++row)
1312 {
1313 if (this->row_ptr()[row+1] == this->row_ptr()[row])
1314 continue;
1315
1316 if (this->row_ptr()[row+1] > 0)
1317 {
1318 Index temp1 = this->col_ind()[this->row_ptr()[row+1]-1];
1319 if(Math::max(temp1,row) - Math::min(temp1, row) > radius)
1320 {
1321 radius = Math::max(temp1,row) - Math::min(temp1, row);
1322 radius_i = row;
1323 }
1324 }
1325 Index temp2 = this->col_ind()[this->row_ptr()[row]];
1326 if(Math::max(temp2,row) - Math::min(temp2, row) > radius)
1327 {
1328 radius = Math::max(temp2,row) - Math::min(temp2, row);
1329 radius_i = row;
1330 }
1331 }
1332 }
1333
1342 void radius_column(Index & radius, Index & radius_i) const
1343 {
1344 SparseMatrixCSR<DT_, IT_> temp(this->clone());
1345 temp.transpose(temp);
1346 temp.radius_row(radius, radius_i);
1347 }
1348
1354 static String name()
1355 {
1356 return "SparseMatrixCSR";
1357 }
1358
1365 void copy(const SparseMatrixCSR & x, bool full = false)
1366 {
1367 this->_copy_content(x, full);
1368 }
1369
1372
1381 void axpy(
1382 const SparseMatrixCSR & x,
1383 const DT_ alpha = DT_(1))
1384 {
1385 XASSERTM(x.rows() == this->rows(), "Matrix rows do not match!");
1386 XASSERTM(x.columns() == this->columns(), "Matrix columns do not match!");
1387 XASSERTM(x.used_elements() == this->used_elements(), "Matrix used_elements do not match!");
1388
1389 TimeStamp ts_start;
1390
1392 Arch::Axpy::value(this->val(), alpha, x.val(), this->used_elements());
1393
1394 TimeStamp ts_stop;
1395 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1396 }
1397
1404 void scale(const SparseMatrixCSR & x, const DT_ alpha)
1405 {
1406 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1407 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1408 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1409
1410 TimeStamp ts_start;
1411
1413 Arch::Scale::value(this->val(), x.val(), alpha, this->used_elements());
1414
1415 TimeStamp ts_stop;
1416 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1417 }
1418
1424 DT_ norm_frobenius() const
1425 {
1426 TimeStamp ts_start;
1427
1429 DT_ result = Arch::Norm2::value(this->val(), this->used_elements());
1430
1431 TimeStamp ts_stop;
1432 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1433
1434 return result;
1435 }
1436
1443 void row_norm2(VectorTypeL& row_norms) const
1444 {
1445 ASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1446
1447 TimeStamp ts_start;
1449
1450 Arch::RowNorm::csr_norm2(row_norms.elements(), this->val(), col_ind(), row_ptr(), rows());
1451
1452 TimeStamp ts_stop;
1453 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1454 }
1455
1462 void row_norm2sqr(VectorTypeL& row_norms) const
1463 {
1464 ASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1465
1466 TimeStamp ts_start;
1468
1469 Arch::RowNorm::csr_norm2sqr(row_norms.elements(), this->val(), col_ind(), row_ptr(), rows());
1470
1471 TimeStamp ts_stop;
1472 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1473 }
1474
1494 void row_norm2sqr(VectorTypeL& row_norms, const VectorTypeR& scal) const
1495 {
1496 ASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1497 ASSERTM(scal.size() == this->rows(), "Matrix/scalings dimension mismatch");
1498
1499 TimeStamp ts_start;
1501
1502 Arch::RowNorm::csr_scaled_norm2sqr(row_norms.elements(), scal.elements(), this->val(), col_ind(),
1503 row_ptr(), rows());
1504
1505 TimeStamp ts_stop;
1506 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1507 }
1508
1515 {
1516 TimeStamp ts_start;
1517
1518 Index max_abs_index = Arch::MaxAbsIndex::value(this->val(), this->used_elements());
1519 ASSERT(max_abs_index < this->used_elements());
1520 DT_ result;
1521 MemoryPool::copy(&result, this->val() + max_abs_index, 1);
1522 result = Math::abs(result);
1523
1524 TimeStamp ts_stop;
1525 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1526
1527 return result;
1528 }
1529
1536 {
1537 TimeStamp ts_start;
1538
1539 Index min_abs_index = Arch::MinAbsIndex::value(this->val(), this->used_elements());
1540 ASSERT(min_abs_index < this->used_elements());
1541 DT_ result;
1542 MemoryPool::copy(&result, this->val() + min_abs_index, 1);
1543 result = Math::abs(result);
1544
1545 TimeStamp ts_stop;
1546 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1547
1548 return result;
1549 }
1550
1556 DT_ max_element() const
1557 {
1558 TimeStamp ts_start;
1559
1560 Index max_index = Arch::MaxIndex::value(this->val(), this->used_elements());
1561 ASSERT(max_index < this->used_elements());
1562 DT_ result;
1563 MemoryPool::copy(&result, this->val() + max_index, 1);
1564
1565 TimeStamp ts_stop;
1566 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1567
1568 return result;
1569 }
1570
1576 DT_ min_element() const
1577 {
1578 TimeStamp ts_start;
1579
1580 Index min_index = Arch::MinIndex::value(this->val(), this->used_elements());
1581 ASSERT(min_index < this->used_elements());
1582 DT_ result;
1583 MemoryPool::copy(&result, this->val() + min_index, 1);
1584
1585 TimeStamp ts_stop;
1586 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1587
1588 return result;
1589 }
1590
1597 DT_ max_rel_diff(const SparseMatrixCSR & x) const
1598 {
1599 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1600 TimeStamp ts_start;
1601
1602 DataType max_rel_diff = Arch::MaxRelDiff::value(this->val(), x.val(), this->used_elements());
1603
1604 TimeStamp ts_stop;
1605 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1606
1607 return max_rel_diff;
1608 }
1609
1618 bool same_layout(const SparseMatrixCSR& x) const
1619 {
1620 if (this->row_ptr ()== x.row_ptr() && this->col_ind() == x.col_ind())
1621 return true;
1622 if(this->size() == 0 && x.size() == 0 && this->get_elements().size() == 0 && this->get_indices().size() == 0 && x.get_elements().size() == 0 && x.get_indices().size() == 0)
1623 return true;
1624 if (this->rows() != x.rows())
1625 return false;
1626 if (this->columns() != x.columns())
1627 return false;
1628 if (this->used_elements() != x.used_elements())
1629 return false;
1630
1631 IT_ * col_ind_a;
1632 IT_ * col_ind_b;
1633 IT_ * row_ptr_a;
1634 IT_ * row_ptr_b;
1635
1636 col_ind_a = const_cast<IT_*>(this->col_ind());
1637 row_ptr_a = const_cast<IT_*>(this->row_ptr());
1638 col_ind_b = const_cast<IT_*>(x.col_ind());
1639 row_ptr_b = const_cast<IT_*>(x.row_ptr());
1640
1641 bool ret(true);
1642 for (Index i(0) ; i < this->used_elements() ; ++i)
1643 {
1644 if (col_ind_a[i] != col_ind_b[i])
1645 {
1646 ret = false;
1647 break;
1648 }
1649 }
1650 if (ret)
1651 {
1652 for (Index i(0) ; i < this->rows() + 1; ++i)
1653 {
1654 if (row_ptr_a[i] != row_ptr_b[i])
1655 {
1656 ret = false;
1657 break;
1658 }
1659 }
1660 }
1661
1662 return ret;
1663 }
1664
1671 {
1672 SparseMatrixCSR x_t;
1673 x_t.transpose(*this);
1674 return x_t;
1675 }
1676
1683 {
1684 if (x.used_elements() == 0)
1685 {
1686 this->move(SparseMatrixCSR(x.rows(), x.columns()));
1687 return;
1688 }
1689
1690 const Index txrows(x.rows());
1691 const Index txcolumns(x.columns());
1692 const Index txused_elements(x.used_elements());
1693
1694 const IT_ * ptxcol_ind(x.col_ind());
1695 const IT_ * ptxrow_ptr(x.row_ptr());
1696 const DT_ * ptxval(x.val());
1697
1698 DenseVector<IT_, IT_> tcol_ind(txused_elements);
1699 DenseVector<DT_, IT_> tval(txused_elements);
1700 DenseVector<IT_, IT_> trow_ptr(txcolumns + 1, IT_(0));
1701
1702 IT_ * ptcol_ind(tcol_ind.elements());
1703 DT_ * ptval(tval.elements());
1704 IT_ * ptrow_ptr(trow_ptr.elements());
1705
1706 ptrow_ptr[0] = 0;
1707
1708 for (Index i(0); i < txused_elements; ++i)
1709 {
1710 ++ptrow_ptr[ptxcol_ind[i] + 1];
1711 }
1712
1713 for (Index i(1); i < txcolumns - 1; ++i)
1714 {
1715 ptrow_ptr[i + 1] += ptrow_ptr[i];
1716 }
1717
1718 for (Index i(0); i < txrows; ++i)
1719 {
1720 for (IT_ k(ptxrow_ptr[i]); k < ptxrow_ptr[i+1]; ++k)
1721 {
1722 const IT_ l(ptxcol_ind[k]);
1723 const IT_ j(ptrow_ptr[l]);
1724 ptval[j] = ptxval[k];
1725 ptcol_ind[j] = IT_(i);
1726 ++ptrow_ptr[l];
1727 }
1728 }
1729
1730 for (Index i(txcolumns); i > 0; --i)
1731 {
1732 ptrow_ptr[i] = ptrow_ptr[i - 1];
1733 }
1734 ptrow_ptr[0] = 0;
1735
1736 this->move(SparseMatrixCSR<DT_, IT_>(txcolumns, txrows, tcol_ind, tval, trow_ptr));
1737 }
1738
1746 {
1747 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1748 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1749 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1750 XASSERTM(s.size() == this->rows(), "Vector size does not match!");
1751
1752 TimeStamp ts_start;
1753
1755 Arch::ScaleRows::csr(this->val(), x.val(), this->col_ind(), this->row_ptr(),
1756 s.elements(), this->rows(), this->columns(), this->used_elements());
1757
1758 TimeStamp ts_stop;
1759 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1760 }
1761
1769 {
1770 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1771 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1772 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1773 XASSERTM(s.size() == this->columns(), "Vector size does not match!");
1774
1775 TimeStamp ts_start;
1776
1778 Arch::ScaleCols::csr(this->val(), x.val(), this->col_ind(), this->row_ptr(),
1779 s.elements(), this->rows(), this->columns(), this->used_elements());
1780
1781 TimeStamp ts_stop;
1782 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1783 }
1784
1794 {
1795 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1796 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1797
1798 TimeStamp ts_start;
1799
1800 if (this->used_elements() == 0)
1801 {
1802 r.format();
1803 return;
1804 }
1805
1806 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1807
1809 Arch::Apply::csr(r.elements(), DT_(1), x.elements(), DT_(0), r.elements(),
1810 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements(), false);
1811
1812 TimeStamp ts_stop;
1813 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1814 }
1815
1825 {
1826 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
1827 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
1828
1829 TimeStamp ts_start;
1830
1831 if (this->used_elements() == 0)
1832 {
1833 r.format();
1834 return;
1835 }
1836
1837 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1838
1840 Arch::Apply::csr(r.elements(), DT_(1), x.elements(), DT_(0), r.elements(),
1841 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements(), true);
1842
1843 TimeStamp ts_stop;
1844 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1845 }
1846
1857 template<int BlockSize_>
1859 {
1860 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1861 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1862
1863 TimeStamp ts_start;
1864
1865 if (this->used_elements() == 0)
1866 {
1867 r.format();
1868 return;
1869 }
1870
1871 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1872
1873
1874 Statistics::add_flops(this->used_elements() * 2 * BlockSize_);
1875 Arch::Apply::template csrsb<BlockSize_, DT_, IT_>(
1876 r.template elements<Perspective::pod>(), DT_(1.), x.template elements<Perspective::pod>(), DT_(0.), r.template elements<Perspective::pod>(),
1877 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1878
1879 TimeStamp ts_stop;
1880 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1881 }
1882
1894 void apply(
1896 const DenseVector<DT_, IT_> & x,
1897 const DenseVector<DT_, IT_> & y,
1898 const DT_ alpha = DT_(1)) const
1899 {
1900 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1901 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1902 XASSERTM(y.size() == this->rows(), "Vector size of y does not match!");
1903
1904 TimeStamp ts_start;
1905
1906 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1907 {
1908 r.copy(y);
1909 //r.scale(beta);
1910 return;
1911 }
1912
1913 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1914
1915 Statistics::add_flops( 2 * (this->used_elements() + this->rows()) );
1916 Arch::Apply::csr(r.elements(), alpha, x.elements(), DT_(1.), y.elements(),
1917 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements(), false);
1918
1919 TimeStamp ts_stop;
1920 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1921 }
1922
1936 const DenseVector<DT_, IT_> & x,
1937 const DenseVector<DT_, IT_> & y,
1938 const DT_ alpha = DT_(1)) const
1939 {
1940 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
1941 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
1942 XASSERTM(y.size() == this->columns(), "Vector size of y does not match!");
1943
1944 TimeStamp ts_start;
1945
1946 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1947 {
1948 r.copy(y);
1949 //r.scale(beta);
1950 return;
1951 }
1952
1953 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1954
1955 Statistics::add_flops( 2 * (this->used_elements() + this->rows()) );
1956 Arch::Apply::csr(r.elements(), alpha, x.elements(), DT_(1.), y.elements(),
1957 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements(), true);
1958
1959 TimeStamp ts_stop;
1960 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1961 }
1962
1976 template<int BlockSize_>
1977 void apply(
1981 const DT_ alpha = DT_(1)) const
1982 {
1983 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1984 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1985 XASSERTM(y.size() == this->rows(), "Vector size of y does not match!");
1986
1987 TimeStamp ts_start;
1988
1989 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1990 {
1991 r.copy(y);
1992 //r.scale(beta);
1993 return;
1994 }
1995
1996 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1997
1998 Statistics::add_flops( (this->used_elements() + this->rows()) * 2 * BlockSize_);
1999 Arch::Apply::template csrsb<BlockSize_, DT_, IT_>(
2000 r.template elements<Perspective::pod>(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.template elements<Perspective::pod>(),
2001 this->val(), this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2002
2003 TimeStamp ts_stop;
2004 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2005 }
2006
2043 const DT_ alpha = DT_(1),
2044 const bool allow_incomplete = false)
2045 {
2046 // validate matrix dimensions
2047 XASSERT(this->rows() == d.rows());
2048 XASSERT(d.columns() == a.rows());
2049 XASSERT(a.columns() == b.rows());
2050 XASSERT(b.columns() == this->columns());
2051
2052 // fetch matrix arrays:
2053 DT_* data_x = this->val();
2054 const DT_* data_d = d.val();
2055 const DT_* data_a = a.val();
2056 const DT_* data_b = b.val();
2057 const IT_* row_ptr_x = this->row_ptr();
2058 const IT_* col_idx_x = this->col_ind();
2059 const IT_* row_ptr_d = d.row_ptr();
2060 const IT_* col_idx_d = d.col_ind();
2061 const IT_* row_ptr_a = a.row_ptr();
2062 const IT_* col_idx_a = a.col_ind();
2063 const IT_* row_ptr_b = b.row_ptr();
2064 const IT_* col_idx_b = b.col_ind();
2065
2066 // loop over all rows of D and X, resp.
2067 for(IT_ i(0); i < IT_(this->rows()); ++i)
2068 {
2069 // loop over all non-zeros D_ik in row i of D
2070 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2071 {
2072 // get column index k
2073 const IT_ k = col_idx_d[ik];
2074
2075 // loop over all non-zeros A_kl in row k of A
2076 for(IT_ kl(row_ptr_a[k]); kl < row_ptr_a[k+1]; ++kl)
2077 {
2078 // get column index l
2079 const IT_ l = col_idx_a[kl];
2080
2081 // pre-compute factor (alpha * D_ik * A_kl)
2082 const DT_ omega = alpha * data_d[ik] * data_a[kl];
2083
2084 // loop over all non-zeros B_lj in row j of B and
2085 // loop over all non-zeros X_ij in row i of X and
2086 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2087 // X_i. += (alpha * D_ik * A_kl) * B_l.
2088 IT_ ij = row_ptr_x[i];
2089 IT_ lj = row_ptr_b[l];
2090 while(lj < row_ptr_b[l+1])
2091 {
2092 if(ij >= row_ptr_x[i+1])
2093 {
2094 // we have reached the end of row X_i, but there is at least
2095 // one entry in row B_l left, so the pattern of X is incomplete
2096 // We let the caller decide whether this is a valid case or not:
2097 if(allow_incomplete)
2098 break; // continue with next row
2099 else
2100 XABORTM("Incomplete output matrix structure");
2101 }
2102 else if(col_idx_x[ij] == col_idx_b[lj])
2103 {
2104 // okay: B_lj contributes to X_ij
2105 data_x[ij] += omega * data_b[lj];
2106 ++ij;
2107 ++lj;
2108 }
2109 else if(col_idx_x[ij] < col_idx_b[lj])
2110 {
2111 // entry X_ij exists, but B_lj is missing:
2112 // this is a perfectly valid case, so continue with the next non-zero of X_i
2113 ++ij;
2114 }
2115 else //if(col_idx_x[ij] > col_idx_b[lj])
2116 {
2117 // If we come out here, then the sparsity pattern of X is incomplete:
2118 // B_lj is meant to be added onto X_ij, but the entry X_ij is missing
2119 // We let the caller decide whether this is a valid case or not:
2120 if(allow_incomplete)
2121 ++lj;
2122 else
2123 XABORTM("Incomplete output matrix structure");
2124 }
2125 }
2126 }
2127 }
2128 }
2129 }
2130
2170 const DT_ alpha = DT_(1),
2171 const bool allow_incomplete = false)
2172 {
2173 // validate matrix dimensions
2174 XASSERT(this->rows() == d.rows());
2175 XASSERT(d.columns() == a.size());
2176 XASSERT(a.size() == b.rows());
2177 XASSERT(b.columns() == this->columns());
2178
2179 // fetch matrix arrays:
2180 DT_* data_x = this->val();
2181 const DT_* data_d = d.val();
2182 const DT_* data_a = a.elements();
2183 const DT_* data_b = b.val();
2184 const IT_* row_ptr_x = this->row_ptr();
2185 const IT_* col_idx_x = this->col_ind();
2186 const IT_* row_ptr_d = d.row_ptr();
2187 const IT_* col_idx_d = d.col_ind();
2188 const IT_* row_ptr_b = b.row_ptr();
2189 const IT_* col_idx_b = b.col_ind();
2190
2191 // loop over all rows of D and X, resp.
2192 for(IT_ i(0); i < IT_(this->rows()); ++i)
2193 {
2194 // loop over all non-zeros D_ik in row i of D
2195 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2196 {
2197 // get column index k
2198 const IT_ k = col_idx_d[ik];
2199
2200 // pre-compute factor (alpha * D_ik * A_kk)
2201 const DT_ omega = alpha * data_d[ik] * data_a[k];
2202
2203 // loop over all non-zeros B_kj in row j of B and
2204 // loop over all non-zeros X_ij in row i of X and
2205 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2206 // X_i. += (alpha * D_ik * A_kk) * B_k.
2207 IT_ ij = row_ptr_x[i];
2208 IT_ kj = row_ptr_b[k];
2209 while(kj < row_ptr_b[k+1])
2210 {
2211 if(ij >= row_ptr_x[i+1])
2212 {
2213 // we have reached the end of row X_i, but there is at least
2214 // one entry in row B_l left, so the pattern of X is incomplete
2215 // We let the caller decide whether this is a valid case or not:
2216 if(allow_incomplete)
2217 break; // continue with next row
2218 else
2219 XABORTM("Incomplete output matrix structure");
2220 }
2221 else if(col_idx_x[ij] == col_idx_b[kj])
2222 {
2223 // okay: B_kj contributes to X_ij
2224 data_x[ij] += omega * data_b[kj];
2225 ++ij;
2226 ++kj;
2227 }
2228 else if(col_idx_x[ij] < col_idx_b[kj])
2229 {
2230 // entry X_ij exists, but B_kj is missing:
2231 // this is a perfectly valid case, so continue with the next non-zero of X_i
2232 ++ij;
2233 }
2234 else //if(col_idx_x[ij] > col_idx_b[kj])
2235 {
2236 // If we come out here, then the sparsity pattern of X is incomplete:
2237 // B_kj is meant to be added onto X_ij, but the entry X_ij is missing
2238 // We let the caller decide whether this is a valid case or not:
2239 if(allow_incomplete)
2240 ++kj;
2241 else
2242 XABORTM("Incomplete output matrix structure");
2243 }
2244 }
2245 }
2246 }
2247 }
2248
2284 template<int bs_>
2289 const DT_ alpha = DT_(1),
2290 const bool allow_incomplete = false)
2291 {
2292 // validate matrix dimensions
2293 XASSERT(this->rows() == d.rows());
2294 XASSERT(d.columns() == a.size());
2295 XASSERT(a.size() == b.rows());
2296 XASSERT(b.columns() == this->columns());
2297
2298 // fetch matrix arrays:
2299 DT_* data_x = this->val();
2300 const auto* data_d = d.val();
2301 const auto* data_a = a.elements();
2302 const auto* data_b = b.val();
2303 const IT_* row_ptr_x = this->row_ptr();
2304 const IT_* col_idx_x = this->col_ind();
2305 const IT_* row_ptr_d = d.row_ptr();
2306 const IT_* col_idx_d = d.col_ind();
2307 const IT_* row_ptr_b = b.row_ptr();
2308 const IT_* col_idx_b = b.col_ind();
2309
2310 // loop over all rows of D and X, resp.
2311 for(IT_ i(0); i < IT_(this->rows()); ++i)
2312 {
2313 // loop over all non-zeros D_ik in row i of D
2314 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2315 {
2316 // get column index k
2317 const IT_ k = col_idx_d[ik];
2318
2319 // pre-compute factor (alpha * D_ik * A_kk)
2321 for(int l = 0; l < bs_; ++l)
2322 omega[l] = alpha * data_d[ik](0,l) * data_a[k][l];
2323
2324 // loop over all non-zeros B_kj in row j of B and
2325 // loop over all non-zeros X_ij in row i of X and
2326 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2327 // X_i. += (alpha * D_ik * A_kk) * B_k.
2328 IT_ ij = row_ptr_x[i];
2329 IT_ kj = row_ptr_b[k];
2330 while(kj < row_ptr_b[k+1])
2331 {
2332 if(ij >= row_ptr_x[i+1])
2333 {
2334 // we have reached the end of row X_i, but there is at least
2335 // one entry in row B_l left, so the pattern of X is incomplete
2336 // We let the caller decide whether this is a valid case or not:
2337 if(allow_incomplete)
2338 break; // continue with next row
2339 else
2340 XABORTM("Incomplete output matrix structure");
2341 }
2342 else if(col_idx_x[ij] == col_idx_b[kj])
2343 {
2344 // okay: B_kj contributes to X_ij
2345 for(int l = 0; l < bs_; ++l)
2346 data_x[ij] += omega[l] * data_b[kj](l,0);
2347 ++ij;
2348 ++kj;
2349 }
2350 else if(col_idx_x[ij] < col_idx_b[kj])
2351 {
2352 // entry X_ij exists, but B_kj is missing:
2353 // this is a perfectly valid case, so continue with the next non-zero of X_i
2354 ++ij;
2355 }
2356 else //if(col_idx_x[ij] > col_idx_b[kj])
2357 {
2358 // If we come out here, then the sparsity pattern of X is incomplete:
2359 // B_kj is meant to be added onto X_ij, but the entry X_ij is missing
2360 // We let the caller decide whether this is a valid case or not:
2361 if(allow_incomplete)
2362 ++kj;
2363 else
2364 XABORTM("Incomplete output matrix structure");
2365 }
2366 }
2367 }
2368 }
2369 }
2370
2405 const DT_ alpha = DT_(1),
2406 const bool allow_incomplete = false)
2407 {
2408 // validate matrix dimensions
2409 XASSERT(this->rows() == d.rows());
2410 XASSERT(d.columns() == b.rows());
2411 XASSERT(b.columns() == this->columns());
2412
2413 // fetch matrix arrays:
2414 DT_* data_x = this->val();
2415 const DT_* data_d = d.val();
2416 const DT_* data_b = b.val();
2417 const IT_* row_ptr_x = this->row_ptr();
2418 const IT_* col_idx_x = this->col_ind();
2419 const IT_* row_ptr_d = d.row_ptr();
2420 const IT_* col_idx_d = d.col_ind();
2421 const IT_* row_ptr_b = b.row_ptr();
2422 const IT_* col_idx_b = b.col_ind();
2423
2424 // loop over all rows of D and X, resp.
2425 for(IT_ i(0); i < IT_(this->rows()); ++i)
2426 {
2427 // loop over all non-zeros D_ik in row i of D
2428 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2429 {
2430 // get column index k
2431 const IT_ k = col_idx_d[ik];
2432
2433 // pre-compute factor (alpha * D_ik)
2434 const DT_ omega = alpha * data_d[ik];
2435
2436 // loop over all non-zeros B_kj in row j of B and
2437 // loop over all non-zeros X_ij in row i of X and
2438 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2439 // X_i. += (alpha * D_ik) * B_k.
2440 IT_ ij = row_ptr_x[i];
2441 IT_ kj = row_ptr_b[k];
2442 while(kj < row_ptr_b[k+1])
2443 {
2444 if(ij >= row_ptr_x[i+1])
2445 {
2446 // we have reached the end of row X_i, but there is at least
2447 // one entry in row B_l left, so the pattern of X is incomplete
2448 // We let the caller decide whether this is a valid case or not:
2449 if(allow_incomplete)
2450 break; // continue with next row
2451 else
2452 XABORTM("Incomplete output matrix structure");
2453 }
2454 else if(col_idx_x[ij] == col_idx_b[kj])
2455 {
2456 // okay: B_kj contributes to X_ij
2457 data_x[ij] += omega * data_b[kj];
2458 ++ij;
2459 ++kj;
2460 }
2461 else if(col_idx_x[ij] < col_idx_b[kj])
2462 {
2463 // entry X_ij exists, but B_kj is missing:
2464 // this is a perfectly valid case, so continue with the next non-zero of X_i
2465 ++ij;
2466 }
2467 else //if(col_idx_x[ij] > col_idx_b[kj])
2468 {
2469 // If we come out here, then the sparsity pattern of X is incomplete:
2470 // B_kj is meant to be added onto X_ij, but the entry X_ij is missing
2471 // We let the caller decide whether this is a valid case or not:
2472 if(allow_incomplete)
2473 ++kj;
2474 else
2475 XABORTM("Incomplete output matrix structure");
2476 }
2477 }
2478 }
2479 }
2480 }
2482
2484 void lump_rows(VectorTypeL& lump) const
2485 {
2486 XASSERTM(lump.size() == rows(), "lump vector size does not match matrix row count!");
2487
2488 Arch::Lumping::csr(lump.elements(), val(), col_ind(), row_ptr(), rows());
2489 }
2490
2501 {
2503 lump_rows(lump);
2504 return lump;
2505 }
2506
2516 void extract_diag(VectorTypeL & diag, DenseVector<IT_, IT_> & diag_indices) const
2517 {
2518 XASSERTM(diag.size() == rows(), "diag size does not match matrix row count!");
2519 XASSERTM(diag_indices.size() == rows(), "diag_indices size does not match matrix row count!");
2520 XASSERTM(rows() == columns(), "matrix is not square!");
2521
2522 const IT_ * const dindices(diag_indices.elements());
2523 const DT_ * const tval(this->val());
2524 DT_ * tdiag(diag.elements());
2525 for (Index row(0); row < rows(); row++)
2526 {
2527 const Index index(dindices[row]);
2528 tdiag[row] = (index != used_elements()) ? tval[index] : DT_(0);
2529 }
2530 }
2531
2538 void extract_diag(VectorTypeL & diag) const
2539 {
2540 auto dia_indices = extract_diag_indices();
2541 extract_diag(diag, dia_indices);
2542 }
2543
2550 {
2552 extract_diag(diag);
2553 return diag;
2554 }
2555
2563 {
2564 XASSERTM(diag_indices.size() == rows(), "diag size does not match matrix row count!");
2565 XASSERTM(rows() == columns(), "matrix is not square!");
2566
2567 Arch::Diagonal::csr(diag_indices.elements(), col_ind(), row_ptr(), rows());
2568 }
2569
2576 {
2577 DenseVector<IT_, IT_> diag_indices(rows());
2578 extract_diag_indices(diag_indices);
2579 return diag_indices;
2580 }
2581
2584 {
2585 if (perm_row.size() == 0 && perm_col.size() == 0)
2586 return;
2587
2588 XASSERTM(perm_row.size() == this->rows(), "Container rows does not match permutation size");
2589 XASSERTM(perm_col.size() == this->columns(), "Container columns does not match permutation size");
2590
2591 // http://de.mathworks.com/help/matlab/math/sparse-matrix-operations.html#f6-13070
2592 IT_ * temp_row_ptr = new IT_[rows() + 1];
2593 IT_ * temp_col_ind = new IT_[used_elements()];
2594 DT_ * temp_val = new DT_[used_elements()];
2595
2596 Index * perm_pos;
2597 perm_pos = perm_row.get_perm_pos();
2598
2599 //permute rows from source to temp_*
2600 Index new_start(0);
2601 temp_row_ptr[0] = 0;
2602 for (Index row(0) ; row < this->rows() ; ++row)
2603 {
2604 Index row_size(this->row_ptr()[perm_pos[row] + 1] - this->row_ptr()[perm_pos[row]]);
2605
2606 //iterate over all elements in single one new and old row
2607 for (Index i(new_start), j(this->row_ptr()[perm_pos[row]]) ; i < new_start + row_size ; ++i, ++j)
2608 {
2609 temp_col_ind[i] = this->col_ind()[j];
2610 temp_val[i] = this->val()[j];
2611 }
2612
2613 new_start += row_size;
2614 temp_row_ptr[row+1] = (IT_)new_start;
2615 }
2616
2617 //use inverse col permutation as lookup table: i -> new location of i
2618 Adjacency::Permutation perm_col_inv = perm_col.inverse();
2619 perm_pos = perm_col_inv.get_perm_pos();
2620
2621 //permute columns from temp_* to result
2622 ::memcpy(this->row_ptr(), temp_row_ptr, (rows() + 1) * sizeof(IT_));
2623 ::memcpy(this->val(), temp_val, used_elements() * sizeof(DT_));
2624 for (Index i(0) ; i < used_elements() ; ++i)
2625 {
2626 this->col_ind()[i] = (IT_)perm_pos[temp_col_ind[i]];
2627 }
2628
2629 delete[] temp_row_ptr;
2630 delete[] temp_col_ind;
2631 delete[] temp_val;
2632
2633 //sort columns in every row by column index
2634 IT_ swap_key;
2635 DT_ swap_val;
2636 for (Index row(0) ; row < rows() ; ++row)
2637 {
2638 Index offset(this->row_ptr()[row]);
2639 Index row_size(this->row_ptr()[row+1] - this->row_ptr()[row]);
2640 for (Index i(1), j ; i < row_size ; ++i)
2641 {
2642 swap_key = this->col_ind()[i + offset];
2643 swap_val = this->val()[i + offset];
2644 j = i;
2645 while (j > 0 && this->col_ind()[j - 1 + offset] > swap_key)
2646 {
2647 this->col_ind()[j + offset] = this->col_ind()[j - 1 + offset];
2648 this->val()[j + offset] = this->val()[j - 1 + offset];
2649 --j;
2650 }
2651 this->col_ind()[j + offset] = swap_key;
2652 this->val()[j + offset] = swap_val;
2653 }
2654 }
2655 }
2656
2658
2665 void shrink(DT_ eps)
2666 {
2667 std::vector<IT_> row_entries(this->rows(), IT_(0));
2668 for (Index row(0) ; row < this->rows() ; ++row)
2669 {
2670 for (Index el(this->row_ptr()[row]) ; el < this->row_ptr()[row + 1] ; ++el)
2671 {
2672 if (Math::abs(this->val()[el]) >= eps)
2673 {
2674 row_entries.at(row) += IT_(1);
2675 }
2676 }
2677 }
2678
2679 Index ue(0);
2680 for (auto& n : row_entries)
2681 {
2682 ue += n;
2683 }
2684 if (ue == Index(0))
2685 {
2686 this->move(SparseMatrixCSR<DT_, IT_>(this->rows(), this->columns()));
2687 return;
2688 }
2689
2690 DenseVector<IT_, IT_> new_row_ptr(this->rows() + 1);
2691 new_row_ptr(0, IT_(0));
2692 for (Index row(0) ; row < this->rows() ; ++row)
2693 {
2694 new_row_ptr(row + 1, new_row_ptr(row) + row_entries.at(row));
2695 }
2696
2697 row_entries.clear();
2698
2699 DenseVector<IT_, IT_> new_col_ind(ue);
2700 DenseVector<DT_, IT_> new_val(ue);
2701 Index counter(0);
2702 for (Index row(0) ; row < this->rows() ; ++row)
2703 {
2704 for (Index el(this->row_ptr()[row]) ; el < this->row_ptr()[row + 1] ; ++el)
2705 {
2706 if (Math::abs(this->val()[el]) >= eps)
2707 {
2708 new_col_ind(counter, this->col_ind()[el]);
2709 new_val(counter, this->val()[el]);
2710 ++counter;
2711 }
2712 }
2713 }
2714 this->move(SparseMatrixCSR<DT_, IT_>(this->rows(), this->columns(), new_col_ind, new_val, new_row_ptr));
2715 }
2716
2719 {
2720 return VectorTypeL(this->rows());
2721 }
2722
2724 VectorTypeR create_vector_r() const
2725 {
2726 return VectorTypeR(this->columns());
2727 }
2728
2731 {
2732 const IT_ * prow_ptr(this->row_ptr());
2733 return Index(prow_ptr[row + 1] - prow_ptr[row]);
2734 }
2735
2737
2739 void set_line(const Index row, DT_ * const pval_set, IT_ * const pcol_set,
2740 const Index col_start, const Index stride = 1) const
2741 {
2742 const IT_ * prow_ptr(this->row_ptr());
2743 const IT_ * pcol_ind(this->col_ind());
2744 const DT_ * pval(this->val());
2745
2746 const Index start((Index(prow_ptr[row])));
2747 const Index end((Index(prow_ptr[row + 1] - prow_ptr[row])));
2748 for (Index i(0); i < end; ++i)
2749 {
2750 pval_set[i * stride] = pval[start + i];
2751 pcol_set[i * stride] = pcol_ind[start + i] + IT_(col_start);
2752 }
2753 }
2754
2755 void set_line_reverse(const Index row, const DT_ * const pval_set, const Index stride = 1)
2756 {
2757 const IT_ * prow_ptr(this->row_ptr());
2758 DT_ * pval(this->val());
2759
2760 const Index start((Index(prow_ptr[row])));
2761 const Index end((Index(prow_ptr[row + 1] - prow_ptr[row])));
2762 for (Index i(0); i < end; ++i)
2763 {
2764 pval[start + i] = pval_set[i * stride];
2765 }
2766 }
2767
2769
2770 /* ******************************************************************* */
2771 /* A D J A C T O R I N T E R F A C E I M P L E M E N T A T I O N */
2772 /* ******************************************************************* */
2773 public:
2776 {
2777 return rows();
2778 }
2779
2782 {
2783 return columns();
2784 }
2785
2787 inline ImageIterator image_begin(Index domain_node) const
2788 {
2789 XASSERTM(domain_node < rows(), "Domain node index out of range");
2790 return &this->_indices.at(0)[this->_indices.at(1)[domain_node]];
2791 }
2792
2794 inline ImageIterator image_end(Index domain_node) const
2795 {
2796 XASSERTM(domain_node < rows(), "Domain node index out of range");
2797 return &this->_indices.at(0)[this->_indices.at(1)[domain_node + 1]];
2798 }
2799
2806 friend std::ostream & operator<< (std::ostream & lhs, const SparseMatrixCSR & b)
2807 {
2808
2809 lhs << "[\n";
2810 for (Index i(0) ; i < b.rows() ; ++i)
2811 {
2812 lhs << "[";
2813 for (Index j(0) ; j < b.columns() ; ++j)
2814 {
2815 lhs << " " << stringify(b(i, j));
2816 }
2817 lhs << "]\n";
2818 }
2819 lhs << "]\n";
2820
2821 return lhs;
2822 }
2823 }; //SparseMatrixCSR
2824
2825#ifdef FEAT_EICKT
2826 extern template class SparseMatrixCSR<float, std::uint32_t>;
2827 extern template class SparseMatrixCSR<double, std::uint32_t>;
2828 extern template class SparseMatrixCSR<float, std::uint64_t>;
2829 extern template class SparseMatrixCSR<double, std::uint64_t>;
2830#endif
2831
2832 } // namespace LAFEM
2833} // namespace FEAT
#define ASSERT(expr)
Debug-Assertion macro definition.
Definition: assertion.hpp:229
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define ASSERTM(expr, msg)
Debug-Assertion macro definition with custom message.
Definition: assertion.hpp:230
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
Adjacency Graph implementation.
Definition: graph.hpp:34
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374
Index get_num_indices() const
Returns the total number indices.
Definition: graph.hpp:390
Index size() const
returns the size of the permutation
Index * get_perm_pos()
returns the permute-position array
Permutation inverse() const
Computes the inverse permutation.
Container base class.
Definition: container.hpp:220
const std::vector< IT_ * > & get_indices() const
Returns a list of all Index arrays.
Definition: container.hpp:1078
std::vector< DT_ * > _elements
List of pointers to all datatype dependent arrays.
Definition: container.hpp:226
Index used_elements() const
Returns the number of effective stored elements.
Definition: container.hpp:1155
const std::vector< DT_ * > & get_elements() const
Returns a list of all data arrays.
Definition: container.hpp:1068
std::vector< Index > _elements_size
List of corresponding datatype array sizes.
Definition: container.hpp:230
Index size() const
Returns the containers size.
Definition: container.hpp:1136
void assign(const Container< DT2_, IT2_ > &other)
Assignment operation.
Definition: container.hpp:280
std::vector< IT_ * > _indices
List of pointers to all IT_ dependent arrays.
Definition: container.hpp:228
void clone(const Container &other, CloneMode clone_mode=CloneMode::Weak)
Clone operation.
Definition: container.hpp:902
void move(Container &&other)
Assignment move operation.
Definition: container.hpp:989
virtual void clear()
Free all allocated arrays.
Definition: container.hpp:875
std::vector< Index > _indices_size
List of corresponding IT_ array sizes.
Definition: container.hpp:232
void format(DT_ value=DT_(0))
Reset all elements of the container to a given value or zero if missing.
Definition: container.hpp:851
std::vector< Index > _scalar_index
List of scalars with datatype index.
Definition: container.hpp:234
Blocked Dense data vector class template.
void copy(const DenseVectorBlocked &x, bool full=false)
Performs .
auto elements() const -> const typename Intern::DenseVectorBlockedPerspectiveHelper< DT_, BlockSize_, perspective_ >::Type *
Retrieve a pointer to the data array.
Index size() const
The number of elements.
Dense data vector class template.
DT_ * elements()
Get a pointer to the data array.
void copy(const VT_ &a)
Performs .
Config class for serialize parameter.
Definition: container.hpp:47
Layout scheme for sparse matrix containers.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
auto val() const -> const typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
Retrieve non zero element array.
Index rows() const
Retrieve matrix row count.
Index used_elements() const
Retrieve non zero element count.
Index columns() const
Retrieve matrix column count.
Gather-Axpy operation for SparseMatrixCSR.
Scatter-Axpy operation for SparseMatrixCSR.
CSR based sparse matrix.
void row_norm2(VectorTypeL &row_norms) const
Computes the 2-norm for every row.
void extract_diag(VectorTypeL &diag) const
extract main diagonal vector from matrix
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
VectorTypeR create_vector_r() const
Returns a new compatible R-Vector.
void add_double_mat_product(const LAFEM::SparseMatrixCSR< DT_, IT_ > &d, const LAFEM::DenseVector< DT_, IT_ > &a, const LAFEM::SparseMatrixCSR< DT_, IT_ > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a double-matrix product onto this matrix.
DT_ norm_frobenius() const
Calculates the Frobenius norm of this matrix.
SparseMatrixCSR & operator=(SparseMatrixCSR &&other)
Move operator.
void bandwidth_row(Index &bandw, Index &bandw_i) const
Retrieve maximum bandwidth among all rows.
void radius_row(Index &radius, Index &radius_i) const
Retrieve maximum radius among all rows.
void scale(const SparseMatrixCSR &x, const DT_ alpha)
Calculate .
void extract_diag_indices(DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
DenseVector< DT_, IT_ > VectorTypeL
Compatible L-vector type.
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void clone(const SparseMatrixCSR< DT2_, IT2_ > &other, CloneMode clone_mode=CloneMode::Weak)
Clone operation.
void copy(const SparseMatrixCSR &x, bool full=false)
Performs .
void read_from(FileMode mode, const String &filename)
Read in matrix from file.
SparseMatrixCSR transpose() const
Calculate .
DT_ ValueType
Value type, meaning the type of each 'block'.
SparseMatrixCSR(const Adjacency::Graph &graph)
Constructor.
void add_mat_mat_product(const LAFEM::SparseMatrixCSR< DT_, IT_ > &d, const LAFEM::SparseMatrixCSR< DT_, IT_ > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a matrix-matrix product onto this matrix.
DT_ max_element() const
Retrieve the maximum value of this matrix.
DT_ max_abs_element() const
Retrieve the absolute maximum value of this matrix.
void add_double_mat_product(const LAFEM::SparseMatrixCSR< DT_, IT_ > &d, const LAFEM::SparseMatrixCSR< DT_, IT_ > &a, const LAFEM::SparseMatrixCSR< DT_, IT_ > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a double-matrix product onto this matrix.
DT_ operator()(Index row, Index col) const
Retrieve specific matrix element.
void convert(const SparseMatrixCSR< DT2_, IT2_ > &other)
Conversion method.
void axpy(const SparseMatrixCSR &x, const DT_ alpha=DT_(1))
Calculate .
SparseMatrixCSR(SparseMatrixCSR &&other)
Move Constructor.
VectorTypeL extract_diag() const
extract main diagonal vector from matrix
void row_norm2sqr(VectorTypeL &row_norms) const
Computes the square of the 2-norm for every row.
void lump_rows(VectorTypeL &lump) const
SparseMatrixCSR(std::vector< char > input)
Constructor.
void apply(DenseVectorBlocked< DT_, IT_, BlockSize_ > &r, const DenseVectorBlocked< DT_, IT_, BlockSize_ > &x) const
Calculate .
DT_ max_rel_diff(const SparseMatrixCSR &x) const
Retrieve the maximum relative difference of this matrix and another one y.max_rel_diff(x) returns .
std::vector< char > serialize(const LAFEM::SerialConfig &config=SerialConfig()) const
Serialization of complete container entity.
void convert(const MT_ &a)
Conversion method.
SparseMatrixCSR(FileMode mode, std::istream &file)
Constructor.
ImageIterator image_begin(Index domain_node) const
void bandwidth_column(Index &bandw, Index &bandw_i) const
Retrieve maximum bandwidth among all columns.
void write_out(FileMode mode, const String &filename, bool symmetric=false) const
Write out matrix to file.
void permute(Adjacency::Permutation &perm_row, Adjacency::Permutation &perm_col)
Permutate matrix rows and columns according to the given Permutations.
DT_ min_element() const
Retrieve the minimum value of this matrix.
SparseMatrixCSR(Index rows_in, Index columns_in)
Constructor.
VectorTypeL create_vector_l() const
Returns a new compatible L-Vector.
IT_ * col_ind()
Retrieve column indices array.
DT_ * val()
Retrieve non zero element array.
DenseVector< DT_, IT_ > VectorTypeR
Compatible R-vector type.
Index rows() const
Retrieve matrix row count.
void deserialize(std::vector< char > input)
Deserialization of complete container entity.
SparseMatrixCSR clone(CloneMode clone_mode=CloneMode::Weak) const
Clone operation.
SparseLayout< IT_, layout_id > layout() const
Retrieve convenient sparse matrix layout object.
Index get_length_of_line(const Index row) const
Returns the number of NNZ-elements of the selected row.
DT_ min_abs_element() const
Retrieve the absolute minimum value of this matrix.
SparseMatrixCSR(const Index rows_in, const Index columns_in, DenseVector< IT_, IT_ > &col_ind_in, DenseVector< DT_, IT_ > &val_in, DenseVector< IT_, IT_ > &row_ptr_in)
Constructor.
Index columns() const
Retrieve matrix column count.
void extract_diag(VectorTypeL &diag, DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
void add_double_mat_product(const LAFEM::SparseMatrixBCSR< DT_, IT_, 1, bs_ > &d, const LAFEM::DenseVectorBlocked< DT_, IT_, bs_ > &a, const LAFEM::SparseMatrixBCSR< DT_, IT_, bs_, 1 > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a double-matrix product onto this matrix.
void row_norm2sqr(VectorTypeL &row_norms, const VectorTypeR &scal) const
Computes the square of the 2-norm for every row, where every row is scaled by a vector.
void scale_rows(const SparseMatrixCSR &x, const DenseVector< DT_, IT_ > &s)
Calculate .
VectorTypeL lump_rows() const
Returns the lumped rows vector.
static constexpr SparseLayoutId layout_id
Our used layout type.
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void read_from(FileMode mode, std::istream &file)
Read in matrix from stream.
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
void radius_column(Index &radius, Index &radius_i) const
Retrieve maximum radius among all column.
SparseMatrixCSR(Index rows_in, Index columns_in, Index used_elements_in)
Constructor.
bool same_layout(const SparseMatrixCSR &x) const
Checks if the structural layout of this matrix matches that of another matrix. This excludes comparis...
Index used_elements() const
Retrieve non zero element count.
SparseMatrixCSR(FileMode mode, const String &filename)
Constructor.
void shrink(DT_ eps)
Shrink matrix and drop small values.
void scale_cols(const SparseMatrixCSR &x, const DenseVector< DT_, IT_ > &s)
Calculate .
IT_ * row_ptr()
Retrieve row start index array.
void write_out(FileMode mode, std::ostream &file, bool symmetric=false) const
Write out matrix to file.
DenseVector< IT_, IT_ > extract_diag_indices() const
extract main diagonal vector from matrix
void apply(DenseVectorBlocked< DT_, IT_, BlockSize_ > &r, const DenseVectorBlocked< DT_, IT_, BlockSize_ > &x, const DenseVectorBlocked< DT_, IT_, BlockSize_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void convert_reverse(MT_ &a) const
Reverse Conversion method.
friend std::ostream & operator<<(std::ostream &lhs, const SparseMatrixCSR &b)
SparseMatrixCSR streaming operator.
SparseMatrixCSR(const SparseLayout< IT_, layout_id > &layout_in)
Constructor.
ImageIterator image_end(Index domain_node) const
void transpose(const SparseMatrixCSR &x)
Calculate .
void convert(const SparseMatrixBCSR< DT2_, IT2_, BlockHeight_, BlockWidth_ > &other)
Conversion method.
void convert(const Adjacency::Graph &graph)
Conversion method.
void convert(const SparseMatrixBanded< DT2_, IT2_ > &other)
Conversion method.
SparseMatrixCSR(const MT_ &other)
Constructor.
static String name()
Returns a descriptive string.
const IT_ * ImageIterator
ImageIterator typedef for Adjactor interface implementation.
static void copy(DT_ *dest, const DT_ *src, const Index count)
Copy memory area from src to dest.
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
static void add_flops(Index flops)
Add an amount of flops to the global flop counter.
Definition: statistics.hpp:206
String class implementation.
Definition: string.hpp:46
Time stamp class.
Definition: time_stamp.hpp:54
double elapsed(const TimeStamp &before) const
Calculates the time elapsed between two time stamps.
Definition: time_stamp.hpp:100
Tiny Matrix class template.
Tiny Vector class template.
constexpr std::size_t FileOutStreamBufferSize
OutStreamBufferSize.
Definition: base.hpp:124
SparseLayoutId
Definition: base.hpp:63
@ symmetric
matrix with symmetric structure and values
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
T_ min(T_ a, T_ b)
Returns the minimum of two values.
Definition: math.hpp:123
T_ max(T_ a, T_ b)
Returns the maximum of two values.
Definition: math.hpp:137
FEAT namespace.
Definition: adjactor.hpp:12
String stringify(const T_ &item)
Converts an item into a String.
Definition: string.hpp:944
String stringify_fp_sci(DataType_ value, int precision=0, int width=0, bool sign=false)
Prints a floating point value to a string in scientific notation.
Definition: string.hpp:1088
std::uint64_t Index
Index data type.