FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
sparse_matrix_bcsr.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
11#include <kernel/lafem/forward.hpp>
12#include <kernel/lafem/container.hpp>
13#include <kernel/lafem/dense_vector.hpp>
14#include <kernel/lafem/sparse_layout.hpp>
15#include <kernel/lafem/arch/scale.hpp>
16#include <kernel/lafem/arch/axpy.hpp>
17#include <kernel/lafem/arch/apply.hpp>
18#include <kernel/lafem/arch/lumping.hpp>
19#include <kernel/lafem/arch/norm.hpp>
20#include <kernel/lafem/arch/scale_row_col.hpp>
21#include <kernel/lafem/arch/row_norm.hpp>
22#include <kernel/lafem/arch/diagonal.hpp>
23#include <kernel/adjacency/graph.hpp>
24#include <kernel/util/tiny_algebra.hpp>
25#include <kernel/util/statistics.hpp>
26#include <kernel/util/time_stamp.hpp>
28
29#include <fstream>
30#include <set>
31
32namespace FEAT
33{
34 namespace LAFEM
35 {
37 namespace Intern
38 {
39 template<typename DT_, typename IT_, int size_>
40 struct BCSRVectorHelper
41 {
42 static_assert(size_ > 1, "invalid block size");
43 typedef DenseVectorBlocked<DT_, IT_, size_> VectorType;
44 };
45
46 template<typename DT_, typename IT_>
47 struct BCSRVectorHelper<DT_, IT_, 1>
48 {
49 typedef DenseVector< DT_, IT_> VectorType;
50 };
51
52 template<typename DT_, int BlockHeight_, int BlockWidth_, Perspective perspective_>
53 struct BCSRPerspectiveHelper
54 {
55 typedef Tiny::Matrix<DT_, BlockHeight_, BlockWidth_> Type;
56 };
57
58 template<typename DT_, int BlockHeight_, int BlockWidth_>
59 struct BCSRPerspectiveHelper<DT_, BlockHeight_, BlockWidth_, Perspective::pod>
60 {
61 typedef DT_ Type;
62 };
63 } // namespace Intern
65
89 template <typename DT_, typename IT_, int BlockHeight_, int BlockWidth_>
90 class SparseMatrixBCSR : public Container<DT_, IT_>
91 {
92 static_assert(BlockHeight_ > 0, "invalid block size");
93 static_assert(BlockWidth_ > 0, "invalid block size");
94
95 public:
97 typedef DT_ DataType;
99 typedef IT_ IndexType;
101 static constexpr int BlockHeight = BlockHeight_;
103 static constexpr int BlockWidth = BlockWidth_;
106
109
111 typedef const IT_* ImageIterator;
112
114 template <typename DT2_ = DT_, typename IT2_ = IT_>
116
118 template <typename DataType2_, typename IndexType2_>
120
122 typedef typename Intern::BCSRVectorHelper<DT_, IT_, BlockHeight_>::VectorType VectorTypeL;
124 typedef typename Intern::BCSRVectorHelper<DT_, IT_, BlockWidth_>::VectorType VectorTypeR;
125
126 static constexpr bool is_global = false;
127 static constexpr bool is_local = true;
128
137 {
138 public:
140 typedef DT_ DataType;
141 typedef IT_ IndexType;
143
144 private:
145#ifdef DEBUG
146 const IT_ _deadcode;
147#endif
148 Index _num_rows;
149 Index _num_cols;
150 IT_* _row_ptr;
151 IT_* _col_idx;
152 IT_* _col_ptr;
153 ValueType* _data;
154
155 public:
156 explicit ScatterAxpy(MatrixType& matrix) :
157#ifdef DEBUG
158 _deadcode(~IT_(0)),
159#endif
160 _num_rows(matrix.rows()),
161 _num_cols(matrix.columns()),
162 _row_ptr(matrix.row_ptr()),
163 _col_idx(matrix.col_ind()),
164 _col_ptr(nullptr),
165 _data(matrix.val())
166 {
167 // allocate column-pointer array
168 _col_ptr = new IT_[_num_cols];
169#ifdef DEBUG
170 for(Index i(0); i < _num_cols; ++i)
171 {
172 _col_ptr[i] = _deadcode;
173 }
174#endif
175 }
176
177 virtual ~ScatterAxpy()
178 {
179 if(_col_ptr != nullptr)
180 {
181 delete [] _col_ptr;
182 }
183 }
184
185 template<typename LocalMatrix_, typename RowMapping_, typename ColMapping_>
186 void operator()(const LocalMatrix_& loc_mat, const RowMapping_& row_map,
187 const ColMapping_& col_map, DT_ alpha = DT_(1))
188 {
189 // loop over all local row entries
190 for(int i(0); i < row_map.get_num_local_dofs(); ++i)
191 {
192 // fetch row index
193 const Index ix = row_map.get_index(i);
194
195 // build column pointer for this row entry contribution
196 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
197 {
198 _col_ptr[_col_idx[k]] = k;
199 }
200
201 // loop over all local column entries
202 for(int j(0); j < col_map.get_num_local_dofs(); ++j)
203 {
204 // fetch column index
205 const Index jx = col_map.get_index(j);
206
207 // ensure that the column pointer is valid for this index
208 ASSERTM(_col_ptr[jx] != _deadcode, "invalid column index");
209
210 // incorporate data into global matrix
211 _data[_col_ptr[jx]] += alpha * loc_mat[i][j];
212
213 // continue with next column entry
214 }
215
216#ifdef DEBUG
217 // reformat column-pointer array
218 for(IT_ k(_row_ptr[ix]); k < _row_ptr[ix + 1]; ++k)
219 {
220 _col_ptr[_col_idx[k]] = _deadcode;
221 }
222#endif
223 // continue with next row entry
224 }
225 }
226 }; // class ScatterAxpy
227
228 private:
229 Index & _size()
230 {
231 return this->_scalar_index.at(0);
232 }
233
234 Index & _rows()
235 {
236 return this->_scalar_index.at(1);
237 }
238
239 Index & _columns()
240 {
241 return this->_scalar_index.at(2);
242 }
243
244 Index & _used_elements()
245 {
246 return this->_scalar_index.at(3);
247 }
248
249 public:
250
256 explicit SparseMatrixBCSR() :
257 Container<DT_, IT_> (0)
258 {
259 this->_scalar_index.push_back(0);
260 this->_scalar_index.push_back(0);
261 this->_scalar_index.push_back(0);
262 }
263
275 explicit SparseMatrixBCSR(Index rows_in, Index columns_in) :
276 Container<DT_, IT_> (rows_in * columns_in)
277 {
278 this->_scalar_index.push_back(rows_in);
279 this->_scalar_index.push_back(columns_in);
280 this->_scalar_index.push_back(0);
281 }
282
294 explicit SparseMatrixBCSR(Index rows_in, Index columns_in, Index used_elements_in) :
295 Container<DT_, IT_> (rows_in * columns_in)
296 {
297 XASSERT(rows_in != Index(0) && columns_in != Index(0));
298
299 this->_scalar_index.push_back(rows_in);
300 this->_scalar_index.push_back(columns_in);
301 this->_scalar_index.push_back(used_elements_in);
302
303 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
304 this->_indices_size.push_back(_used_elements());
305
306 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_rows() + 1));
307 this->_indices_size.push_back(_rows() + 1);
308
309 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(used_elements<Perspective::pod>()));
310 this->_elements_size.push_back(used_elements<Perspective::pod>());
311
312 }
313
321 explicit SparseMatrixBCSR(const SparseLayout<IT_, layout_id> & layout_in) :
322 Container<DT_, IT_> (layout_in._scalar_index.at(0))
323 {
324 this->_indices.assign(layout_in._indices.begin(), layout_in._indices.end());
325 this->_indices_size.assign(layout_in._indices_size.begin(), layout_in._indices_size.end());
326 this->_scalar_index.assign(layout_in._scalar_index.begin(), layout_in._scalar_index.end());
327
328 for (auto i : this->_indices)
330
331 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(used_elements<Perspective::pod>()));
332 this->_elements_size.push_back(used_elements<Perspective::pod>());
333 }
334
342 explicit SparseMatrixBCSR(const Adjacency::Graph & graph) :
343 SparseMatrixBCSR(graph.get_num_nodes_domain(), graph.get_num_nodes_image(), graph.get_num_indices())
344 {
345 const Index num_nnze = graph.get_num_indices();
346 const Index num_rows = graph.get_num_nodes_domain();
347 const Index * dom_ptr(graph.get_domain_ptr());
348 const Index * img_idx(graph.get_image_idx());
349 IT_ * prow_ptr(this->row_ptr());
350 IT_ * pcol_idx(this->col_ind());
351
352 FEAT_PRAGMA_OMP(parallel for)
353 for(Index i = 0; i <= num_rows; ++i)
354 prow_ptr[i] = IT_(dom_ptr[i]);
355
356 FEAT_PRAGMA_OMP(parallel for)
357 for(Index i = 0; i < num_nnze; ++i)
358 pcol_idx[i] = IT_(img_idx[i]);
359 }
360
369 explicit SparseMatrixBCSR(FileMode mode, const String& filename) :
370 Container< DT_, IT_>(0)
371 {
372 read_from(mode, filename);
373 }
374
383 explicit SparseMatrixBCSR(FileMode mode, std::istream& file) :
384 Container<DT_, IT_>(0)
385 {
386 read_from(mode, file);
387 }
388
401 explicit SparseMatrixBCSR(const Index rows_in, const Index columns_in,
402 DenseVector< IT_, IT_> & col_ind_in, DenseVector< DT_, IT_> & val_in, DenseVector< IT_, IT_> & row_ptr_in) :
403 Container<DT_, IT_>(rows_in * columns_in)
404 {
406 XASSERT(col_ind_in.size() > 0);
407 XASSERT(val_in.size() > 0);
408 XASSERT(row_ptr_in.size() > 0);
409
410 XASSERT(rows_in != Index(0) && columns_in != Index(0));
411 XASSERTM(val_in.size() % (BlockHeight_ * BlockWidth_) == 0, "input values size is not a multiple of container blocksize!");
412
413 this->_scalar_index.push_back(rows_in);
414 this->_scalar_index.push_back(columns_in);
415 this->_scalar_index.push_back(val_in.size() / Index(BlockHeight_ * BlockWidth_));
416
417 this->_elements.push_back(val_in.elements());
418 this->_elements_size.push_back(val_in.size());
419 this->_indices.push_back(col_ind_in.elements());
420 this->_indices_size.push_back(col_ind_in.size());
421 this->_indices.push_back(row_ptr_in.elements());
422 this->_indices_size.push_back(row_ptr_in.size());
423
424 for (Index i(0) ; i < this->_elements.size() ; ++i)
426 for (Index i(0) ; i < this->_indices.size() ; ++i)
428 }
429
437 template <typename DT2_ = DT_, typename IT2_ = IT_>
438 explicit SparseMatrixBCSR(std::vector<char> input) :
439 Container<DT_, IT_>(0)
440 {
441 deserialize<DT2_, IT2_>(input);
442 }
443
452 Container<DT_, IT_>(std::forward<SparseMatrixBCSR>(other))
453 {
454 }
455
464 {
465 this->move(std::forward<SparseMatrixBCSR>(other));
466
467 return *this;
468 }
469
479 {
481 t.clone(*this, clone_mode);
482 return t;
483 }
484
493 template<typename DT2_, typename IT2_>
495 {
496 Container<DT_, IT_>::clone(other, clone_mode);
497 }
498
506 template <typename DT2_, typename IT2_>
508 {
509 this->assign(other);
510 }
511
512 template <typename DT2_, typename IT2_>
513 void convert(const SparseMatrixCSR<DT2_, IT2_>& other)
514 {
515 this->clear();
516 // setup internal sizes
517 Index n_rows = Index(other.rows()) / Index(BlockHeight);
518 Index n_cols = Index(other.columns()) / Index(BlockWidth);
519
520 // temprorary arrays for row and column ptr
521 std::vector<IndexType> tmp_row_ptr(n_rows+1u);
522 std::vector<IndexType> tmp_col_ptr;
523
524 // ptrs of csr matrix
525 const IT2_* _crs_row = other.row_ptr();
526 const IT2_* _crs_col = other.col_ind();
527 const DT2_ * _crs_val = other.val();
528
529 // reserve to sensical size, we will have at most used_element blocks
530 tmp_col_ptr.reserve(other.used_elements());
531
532 // for each block row, gather which blocks should be actually allocated
533 std::set<IndexType> tmp_uni_col;
534
535 tmp_row_ptr[0] = IndexType(0);
536 for(Index b_row = 0; b_row < n_rows; ++b_row)
537 {
538 for(Index l_row = b_row*Index(BlockHeight); l_row < (b_row+1)*Index(BlockHeight); ++l_row)
539 {
540 for(IT2_ idx = _crs_row[l_row]; idx < _crs_row[l_row+1]; ++idx)
541 {
542 tmp_uni_col.insert(IndexType(_crs_col[idx])/IndexType(BlockWidth));
543 }
544 }
545 tmp_row_ptr[b_row+1] = IndexType(tmp_uni_col.size()) + tmp_row_ptr[b_row];
546 std::for_each(tmp_uni_col.begin(), tmp_uni_col.end(), [&](const auto& ele){tmp_col_ptr.push_back(ele);});
547 // std::printf("Brow %i: ", int(b_row));
548 // std::for_each(tmp_uni_col.begin(), tmp_uni_col.end(), [&](const auto& ele){std::cout << ele << ", ";});
549 // std::cout << "\n";
550 tmp_uni_col.clear();
551 }
552
553 Index elements_size = tmp_col_ptr.size();
554 // std::printf("Elements size %i\n", int(elements_size));
555 // std::printf("Tmp col ptr:\n");
556 // std::for_each(tmp_col_ptr.begin(), tmp_col_ptr.end(), [&](const auto& ele){std::cout << ele << ", ";});
557 // std::cout << "\n";
558
559 // allocate actual matrix data
560 this->_scalar_index.push_back(n_rows*n_cols);
561 this->_scalar_index.push_back(n_rows);
562 this->_scalar_index.push_back(n_cols);
563 this->_scalar_index.push_back(elements_size);
564
565 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_used_elements()));
566 this->_indices_size.push_back(_used_elements());
567
568 this->_indices.push_back(MemoryPool::template allocate_memory<IT_>(_rows() + 1));
569 this->_indices_size.push_back(_rows() + 1);
570
571 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(used_elements<Perspective::pod>()));
572 this->_elements_size.push_back(used_elements<Perspective::pod>());
573
574 auto* row_ptr = this->_indices.at(1);
575 auto* col = this->_indices.at(0);
576 auto* val = this->_elements.at(0);
577 // now, fill up array data
578 std::copy(tmp_row_ptr.begin(), tmp_row_ptr.end(), row_ptr);
579 std::copy(tmp_col_ptr.begin(), tmp_col_ptr.end(), col);
580 // std::printf("Row ptr:\n");
581 // std::for_each(row_ptr, row_ptr+n_rows+1, [&](const auto& ele){std::cout << ele << ", ";});
582 // std::cout << "\n";
583 // std::printf("Col ptr:\n");
584 // std::for_each(col, col+elements_size, [&](const auto& ele){std::cout << ele << ", ";});
585 // std::cout << "\n";
586
587 // clear temprorary arrays
588 tmp_row_ptr.clear();
589 tmp_col_ptr.clear();
590
591 // format data
592 this->format();
593
594 // run through crs data and simply write to the correct location, this can be done
595 // in parallel
596 FEAT_PRAGMA_OMP(parallel for)
597 for(Index b_row = Index(0); b_row < n_rows; ++b_row)
598 {
599 for(Index l_row = Index(0); l_row < Index(BlockHeight); ++l_row)
600 {
601 IndexType b_idx = row_ptr[b_row];
602 for(IT2_ idx = _crs_row[b_row*Index(BlockHeight) + l_row]; idx < _crs_row[b_row*Index(BlockHeight) + l_row + Index(1)]; ++idx)
603 {
604 //advance block row index until we find our current row
605 while(col[b_idx] != IndexType(_crs_col[idx]/BlockWidth))
606 {
607 ++b_idx;
608 }
609 ASSERTM(col[b_idx] == IndexType(_crs_col[idx]/BlockWidth), "Blocked Column Array does not fit csr columns");
610 // write data
611 val[b_idx*BlockHeight*BlockWidth + BlockWidth*l_row + (_crs_col[idx]%BlockWidth)] = DT_(_crs_val[idx]);
612 }
613 }
614 }
615 //done
616 }
617
625 void convert(const Adjacency::Graph & graph)
626 {
627 this->move(SparseMatrixBCSR(graph));
628 }
629
638 {
639 for (Index i(0) ; i < this->_elements.size() ; ++i)
641 for (Index i(0) ; i < this->_indices.size() ; ++i)
643
644 this->_elements.clear();
645 this->_indices.clear();
646 this->_elements_size.clear();
647 this->_indices_size.clear();
648 this->_scalar_index.clear();
649
650 this->_indices.assign(layout_in._indices.begin(), layout_in._indices.end());
651 this->_indices_size.assign(layout_in._indices_size.begin(), layout_in._indices_size.end());
652 this->_scalar_index.assign(layout_in._scalar_index.begin(), layout_in._scalar_index.end());
653
654 for (auto i : this->_indices)
656
657 this->_elements.push_back(MemoryPool::template allocate_memory<DT_>(used_elements<Perspective::pod>()));
658 this->_elements_size.push_back(used_elements<Perspective::pod>());
659
660 return *this;
661 }
662
670 template <typename DT2_ = DT_, typename IT2_ = IT_>
671 void deserialize(std::vector<char> input)
672 {
673 this->template _deserialize<DT2_, IT2_>(FileMode::fm_bcsr, input);
674 }
675
686 template <typename DT2_ = DT_, typename IT2_ = IT_>
687 std::vector<char> serialize(const LAFEM::SerialConfig& config = SerialConfig()) const
688 {
689 return this->template _serialize<DT2_, IT2_>(FileMode::fm_bcsr, config);
690 }
691
698 void read_from(FileMode mode, const String& filename)
699 {
700 std::ios_base::openmode bin = std::ifstream::in | std::ifstream::binary;
701 if(mode == FileMode::fm_mtx)
702 bin = std::ifstream::in;
703 std::ifstream file(filename.c_str(), bin);
704 if (! file.is_open())
705 XABORTM("Unable to open Matrix file " + filename);
706 read_from(mode, file);
707 file.close();
708 }
715 void read_from(FileMode mode, std::istream& file)
716 {
717 switch(mode)
718 {
720 /*case FileMode::fm_mtx:
721 * read_from_mtx(filename);
722 * break;*/
725 this->template _deserialize<double, std::uint64_t>(FileMode::fm_bcsr, file);
726 break;
727 default:
728 XABORTM("Filemode not supported!");
729 }
730 }
731
732 /*
733 * \brief Read in matrix from MatrixMarket mtx file.
734 *
735 * \param[in] filename The file that shall be read in.
736 */
737 /*void read_from_mtx(String filename)
738 {
739 std::ifstream file(filename.c_str(), std::ifstream::in);
740 if (! file.is_open())
741 XABORTM("Unable to open Matrix file " + filename);
742 read_from_mtx(file);
743 file.close();
744 }*/
745
746 /*
747 * \brief Read in matrix from MatrixMarket mtx stream.
748 *
749 * \param[in] file The stream that shall be read in.
750 */
751 /*void read_from_mtx(std::istream& file)
752 {
753 this->clear();
754 this->_scalar_index.push_back(0);
755 this->_scalar_index.push_back(0);
756 this->_scalar_index.push_back(0);
757 this->_scalar_index.push_back(0);
758
759 itd::map<IT_, std::map<IT_, DT_> > entries; // map<row, map<column, value> >
760
761 Index ue(0);
762 String line;
763 std::getline(file, line);
764 const bool general((line.find("%%MatrixMarket matrix coordinate real general") != String::npos) ? true : false);
765 const bool symmetric((line.find("%%MatrixMarket matrix coordinate real symmetric") != String::npos) ? true : false);
766
767 if (symmetric == false && general == false)
768 {
769 XABORTM("Input-file is not a compatible mtx-file");
770 }
771
772 while(!file.eof())
773 {
774 std::getline(file,line);
775 if (file.eof())
776 XABORTM("Input-file is empty");
777
778 String::size_type begin(line.find_first_not_of(" "));
779 if (line.at(begin) != '%')
780 break;
781 }
782 {
783 String::size_type begin(line.find_first_not_of(" "));
784 line.erase(0, begin);
785 String::size_type end(line.find_first_of(" "));
786 String srow(line, 0, end);
787 Index row((Index)atol(srow.c_str()));
788 line.erase(0, end);
789
790 begin = line.find_first_not_of(" ");
791 line.erase(0, begin);
792 end = line.find_first_of(" ");
793 String scol(line, 0, end);
794 Index col((Index)atol(scol.c_str()));
795 line.erase(0, end);
796 _rows() = row;
797 _columns() = col;
798 _size() = this->rows() * this->columns();
799 }
800
801 while(!file.eof())
802 {
803 std::getline(file, line);
804 if (file.eof())
805 break;
806
807 String::size_type begin(line.find_first_not_of(" "));
808 line.erase(0, begin);
809 String::size_type end(line.find_first_of(" "));
810 String srow(line, 0, end);
811 IT_ row((IT_)atol(srow.c_str()));
812 --row;
813 line.erase(0, end);
814
815 begin = line.find_first_not_of(" ");
816 line.erase(0, begin);
817 end = line.find_first_of(" ");
818 String scol(line, 0, end);
819 IT_ col((IT_)atol(scol.c_str()));
820 --col;
821 line.erase(0, end);
822
823 begin = line.find_first_not_of(" ");
824 line.erase(0, begin);
825 end = line.find_first_of(" ");
826 String sval(line, 0, end);
827 DT_ tval((DT_)atof(sval.c_str()));
828
829 entries[IT_(row)].insert(std::pair<IT_, DT_>(col, tval));
830 ++ue;
831 if (symmetric == true && row != col)
832 {
833 entries[IT_(col)].insert(std::pair<IT_, DT_>(row, tval));
834 ++ue;
835 }
836 }
837 _size() = this->rows() * this->columns();
838 _used_elements() = ue;
839
840 DT_ * tval = new DT_[ue];
841 IT_ * tcol_ind = new IT_[ue];
842 IT_ * trow_ptr = new IT_[rows() + 1];
843
844 IT_ idx(0);
845 Index row_idx(0);
846 for (auto row : entries)
847 {
848 trow_ptr[row_idx] = idx;
849 for (auto col : row.second )
850 {
851 tcol_ind[idx] = col.first;
852 tval[idx] = col.second;
853 ++idx;
854 }
855 row.second.clear();
856 ++row_idx;
857 }
858 trow_ptr[rows()] = IT_(ue);
859 entries.clear();
860
861 this->_elements.push_back(MemoryPool<Mem_>::template allocate_memory<DT_>(_used_elements()));
862 this->_elements_size.push_back(_used_elements());
863 this->_indices.push_back(MemoryPool<Mem_>::template allocate_memory<IT_>(_used_elements()));
864 this->_indices_size.push_back(_used_elements());
865 this->_indices.push_back(MemoryPool<Mem_>::template allocate_memory<IT_>(rows() + 1));
866 this->_indices_size.push_back(rows() + 1);
867
868 MemoryPool<Mem_>::template upload<DT_>(this->_elements.at(0), tval, _used_elements());
869 MemoryPool<Mem_>::template upload<IT_>(this->_indices.at(0), tcol_ind, _used_elements());
870 MemoryPool<Mem_>::template upload<IT_>(this->_indices.at(1), trow_ptr, rows() + 1);
871
872 delete[] tval;
873 delete[] tcol_ind;
874 delete[] trow_ptr;
875 }*/
882 void write_out(FileMode mode, const String& filename) const
883 {
884 std::ios_base::openmode bin = std::ofstream::out | std::ofstream::binary;
885 if(mode == FileMode::fm_mtx)
886 bin = std::ofstream::out;
887 std::ofstream file;
888 char* buff = nullptr;
889 if(mode == FileMode::fm_mtx)
890 {
891 buff = new char[LAFEM::FileOutStreamBufferSize];
892 file.rdbuf()->pubsetbuf(buff, LAFEM::FileOutStreamBufferSize);
893 }
894 file.open(filename.c_str(), bin);
895 if(! file.is_open())
896 XABORTM("Unable to open Matrix file " + filename);
897 write_out(mode, file);
898 file.close();
899 delete[] buff;
900 }
901
908 void write_out(FileMode mode, std::ostream& file) const
909 {
910 switch(mode)
911 {
914 this->template _serialize<double, std::uint64_t>(FileMode::fm_bcsr, file);
915 break;
916 case FileMode::fm_mtx:
917 {
918 file << "%%MatrixMarket matrix coordinate real general\n";
919 file << this->template rows<Perspective::pod>() << " " << this->template columns<Perspective::pod>() << " " << this->template used_elements<Perspective::pod>() << "\n";
920
921 for (Index row(0) ; row < rows() ; ++row)
922 {
923 const IT_ end(this->row_ptr()[row + 1]);
924 for (IT_ i(this->row_ptr()[row]) ; i < end ; ++i)
925 {
926 auto block = this->val()[i];
927 for (int y(0) ; y < BlockHeight_ ; ++y)
928 {
929 for (int x(0) ; x < BlockWidth_ ; ++x)
930 {
931 file << ((int)row * BlockHeight_) + y + 1 << " " << ((int)this->col_ind()[i] * BlockWidth_) + x + 1 << " " << stringify_fp_sci(block[y][x]) << "\n";
932 }
933 }
934 }
935 }
936 break;
937 }
938 default:
939 XABORTM("Filemode not supported!");
940 }
941 }
942
952 {
953 ASSERT(row < rows());
954 ASSERT(col < columns());
955
956 MemoryPool::synchronize();
957
958 for (Index i(this->row_ptr()[row]) ; i < this->row_ptr()[row+1] ; ++i)
959 {
960 if (this->col_ind()[i] == col)
961 {
962 return this->val<Perspective::native>()[i];
963 }
964 if (this->col_ind()[i] > col)
965 break; //return zero element
966 }
967
968 return ValueType(0.);
969 }
970
977 {
979 }
980
987 template <Perspective perspective_ = Perspective::native>
988 Index rows() const
989 {
990 if (perspective_ == Perspective::pod)
991 return this->_scalar_index.at(1) * Index(BlockHeight_);
992 else
993 return this->_scalar_index.at(1);
994 }
995
1002 template <Perspective perspective_ = Perspective::native>
1004 {
1005 if (perspective_ == Perspective::pod)
1006 return this->_scalar_index.at(2) * Index(BlockWidth_);
1007 else
1008 return this->_scalar_index.at(2);
1009 }
1010
1017 template <Perspective perspective_ = Perspective::native>
1019 {
1020 if (perspective_ == Perspective::pod)
1021 return this->_scalar_index.at(3) * Index(BlockHeight_ * BlockWidth_);
1022 else
1023 return this->_scalar_index.at(3);
1024 }
1025
1031 IT_ * col_ind()
1032 {
1033 if (this->size() == 0)
1034 return nullptr;
1035
1036 return this->_indices.at(0);
1037 }
1038
1041 IT_ const * col_ind() const
1042 {
1043 if (this->size() == 0)
1044 return nullptr;
1045
1046 return this->_indices.at(0);
1047 }
1048
1057 template <Perspective perspective_ = Perspective::native>
1058 auto val() const -> const typename Intern::BCSRPerspectiveHelper<DT_, BlockHeight_, BlockWidth_, perspective_>::Type *
1059 {
1060 if (this->size() == 0)
1061 return nullptr;
1062
1063 return (const typename Intern::BCSRPerspectiveHelper<DT_, BlockHeight_, BlockWidth_, perspective_>::Type *)(this->_elements.at(0));
1064 }
1065
1068 template <Perspective perspective_ = Perspective::native>
1069 auto val() -> typename Intern::BCSRPerspectiveHelper<DT_, BlockHeight_, BlockWidth_, perspective_>::Type *
1070 {
1071 if (this->size() == 0)
1072 return nullptr;
1073
1074 return (typename Intern::BCSRPerspectiveHelper<DT_, BlockHeight_, BlockWidth_, perspective_>::Type *)(this->_elements.at(0));
1075 }
1076
1082 IT_ * row_ptr()
1083 {
1084 if (this->_indices.size() == 0)
1085 return nullptr;
1086
1087 return this->_indices.at(1);
1088 }
1089
1092 IT_ const * row_ptr() const
1093 {
1094 if (this->_indices.size() == 0)
1095 return nullptr;
1096
1097 return this->_indices.at(1);
1098 }
1099
1105 static String name()
1106 {
1107 return "SparseMatrixBCSR";
1108 }
1109
1116 void copy(const SparseMatrixBCSR & x, bool full = false)
1117 {
1118 this->_copy_content(x, full);
1119 }
1120
1123
1132 void axpy(
1133 const SparseMatrixBCSR & x,
1134 const DT_ alpha = DT_(1))
1135 {
1136 XASSERTM(x.rows() == this->rows(), "Matrix rows do not match!");
1137 XASSERTM(x.columns() == this->columns(), "Matrix columns do not match!");
1138 XASSERTM(x.used_elements() == this->used_elements(), "Matrix used_elements do not match!");
1139
1140 TimeStamp ts_start;
1141
1142 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1143 Arch::Axpy::value(this->template val<Perspective::pod>(),
1144 alpha,
1145 x.template val<Perspective::pod>(),
1147
1148 TimeStamp ts_stop;
1149 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1150 }
1151
1158 void scale(const SparseMatrixBCSR & x, const DT_ alpha)
1159 {
1160 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1161 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1162 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1163
1164 TimeStamp ts_start;
1165 Statistics::add_flops(this->used_elements<Perspective::pod>());
1166 Arch::Scale::value(this->template val<Perspective::pod>(), x.template val<Perspective::pod>(), alpha, this->used_elements<Perspective::pod>());
1167 TimeStamp ts_stop;
1168 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1169 }
1170
1176 DT_ norm_frobenius() const
1177 {
1178 TimeStamp ts_start;
1179 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1180 DT_ result = Arch::Norm2::value(this->template val<Perspective::pod>(),
1181 this->used_elements<Perspective::pod>());
1182 TimeStamp ts_stop;
1183 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1184 return result;
1185 }
1186
1193 void row_norm2(VectorTypeL& row_norms) const
1194 {
1195 XASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1196
1197 TimeStamp ts_start;
1198 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1199
1200 Arch::RowNorm::bcsr_norm2(row_norms.template elements<Perspective::pod>(),
1201 this->template val<Perspective::pod>(),
1203
1204 TimeStamp ts_stop;
1205 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1206 }
1207
1214 void row_norm2sqr(VectorTypeL& row_norms) const
1215 {
1216 XASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1217
1218 TimeStamp ts_start;
1219 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1220
1221 Arch::RowNorm::bcsr_norm2sqr(row_norms.template elements<Perspective::pod>(),
1222 this->template val<Perspective::pod>(),
1224
1225 TimeStamp ts_stop;
1226 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1227 }
1228
1248 void row_norm2sqr(VectorTypeL& row_norms, const VectorTypeR& scal) const
1249 {
1250 XASSERTM(row_norms.size() == this->rows(), "Matrix/Vector dimension mismatch");
1251 XASSERTM(scal.size() == this->columns(), "Matrix/scalings dimension mismatch");
1252
1253 TimeStamp ts_start;
1254 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1255
1256 Arch::RowNorm::bcsr_scaled_norm2sqr(row_norms.template elements<Perspective::pod>(),
1257 scal.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
1259
1260 TimeStamp ts_stop;
1261 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1262 }
1263
1270 {
1271 TimeStamp ts_start;
1272
1273 Index max_abs_index = Arch::MaxAbsIndex::value(this->template val<Perspective::pod>(), this->template used_elements<Perspective::pod>());
1274 ASSERT(max_abs_index < this->template used_elements<Perspective::pod>());
1275 DT_ result;
1276 MemoryPool::template copy<DT_>(&result, this->template val<Perspective::pod>() + max_abs_index, 1);
1277 result = Math::abs(result);
1278
1279 TimeStamp ts_stop;
1280 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1281
1282 return result;
1283 }
1284
1291 {
1292 TimeStamp ts_start;
1293
1294 Index min_abs_index = Arch::MinAbsIndex::value(this->template val<Perspective::pod>(), this->template used_elements<Perspective::pod>());
1295 ASSERT(min_abs_index < this->template used_elements<Perspective::pod>());
1296 DT_ result;
1297 MemoryPool::template copy<DT_>(&result, this->template val<Perspective::pod>() + min_abs_index, 1);
1298 result = Math::abs(result);
1299
1300 TimeStamp ts_stop;
1301 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1302
1303 return result;
1304 }
1305
1311 DT_ max_element() const
1312 {
1313 TimeStamp ts_start;
1314
1315 Index max_index = Arch::MaxIndex::value(this->template val<Perspective::pod>(), this->template used_elements<Perspective::pod>());
1316 ASSERT(max_index < this->template used_elements<Perspective::pod>());
1317 DT_ result;
1318 MemoryPool::template copy<DT_>(&result, this->template val<Perspective::pod>() + max_index, 1);
1319
1320 TimeStamp ts_stop;
1321 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1322
1323 return result;
1324 }
1325
1331 DT_ min_element() const
1332 {
1333 TimeStamp ts_start;
1334
1335 Index min_index = Arch::MinIndex::value(this->template val<Perspective::pod>(), this->template used_elements<Perspective::pod>());
1336 ASSERT(min_index < this->template used_elements<Perspective::pod>());
1337 DT_ result;
1338 MemoryPool::template copy<DT_>(&result, this->template val<Perspective::pod>() + min_index, 1);
1339
1340 TimeStamp ts_stop;
1341 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1342
1343 return result;
1344 }
1345
1352 DT_ max_rel_diff(const SparseMatrixBCSR& x) const
1353 {
1354 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1355 TimeStamp ts_start;
1356
1357 DataType max_rel_diff = Arch::MaxRelDiff::value(this->template val<Perspective::pod>(), x.template val<Perspective::pod>(), this->template used_elements<Perspective::pod>());
1358
1359 TimeStamp ts_stop;
1360 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1361
1362 return max_rel_diff;
1363 }
1364
1373 bool same_layout(const SparseMatrixBCSR& x) const
1374 {
1375 if (this->row_ptr ()== x.row_ptr() && this->col_ind() == x.col_ind())
1376 return true;
1377 if(this->size() == 0 && x.size() == 0 && this->get_elements().size() == 0 && this->get_indices().size() == 0 && x.get_elements().size() == 0 && x.get_indices().size() == 0)
1378 return true;
1379 if (this->rows() != x.rows())
1380 return false;
1381 if (this->columns() != x.columns())
1382 return false;
1383 if (this->used_elements() != x.used_elements())
1384 return false;
1385
1386 IT_ * col_ind_a;
1387 IT_ * col_ind_b;
1388 IT_ * row_ptr_a;
1389 IT_ * row_ptr_b;
1390
1391 col_ind_a = const_cast<IT_*>(this->col_ind());
1392 row_ptr_a = const_cast<IT_*>(this->row_ptr());
1393 col_ind_b = const_cast<IT_*>(x.col_ind());
1394 row_ptr_b = const_cast<IT_*>(x.row_ptr());
1395
1396 bool ret(true);
1397 for (Index i(0) ; i < this->used_elements() ; ++i)
1398 {
1399 if (col_ind_a[i] != col_ind_b[i])
1400 {
1401 ret = false;
1402 break;
1403 }
1404 }
1405 if (ret)
1406 {
1407 for (Index i(0) ; i < this->rows() + 1; ++i)
1408 {
1409 if (row_ptr_a[i] != row_ptr_b[i])
1410 {
1411 ret = false;
1412 break;
1413 }
1414 }
1415 }
1416
1417 return ret;
1418 }
1419
1428 {
1430 x_t.transpose(*this);
1431 return x_t;
1432 }
1433
1440 {
1442 if (x.used_elements() == 0)
1443 {
1444 this->move(SparseMatrixBCSR(x.rows(), x.columns()));
1445 return;
1446 }
1447
1448 const Index txrows(x.rows());
1449 const Index txcolumns(x.columns());
1450 const Index txused_elements(x.used_elements());
1451
1452 const IT_ * ptxcol_ind(x.col_ind());
1453 const IT_ * ptxrow_ptr(x.row_ptr());
1454 const typename XType::ValueType * ptxval(x.val());
1455
1456 DenseVector<IT_, IT_> tcol_ind(txused_elements);
1457 DenseVector<DT_, IT_> tval(txused_elements * BlockHeight_ * BlockWidth_);
1458 DenseVector<IT_, IT_> trow_ptr(txcolumns + 1, IT_(0));
1459
1460 IT_ * ptcol_ind(tcol_ind.elements());
1461 ValueType * ptval((ValueType*)tval.elements());
1462 IT_ * ptrow_ptr(trow_ptr.elements());
1463
1464 ptrow_ptr[0] = 0;
1465
1466 for (Index i(0); i < txused_elements; ++i)
1467 {
1468 ++ptrow_ptr[ptxcol_ind[i] + 1];
1469 }
1470
1471 for (Index i(1); i < txcolumns - 1; ++i)
1472 {
1473 ptrow_ptr[i + 1] += ptrow_ptr[i];
1474 }
1475
1476 for (Index i(0); i < txrows; ++i)
1477 {
1478 for (IT_ k(ptxrow_ptr[i]); k < ptxrow_ptr[i+1]; ++k)
1479 {
1480 const IT_ l(ptxcol_ind[k]);
1481 const IT_ j(ptrow_ptr[l]);
1482 ptval[j].set_transpose(ptxval[k]);
1483 ptcol_ind[j] = IT_(i);
1484 ++ptrow_ptr[l];
1485 }
1486 }
1487
1488 for (Index i(txcolumns); i > 0; --i)
1489 {
1490 ptrow_ptr[i] = ptrow_ptr[i - 1];
1491 }
1492 ptrow_ptr[0] = 0;
1493
1494 this->move(SparseMatrixBCSR<DT_, IT_, BlockHeight_, BlockWidth_> (txcolumns, txrows, tcol_ind, tval, trow_ptr));
1495 }
1496
1504 {
1505 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1506 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1507 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1508 XASSERTM(s.size() == this->rows(), "Vector size does not match!");
1509
1510 TimeStamp ts_start;
1511
1512 Statistics::add_flops(this->used_elements<Perspective::pod>());
1513 Arch::ScaleRows::template bcsr<BlockHeight_, BlockWidth_>(this->val<Perspective::pod>(), x.template val<Perspective::pod>(),
1514 this->col_ind(), this->row_ptr(), s.template elements<Perspective::pod>(), this->rows(), this->columns(), this->used_elements());
1515
1516 TimeStamp ts_stop;
1517 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1518 }
1519
1520 void scale_cols(const SparseMatrixBCSR & x, const DenseVectorBlocked<DT_,IT_, BlockWidth_> & s)
1521 {
1522 XASSERTM(x.rows() == this->rows(), "Row count does not match!");
1523 XASSERTM(x.columns() == this->columns(), "Column count does not match!");
1524 XASSERTM(x.used_elements() == this->used_elements(), "Nonzero count does not match!");
1525 XASSERTM(s.size() == this->columns(), "Vector size does not match!");
1526
1527 TimeStamp ts_start;
1528
1529 Statistics::add_flops(this->used_elements<Perspective::pod>());
1530 Arch::ScaleCols::template bcsr<BlockHeight_, BlockWidth_>(this->val<Perspective::pod>(), x.template val<Perspective::pod>(),
1531 this->col_ind(), this->row_ptr(), s.template elements<Perspective::pod>(), this->rows(), this->columns(), this->used_elements());
1532
1533 TimeStamp ts_stop;
1534 Statistics::add_time_axpy(ts_stop.elapsed(ts_start));
1535 }
1536
1546 {
1547 XASSERTM(r.size() == this->rows<Perspective::pod>(), "Vector size of r does not match!");
1548 XASSERTM(x.size() == this->columns<Perspective::pod>(), "Vector size of x does not match!");
1549
1550 TimeStamp ts_start;
1551 FEAT_KERNEL_MARKER_START("BCSR-apply");
1552
1553 if (this->used_elements() == 0)
1554 {
1555 r.format();
1556 return;
1557 }
1558
1559 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1560
1561 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1562 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1563 r.elements(), DT_(1), x.elements(), DT_(0), r.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1564 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1565
1566 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1567 TimeStamp ts_stop;
1568 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1569 }
1570
1580 {
1581 XASSERTM(r.size() == this->columns<Perspective::pod>(), "Vector size of r does not match!");
1582 XASSERTM(x.size() == this->rows<Perspective::pod>(), "Vector size of x does not match!");
1583
1584 TimeStamp ts_start;
1585 FEAT_KERNEL_MARKER_START("BCSR-apply");
1586
1587 if (this->used_elements() == 0)
1588 {
1589 r.format();
1590 return;
1591 }
1592
1593 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1594
1595 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1596 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1597 r.elements(), DT_(1), x.elements(), DT_(0), r.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1598 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1599
1600 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1601 TimeStamp ts_stop;
1602 Statistics::add_time_reduction(ts_stop.elapsed(ts_start));
1603 }
1604
1614 {
1615 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1616 XASSERTM(x.size() == this->columns<Perspective::pod>(), "Vector size of x does not match!");
1617
1618 TimeStamp ts_start;
1619
1620 FEAT_KERNEL_MARKER_START("BCSR-apply");
1621 if (this->used_elements() == 0)
1622 {
1623 r.format();
1624 return;
1625 }
1626
1627 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1628
1629 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1630
1631 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1632 r.template elements<Perspective::pod>(), DT_(1), x.elements(), DT_(0), r.template elements<Perspective::pod>(), this->template val<Perspective::pod>(), this->col_ind(),
1633 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1634
1635 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1636 TimeStamp ts_stop;
1637 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1638 }
1639
1640
1650 {
1651 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
1652 XASSERTM(x.size() == this->rows<Perspective::pod>(), "Vector size of x does not match!");
1653
1654 TimeStamp ts_start;
1655
1656 FEAT_KERNEL_MARKER_START("BCSR-apply");
1657 if (this->used_elements() == 0)
1658 {
1659 r.format();
1660 return;
1661 }
1662
1663 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1664
1665 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1666
1667 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1668 r.template elements<Perspective::pod>(), DT_(1), x.elements(), DT_(0), r.template elements<Perspective::pod>(), this->template val<Perspective::pod>(), this->col_ind(),
1669 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1670
1671 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1672 TimeStamp ts_stop;
1673 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1674 }
1675
1685 {
1686 XASSERTM(r.size() == this->rows<Perspective::pod>(), "Vector size of r does not match!");
1687 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1688
1689 TimeStamp ts_start;
1690
1691 FEAT_KERNEL_MARKER_START("BCSR-apply");
1692 if (this->used_elements() == 0)
1693 {
1694 r.format();
1695 return;
1696 }
1697
1698 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1699
1700 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1701
1702 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1703 r.elements(), DT_(1), x.template elements<Perspective::pod>(), DT_(0), r.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1704 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1705
1706 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1707 TimeStamp ts_stop;
1708 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1709 }
1710
1720 {
1721 XASSERTM(r.size() == this->columns<Perspective::pod>(), "Vector size of r does not match!");
1722 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
1723
1724 TimeStamp ts_start;
1725
1726 FEAT_KERNEL_MARKER_START("BCSR-apply");
1727 if (this->used_elements() == 0)
1728 {
1729 r.format();
1730 return;
1731 }
1732
1733 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1734
1735 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1736
1737 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1738 r.elements(), DT_(1), x.template elements<Perspective::pod>(), DT_(0), r.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1739 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1740
1741 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1742 TimeStamp ts_stop;
1743 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1744 }
1745
1755 {
1756 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1757 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
1758
1759 TimeStamp ts_start;
1760
1761 FEAT_KERNEL_MARKER_START("BCSR-apply");
1762 if (this->used_elements() == 0)
1763 {
1764 r.format();
1765 return;
1766 }
1767
1768 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1769
1770 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1771
1772 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1773 r.template elements<Perspective::pod>(), DT_(1), x.template elements<Perspective::pod>(), DT_(0), r.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
1774 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1775
1776 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1777 TimeStamp ts_stop;
1778 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1779 }
1780
1790 {
1791 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
1792 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
1793
1794 TimeStamp ts_start;
1795
1796 FEAT_KERNEL_MARKER_START("BCSR-apply");
1797 if (this->used_elements() == 0)
1798 {
1799 r.format();
1800 return;
1801 }
1802
1803 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1804
1805 Statistics::add_flops(this->used_elements<Perspective::pod>() * 2);
1806
1807 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1808 r.template elements<Perspective::pod>(), DT_(1), x.template elements<Perspective::pod>(), DT_(0), r.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
1809 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1810
1811 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1812 TimeStamp ts_stop;
1813 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1814 }
1815
1827 void apply(
1829 const DenseVector< DT_, IT_> & x,
1830 const DenseVector< DT_, IT_> & y,
1831 const DT_ alpha = DT_(1)) const
1832 {
1833 XASSERTM(r.size() == this->rows<Perspective::pod>(), "Vector size of r does not match!");
1834 XASSERTM(x.size() == this->columns<Perspective::pod>(), "Vector size of x does not match!");
1835 XASSERTM(y.size() == this->rows<Perspective::pod>(), "Vector size of y does not match!");
1836
1837 TimeStamp ts_start;
1838
1839 FEAT_KERNEL_MARKER_START("BCSR-apply");
1840 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1841 {
1842 r.copy(y);
1843 //r.scale(beta);
1844 return;
1845 }
1846
1847 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1848
1849 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
1850
1851 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1852 r.elements(), alpha, x.elements(), DT_(1), y.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1853 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1854
1855 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1856 TimeStamp ts_stop;
1857 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1858 }
1859
1873 const DenseVector< DT_, IT_> & x,
1874 const DenseVector< DT_, IT_> & y,
1875 const DT_ alpha = DT_(1)) const
1876 {
1877 XASSERTM(r.size() == this->columns<Perspective::pod>(), "Vector size of r does not match!");
1878 XASSERTM(x.size() == this->rows<Perspective::pod>(), "Vector size of x does not match!");
1879 XASSERTM(y.size() == this->columns<Perspective::pod>(), "Vector size of y does not match!");
1880
1881 TimeStamp ts_start;
1882
1883 FEAT_KERNEL_MARKER_START("BCSR-apply");
1884 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1885 {
1886 r.copy(y);
1887 //r.scale(beta);
1888 return;
1889 }
1890
1891 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1892
1893 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
1894
1895 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1896 r.elements(), alpha, x.elements(), DT_(1), y.elements(), this->template val<Perspective::pod>(), this->col_ind(),
1897 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1898
1899 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1900 TimeStamp ts_stop;
1901 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1902 }
1903
1915 void apply(
1917 const DenseVector< DT_, IT_> & x,
1919 const DT_ alpha = DT_(1)) const
1920 {
1921 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
1922 XASSERTM(x.size() == this->columns<Perspective::pod>(), "Vector size of x does not match!");
1923 XASSERTM(y.size() == this->rows(), "Vector size of y does not match!");
1924
1925 TimeStamp ts_start;
1926
1927 FEAT_KERNEL_MARKER_START("BCSR-apply");
1928 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1929 {
1930 r.copy(y);
1931 //r.scale(beta);
1932 return;
1933 }
1934
1935 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1936
1937 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
1938 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
1939 r.template elements<Perspective::pod>(), alpha, x.elements(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(), this->col_ind(),
1940 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1941
1942 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1943 TimeStamp ts_stop;
1944 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1945 }
1946
1960 const DenseVector< DT_, IT_> & x,
1962 const DT_ alpha = DT_(1)) const
1963 {
1964 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
1965 XASSERTM(x.size() == this->rows<Perspective::pod>(), "Vector size of x does not match!");
1966 XASSERTM(y.size() == this->columns(), "Vector size of y does not match!");
1967
1968 TimeStamp ts_start;
1969
1970 FEAT_KERNEL_MARKER_START("BCSR-apply");
1971 if(this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
1972 {
1973 r.copy(y);
1974 //r.scale(beta);
1975 return;
1976 }
1977
1978 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
1979
1980 Statistics::add_flops((this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2);
1981 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
1982 r.template elements<Perspective::pod>(), alpha, x.elements(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(), this->col_ind(),
1983 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
1984
1985 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
1986 TimeStamp ts_stop;
1987 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
1988 }
1989
2001 void apply(
2004 const DenseVector< DT_, IT_> & y,
2005 const DT_ alpha = DT_(1)) const
2006 {
2007 XASSERTM(r.size() == this->rows<Perspective::pod>(), "Vector size of r does not match!");
2008 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
2009 XASSERTM(y.size() == this->rows<Perspective::pod>(), "Vector size of y does not match!");
2010
2011 TimeStamp ts_start;
2012
2013 FEAT_KERNEL_MARKER_START("BCSR-apply");
2014 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2015 {
2016 r.copy(y);
2017 //r.scale(beta);
2018 return;
2019 }
2020
2021 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2022
2023 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2024 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
2025 r.elements(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.elements(), this->template val<Perspective::pod>(), this->col_ind(),
2026 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2027
2028 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2029 TimeStamp ts_stop;
2030 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2031 }
2032
2047 const DenseVector< DT_, IT_> & y,
2048 const DT_ alpha = DT_(1)) const
2049 {
2050 XASSERTM(r.size() == this->columns<Perspective::pod>(), "Vector size of r does not match!");
2051 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
2052 XASSERTM(y.size() == this->columns<Perspective::pod>(), "Vector size of y does not match!");
2053
2054 TimeStamp ts_start;
2055
2056 FEAT_KERNEL_MARKER_START("BCSR-apply");
2057 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2058 {
2059 r.copy(y);
2060 //r.scale(beta);
2061 return;
2062 }
2063
2064 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2065
2066 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2067 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
2068 r.elements(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.elements(), this->template val<Perspective::pod>(), this->col_ind(),
2069 this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2070
2071 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2072 TimeStamp ts_stop;
2073 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2074 }
2075
2087 void apply(
2091 const DT_ alpha = DT_(1)) const
2092 {
2093 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
2094 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
2095 XASSERTM(y.size() == this->rows(), "Vector size of y does not match!");
2096
2097 TimeStamp ts_start;
2098
2099 FEAT_KERNEL_MARKER_START("BCSR-apply");
2100 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2101 {
2102 r.copy(y);
2103 //r.scale(beta);
2104 return;
2105 }
2106
2107 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2108
2109 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2110 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
2111 r.template elements<Perspective::pod>(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
2112 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2113
2114 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2115 TimeStamp ts_stop;
2116 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2117 }
2118
2134 const DT_ alpha = DT_(1)) const
2135 {
2136 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
2137 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
2138 XASSERTM(y.size() == this->columns(), "Vector size of y does not match!");
2139
2140 TimeStamp ts_start;
2141
2142 FEAT_KERNEL_MARKER_START("BCSR-apply");
2143 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2144 {
2145 r.copy(y);
2146 //r.scale(beta);
2147 return;
2148 }
2149
2150 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2151
2152 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2153 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
2154 r.template elements<Perspective::pod>(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
2155 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2156
2157 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2158 TimeStamp ts_stop;
2159 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2160 }
2161
2173 void apply(
2176 const DenseVector< DT_, IT_> & y,
2177 const DT_ alpha = DT_(1)) const
2178 {
2179 XASSERTM(r.size() == this->rows(), "Vector size of r does not match!");
2180 XASSERTM(x.size() == this->columns(), "Vector size of x does not match!");
2181 XASSERTM(y.size() == this->rows<Perspective::pod>(), "Vector size of y does not match!");
2182
2183 TimeStamp ts_start;
2184
2185 FEAT_KERNEL_MARKER_START("BCSR-apply");
2186 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2187 {
2188 r.convert(y);
2189 //r.scale(beta);
2190 return;
2191 }
2192
2193 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2194
2195 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2196 Arch::Apply::template bcsr<BlockHeight_, BlockWidth_>(
2197 r.template elements<Perspective::pod>(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
2198 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2199
2200 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2201 TimeStamp ts_stop;
2202 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2203 }
2204
2219 const DenseVector< DT_, IT_> & y,
2220 const DT_ alpha = DT_(1)) const
2221 {
2222 XASSERTM(r.size() == this->columns(), "Vector size of r does not match!");
2223 XASSERTM(x.size() == this->rows(), "Vector size of x does not match!");
2224 XASSERTM(y.size() == this->columns<Perspective::pod>(), "Vector size of y does not match!");
2225
2226 TimeStamp ts_start;
2227
2228 FEAT_KERNEL_MARKER_START("BCSR-apply");
2229 if (this->used_elements() == 0 || Math::abs(alpha) < Math::eps<DT_>())
2230 {
2231 r.convert(y);
2232 //r.scale(beta);
2233 return;
2234 }
2235
2236 XASSERTM(r.template elements<Perspective::pod>() != x.template elements<Perspective::pod>(), "Vector x and r must not share the same memory!");
2237
2238 Statistics::add_flops( (this->used_elements<Perspective::pod>() + this->rows<Perspective::pod>()) * 2 );
2239 Arch::Apply::template bcsr_transposed<BlockHeight_, BlockWidth_>(
2240 r.template elements<Perspective::pod>(), alpha, x.template elements<Perspective::pod>(), DT_(1), y.template elements<Perspective::pod>(), this->template val<Perspective::pod>(),
2241 this->col_ind(), this->row_ptr(), this->rows(), this->columns(), this->used_elements());
2242
2243 FEAT_KERNEL_MARKER_STOP("BCSR-apply");
2244 TimeStamp ts_stop;
2245 Statistics::add_time_blas2(ts_stop.elapsed(ts_start));
2246 }
2247
2284 const DT_ alpha = DT_(1),
2285 const bool allow_incomplete = false)
2286 {
2287 // validate matrix dimensions
2288 XASSERT(BlockHeight_ == BlockWidth_);
2289 XASSERT(this->rows() == d.rows());
2290 XASSERT(d.columns() == a.rows());
2291 XASSERT(a.columns() == b.rows());
2292 XASSERT(b.columns() == this->columns());
2293
2294 // fetch matrix arrays:
2295 ValueType* data_x = this->val();
2296 const ValueType* data_d = d.val();
2297 const ValueType* data_a = a.val();
2298 const ValueType* data_b = b.val();
2299 const IT_* row_ptr_x = this->row_ptr();
2300 const IT_* col_idx_x = this->col_ind();
2301 const IT_* row_ptr_d = d.row_ptr();
2302 const IT_* col_idx_d = d.col_ind();
2303 const IT_* row_ptr_a = a.row_ptr();
2304 const IT_* col_idx_a = a.col_ind();
2305 const IT_* row_ptr_b = b.row_ptr();
2306 const IT_* col_idx_b = b.col_ind();
2307
2308 // loop over all rows of D and X, resp.
2309 for(IT_ i(0); i < IT_(this->rows()); ++i)
2310 {
2311 // loop over all non-zeros D_ik in row i of D
2312 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2313 {
2314 // get column index k
2315 const IT_ k = col_idx_d[ik];
2316
2317 // loop over all non-zeros A_kl in row k of A
2318 for(IT_ kl(row_ptr_a[k]); kl < row_ptr_a[k+1]; ++kl)
2319 {
2320 // get column index l
2321 const IT_ l = col_idx_a[kl];
2322
2323 // pre-compute factor (alpha * D_ik * A_kl)
2324 ValueType omega;
2325 omega.set_mat_mat_mult(data_d[ik], data_a[kl]);
2326 omega *= alpha;
2327
2328 // loop over all non-zeros B_lj in row j of B and
2329 // loop over all non-zeros X_ij in row i of X and
2330 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2331 // X_i. += (alpha * D_ik * A_kl) * B_l.
2332 IT_ ij = row_ptr_x[i];
2333 IT_ lj = row_ptr_b[l];
2334 while(lj < row_ptr_b[l+1])
2335 {
2336 if(ij >= row_ptr_x[i+1])
2337 {
2338 // we have reached the end of row X_i, but there is at least
2339 // one entry in row B_l left, so the pattern of X is incomplete
2340 // We let the caller decide whether this is a valid case or not:
2341 if(allow_incomplete)
2342 break; // continue with next row
2343 else
2344 XABORTM("Incomplete output matrix structure");
2345 }
2346 else if(col_idx_x[ij] == col_idx_b[lj])
2347 {
2348 // okay: B_lj contributes to X_ij
2349 ValueType temp;
2350 temp.set_mat_mat_mult(omega, data_b[lj]);
2351 data_x[ij] += temp;
2352 ++ij;
2353 ++lj;
2354 }
2355 else if(col_idx_x[ij] < col_idx_b[lj])
2356 {
2357 // entry X_ij exists, but B_lj is missing:
2358 // this is a perfectly valid case, so continue with the next non-zero of X_i
2359 ++ij;
2360 }
2361 else //if(col_idx_x[ij] > col_idx_b[lj])
2362 {
2363 // If we come out here, then the sparsity pattern of X is incomplete:
2364 // B_lj is meant to be added onto X_ij, but the entry X_ij is missing
2365 // We let the caller decide whether this is a valid case or not:
2366 if(allow_incomplete)
2367 ++lj;
2368 else
2369 XABORTM("Incomplete output matrix structure");
2370 }
2371 }
2372 }
2373 }
2374 }
2375 }
2376
2410 const DT_ alpha = DT_(1),
2411 const bool allow_incomplete = false)
2412 {
2413 // validate matrix dimensions
2414 XASSERT(BlockHeight_ == BlockWidth_);
2415 XASSERT(this->rows() == d.rows());
2416 XASSERT(d.columns() == a.rows());
2417 XASSERT(a.columns() == b.rows());
2418 XASSERT(b.columns() == this->columns());
2419
2420 // fetch matrix arrays:
2421 ValueType* data_x = this->val();
2422 const DT_* data_d = d.val();
2423 const ValueType* data_a = a.val();
2424 const DT_* data_b = b.val();
2425 const IT_* row_ptr_x = this->row_ptr();
2426 const IT_* col_idx_x = this->col_ind();
2427 const IT_* row_ptr_d = d.row_ptr();
2428 const IT_* col_idx_d = d.col_ind();
2429 const IT_* row_ptr_a = a.row_ptr();
2430 const IT_* col_idx_a = a.col_ind();
2431 const IT_* row_ptr_b = b.row_ptr();
2432 const IT_* col_idx_b = b.col_ind();
2433
2434 // loop over all rows of D and X, resp.
2435 for(IT_ i(0); i < IT_(this->rows()); ++i)
2436 {
2437 // loop over all non-zeros D_ik in row i of D
2438 for(IT_ ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
2439 {
2440 // get column index k
2441 const IT_ k = col_idx_d[ik];
2442
2443 // loop over all non-zeros A_kl in row k of A
2444 for(IT_ kl(row_ptr_a[k]); kl < row_ptr_a[k+1]; ++kl)
2445 {
2446 // get column index l
2447 const IT_ l = col_idx_a[kl];
2448
2449 // pre-compute factor (alpha * D_ik * A_kl)
2450 ValueType omega = (alpha * data_d[ik]) * data_a[kl];
2451
2452 // loop over all non-zeros B_lj in row j of B and
2453 // loop over all non-zeros X_ij in row i of X and
2454 // perform a "sparse axpy" of B_l onto X_i, i.e.:
2455 // X_i. += (alpha * D_ik * A_kl) * B_l.
2456 IT_ ij = row_ptr_x[i];
2457 IT_ lj = row_ptr_b[l];
2458 while(lj < row_ptr_b[l+1])
2459 {
2460 if(ij >= row_ptr_x[i+1])
2461 {
2462 // we have reached the end of row X_i, but there is at least
2463 // one entry in row B_l left, so the pattern of X is incomplete
2464 // We let the caller decide whether this is a valid case or not:
2465 if(allow_incomplete)
2466 break; // continue with next row
2467 else
2468 XABORTM("Incomplete output matrix structure");
2469 }
2470 else if(col_idx_x[ij] == col_idx_b[lj])
2471 {
2472 // okay: B_lj contributes to X_ij
2473 Tiny::axpy(data_x[ij], omega, data_b[lj]);
2474 ++ij;
2475 ++lj;
2476 }
2477 else if(col_idx_x[ij] < col_idx_b[lj])
2478 {
2479 // entry X_ij exists, but B_lj is missing:
2480 // this is a perfectly valid case, so continue with the next non-zero of X_i
2481 ++ij;
2482 }
2483 else //if(col_idx_x[ij] > col_idx_b[lj])
2484 {
2485 // If we come out here, then the sparsity pattern of X is incomplete:
2486 // B_lj is meant to be added onto X_ij, but the entry X_ij is missing
2487 // We let the caller decide whether this is a valid case or not:
2488 if(allow_incomplete)
2489 ++lj;
2490 else
2491 XABORTM("Incomplete output matrix structure");
2492 }
2493 }
2494 }
2495 }
2496 }
2497 }
2499
2501 void lump_rows(VectorTypeL& lump) const
2502 {
2503 XASSERTM(lump.size() == rows(), "lump vector size does not match matrix row count!");
2504
2505 Arch::Lumping::bcsr(
2506 lump.template elements<Perspective::pod>(),
2507 this->template val<Perspective::pod>(),
2509 }
2510
2521 {
2522 VectorTypeL lump = create_vector_l();
2523 lump_rows(lump);
2524 return lump;
2525 }
2526
2527
2537 void extract_diag(VectorTypeL & diag, DenseVector<IT_, IT_> & diag_indices) const
2538 {
2539 XASSERTM(diag.size() == rows(), "diag size does not match matrix row count!");
2540 XASSERTM(rows() == columns(), "matrix is not square!");
2541
2542 for (Index row(0); row < rows(); row++)
2543 {
2544 const Index index = diag_indices.elements()[row];
2545 typename VectorTypeL::ValueType t(0);
2546 if (index != used_elements())
2547 {
2548 ValueType m = this->template val<LAFEM::Perspective::native>()[index];
2549 for (int i(0) ; i < BlockHeight_ ; ++i)
2550 {
2551 t[i] = m[i][i];
2552 }
2553 }
2554 diag(row, t);
2555 }
2556 }
2557
2564 void extract_diag(VectorTypeL & diag) const
2565 {
2566 auto diag_indices = extract_diag_indices();
2567 extract_diag(diag, diag_indices);
2568 }
2569
2570
2577 {
2578 VectorTypeL diag = create_vector_l();
2579 extract_diag(diag);
2580 return diag;
2581 }
2582
2590 {
2591 XASSERTM(diag_indices.size() == rows(), "diag size does not match matrix row count!");
2592 XASSERTM(rows() == columns(), "matrix is not square!");
2593
2594 Arch::Diagonal::csr(diag_indices.elements(), col_ind(), row_ptr(), rows());
2595 }
2596
2603 {
2604 DenseVector<IT_, IT_> diag_indices(this->template rows<LAFEM::Perspective::native>());
2605 extract_diag_indices(diag_indices);
2606 return diag_indices;
2607 }
2608
2611 {
2612 if (perm_row.size() == 0 && perm_col.size() == 0)
2613 return;
2614
2615 XASSERTM(perm_row.size() == this->rows(), "Container rows does not match permutation size");
2616 XASSERTM(perm_col.size() == this->columns(), "Container columns does not match permutation size");
2617
2618 // http://de.mathworks.com/help/matlab/math/sparse-matrix-operations.html#f6-13070
2619 IT_ * temp_row_ptr = new IT_[rows() + 1];
2620 IT_ * temp_col_ind = new IT_[used_elements()];
2621 ValueType * temp_val = new ValueType[used_elements()];
2622
2623 Index * perm_pos;
2624 perm_pos = perm_row.get_perm_pos();
2625
2626 //permute rows from source to temp_*
2627 Index new_start(0);
2628 temp_row_ptr[0] = 0;
2629 for (Index row(0) ; row < this->rows() ; ++row)
2630 {
2631 Index row_size(this->row_ptr()[perm_pos[row] + 1] - this->row_ptr()[perm_pos[row]]);
2632
2633 //iterate over all elements in single one new and old row
2634 for (Index i(new_start), j(this->row_ptr()[perm_pos[row]]) ; i < new_start + row_size ; ++i, ++j)
2635 {
2636 temp_col_ind[i] = this->col_ind()[j];
2637 temp_val[i] = this->val()[j];
2638 }
2639
2640 new_start += row_size;
2641 temp_row_ptr[row+1] = (IT_)new_start;
2642 }
2643
2644 //use inverse col permutation as lookup table: i -> new location of i
2645 Adjacency::Permutation perm_col_inv = perm_col.inverse();
2646 perm_pos = perm_col_inv.get_perm_pos();
2647
2648 //permute columns from temp_* to source
2649 ::memcpy(this->row_ptr(), temp_row_ptr, (rows() + 1) * sizeof(IT_));
2650 ::memcpy(this->val(), temp_val, used_elements() * sizeof(ValueType));
2651 for (Index i(0) ; i < used_elements() ; ++i)
2652 {
2653 this->col_ind()[i] = (IT_)perm_pos[temp_col_ind[i]];
2654 }
2655
2656 delete[] temp_row_ptr;
2657 delete[] temp_col_ind;
2658 delete[] temp_val;
2659
2660 //sort columns in every row by column index
2661 IT_ swap_key;
2662 ValueType swap_val;
2663 for (Index row(0) ; row < rows() ; ++row)
2664 {
2665 Index offset(this->row_ptr()[row]);
2666 Index row_size(this->row_ptr()[row+1] - this->row_ptr()[row]);
2667 for (Index i(1), j ; i < row_size ; ++i)
2668 {
2669 swap_key = this->col_ind()[i + offset];
2670 swap_val = this->val()[i + offset];
2671 j = i;
2672 while (j > 0 && this->col_ind()[j - 1 + offset] > swap_key)
2673 {
2674 this->col_ind()[j + offset] = this->col_ind()[j - 1 + offset];
2675 this->val()[j + offset] = this->val()[j - 1 + offset];
2676 --j;
2677 }
2678 this->col_ind()[j + offset] = swap_key;
2679 this->val()[j + offset] = swap_val;
2680 }
2681 }
2682 }
2683
2722 template<int BHD_>
2727 const DT_ alpha = DT_(1)) const
2728 {
2730
2731 static constexpr IT_ bhd = IT_(BHD_);
2732 static constexpr IT_ bwd = IT_(BlockHeight);
2733
2734 static constexpr IT_ bh = IT_(BlockHeight);
2735 static constexpr IT_ bw = IT_(BlockWidth);
2736
2737 const auto& b(*this);
2738
2739 // Check matrix dimensions
2740 XASSERT(v.size() == d.rows());
2741 XASSERT(d.columns() == a.size());
2742 XASSERT(a.size() == b.rows());
2743 XASSERT(b.columns() == this->columns());
2744
2745 // Fetch matrix arrays
2746 // Use POD here to avoid hassle with 1x1 matrices and stuff
2747 DT_* data_v = v.template elements<LAFEM::Perspective::pod>();
2748 const DT_* data_d = d.template val<LAFEM::Perspective::pod>();
2749 const DT_* data_a = a.template elements<LAFEM::Perspective::pod>();
2750 const DT_* data_b = b.template val<LAFEM::Perspective::pod>();
2751 const IT_* row_ptr_d = d.row_ptr();
2752 const IT_* col_idx_d = d.col_ind();
2753 const IT_* row_ptr_b = b.row_ptr();
2754 const IT_* col_idx_b = b.col_ind();
2755
2756 // Loop over all rows i of v
2757 for(IT_ row(0); row < IT_(v.size()); ++row)
2758 {
2759 // For all non-zeros D_ik in row i of D
2760 for(IT_ pos_d(row_ptr_d[row]); pos_d < row_ptr_d[row+1]; ++pos_d)
2761 {
2762 // Get column index k
2763 const IT_ col_d(col_idx_d[pos_d]);
2764
2765 // Pre-compute block factor (alpha * D_ik * A_kk)
2766 typename MatrixD::ValueType omega(0);
2767 for(IT_ i(0); i < bhd; ++i)
2768 {
2769 for(IT_ j(0); j < bwd; ++j)
2770 {
2771 omega[int(i)][int(j)] = alpha * data_d[bhd*bwd*pos_d + bwd*i + j] * data_a[bwd*col_d + j];
2772 }
2773 }
2774
2775 // Loop over all non-zero blocks B_kj in row j of B and perform a "sparse axpy" of B_l onto v_i, i.e.:
2776 // v_i += alpha * (D_ik * A_kk) * B_k.
2777 IT_ pos_b(row_ptr_b[col_d]);
2778 while(pos_b < row_ptr_b[col_d+1])
2779 {
2780 if(col_idx_b[pos_b] == row)
2781 {
2782 // B_kj contributes to v_i here
2783 for(IT_ i(0); i < bhd; ++i)
2784 {
2785 for(IT_ j(0); j < bh; ++j)
2786 {
2787 data_v[bhd*row + i] += omega[int(i)][int(j)]*data_b[bh*bw*pos_b + bw*j + i];
2788 }
2789 }
2790 break;
2791 }
2792 ++pos_b;
2793 }
2794 }
2795 }
2796 }
2797
2804 friend std::ostream & operator<< (std::ostream & lhs, const SparseMatrixBCSR & b)
2805 {
2806 lhs << "[\n";
2807 for (Index i(0) ; i < b.rows() ; ++i)
2808 {
2809 for (int k(0) ; k < BlockHeight_ ; ++k)
2810 {
2811 lhs << "[";
2812 for (Index j(0) ; j < b.columns() ; ++j)
2813 {
2814 for (int l(0) ; l < BlockWidth_ ; ++l)
2815 lhs << " " << stringify(b(i, j).v[k][l]);
2816 }
2817 lhs << "]\n";
2818 }
2819 }
2820 lhs << "]\n";
2821
2822 return lhs;
2823 }
2824
2826 // Returns a new compatible L-Vector.
2827 VectorTypeL create_vector_l() const
2828 {
2829 return VectorTypeL(this->rows());
2830 }
2831
2832 // Returns a new compatible R-Vector.
2833 VectorTypeR create_vector_r() const
2834 {
2835 return VectorTypeR(this->columns());
2836 }
2837
2839 Index get_length_of_line(const Index row) const
2840 {
2841 const auto * prow_ptr(this->row_ptr());
2842 const auto trow = row / Index(BlockHeight_);
2843 return Index(prow_ptr[trow + 1] - prow_ptr[trow]) * Index(BlockWidth_);
2844 }
2845
2847 void set_line(const Index row, DT_ * const pval_set, IT_ * const pcol_set,
2848 const Index col_start, const Index stride = 1) const
2849 {
2850 const auto * prow_ptr(this->row_ptr());
2851 const auto * pcol_ind(this->col_ind());
2852 const auto * pval(this->val());
2853
2854 const auto trow = int(row) / BlockHeight_;
2855 const auto lrow = int(row) - trow * BlockHeight_;
2856
2857 const Index start((Index(prow_ptr[trow])));
2858 const Index end((Index(prow_ptr[trow + 1] - prow_ptr[trow])));
2859 for (Index i(0); i < end; ++i)
2860 {
2861 for (Index ti(0); ti < Index(BlockWidth_); ++ti)
2862 {
2863 pval_set[(i * Index(BlockWidth_) + ti) * stride] = pval[start + i](lrow, int(ti));
2864 pcol_set[(i * Index(BlockWidth_) + ti) * stride] = pcol_ind[start + i] * IT_(BlockWidth_) + IT_(ti) + IT_(col_start);
2865 }
2866 }
2867 }
2868
2869 void set_line_reverse(const Index row, const DT_ * const pval_set, const Index stride = 1)
2870 {
2871 const auto * prow_ptr(this->row_ptr());
2872 auto * pval(this->val());
2873
2874 const auto trow = int(row) / BlockHeight_;
2875 const auto lrow = int(row) - trow * BlockHeight_;
2876
2877 const Index start((Index(prow_ptr[trow])));
2878 const Index end((Index(prow_ptr[trow + 1] - prow_ptr[trow])));
2879 for (Index i(0); i < end; ++i)
2880 {
2881 for (Index ti(0); ti < Index(BlockWidth_); ++ti)
2882 {
2883 pval[start + i](lrow, int(ti)) = pval_set[(i * Index(BlockWidth_) + ti) * stride];
2884 }
2885 }
2886 }
2888
2889 /* ******************************************************************* */
2890 /* A D J A C T O R I N T E R F A C E I M P L E M E N T A T I O N */
2891 /* ******************************************************************* */
2892 public:
2895 {
2896 return rows();
2897 }
2898
2901 {
2902 return columns();
2903 }
2904
2906 inline ImageIterator image_begin(Index domain_node) const
2907 {
2908 XASSERTM(domain_node < rows(), "Domain node index out of range");
2909 return &this->_indices.at(0)[this->_indices.at(1)[domain_node]];
2910 }
2911
2913 inline ImageIterator image_end(Index domain_node) const
2914 {
2915 XASSERTM(domain_node < rows(), "Domain node index out of range");
2916 return &this->_indices.at(0)[this->_indices.at(1)[domain_node + 1]];
2917 }
2918 }; // class SparseMatrixBCSR
2919 } // namespace LAFEM
2920} // namespace FEAT
#define ASSERT(expr)
Debug-Assertion macro definition.
Definition: assertion.hpp:229
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define ASSERTM(expr, msg)
Debug-Assertion macro definition with custom message.
Definition: assertion.hpp:230
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
Adjacency Graph implementation.
Definition: graph.hpp:34
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374
Index get_num_indices() const
Returns the total number indices.
Definition: graph.hpp:390
Index size() const
returns the size of the permutation
Index * get_perm_pos()
returns the permute-position array
Permutation inverse() const
Computes the inverse permutation.
Container base class.
Definition: container.hpp:220
const std::vector< IT_ * > & get_indices() const
Returns a list of all Index arrays.
Definition: container.hpp:1078
std::vector< DT_ * > _elements
List of pointers to all datatype dependent arrays.
Definition: container.hpp:226
Index used_elements() const
Returns the number of effective stored elements.
Definition: container.hpp:1155
const std::vector< DT_ * > & get_elements() const
Returns a list of all data arrays.
Definition: container.hpp:1068
std::vector< Index > _elements_size
List of corresponding datatype array sizes.
Definition: container.hpp:230
Index size() const
Returns the containers size.
Definition: container.hpp:1136
void assign(const Container< DT2_, IT2_ > &other)
Assignment operation.
Definition: container.hpp:280
std::vector< IT_ * > _indices
List of pointers to all IT_ dependent arrays.
Definition: container.hpp:228
void clone(const Container &other, CloneMode clone_mode=CloneMode::Weak)
Clone operation.
Definition: container.hpp:902
void move(Container &&other)
Assignment move operation.
Definition: container.hpp:989
virtual void clear()
Free all allocated arrays.
Definition: container.hpp:875
std::vector< Index > _indices_size
List of corresponding IT_ array sizes.
Definition: container.hpp:232
void format(DT_ value=DT_(0))
Reset all elements of the container to a given value or zero if missing.
Definition: container.hpp:851
std::vector< Index > _scalar_index
List of scalars with datatype index.
Definition: container.hpp:234
Blocked Dense data vector class template.
void convert(const DenseVectorBlocked< DT2_, IT2_, BlockSize_ > &other)
Conversion method.
void copy(const DenseVectorBlocked &x, bool full=false)
Performs .
Index size() const
The number of elements.
Dense data vector class template.
DT_ * elements()
Get a pointer to the data array.
void copy(const VT_ &a)
Performs .
Config class for serialize parameter.
Definition: container.hpp:47
Layout scheme for sparse matrix containers.
Scatter-Axpy operation for SparseMatrixBCSR.
CSR based blocked sparse matrix.
void apply_transposed(DenseVectorBlocked< DT_, IT_, BlockWidth_ > &r, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
const IT_ * ImageIterator
ImageIterator typedef for Adjactor interface implementation.
SparseMatrixBCSR(FileMode mode, const String &filename)
Constructor.
IT_ const * col_ind() const
Retrieve column indices array.
ImageIterator image_end(Index domain_node) const
SparseMatrixBCSR(Index rows_in, Index columns_in)
Constructor.
void apply_transposed(DenseVectorBlocked< DT_, IT_, BlockWidth_ > &r, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &x) const
Calculate .
void row_norm2(VectorTypeL &row_norms) const
Computes the 2-norm for every row.
SparseMatrixBCSR(SparseMatrixBCSR &&other)
Move Constructor.
DT_ min_abs_element() const
Retrieve the absolute minimum value of this matrix.
void copy(const SparseMatrixBCSR &x, bool full=false)
Performs .
DT_ min_element() const
Retrieve the minimum value of this matrix.
void row_norm2sqr(VectorTypeL &row_norms, const VectorTypeR &scal) const
Computes the square of the 2-norm for every row, where every row is scaled by a vector.
Intern::BCSRVectorHelper< DT_, IT_, BlockHeight_ >::VectorType VectorTypeL
Compatible L-vector type.
void permute(Adjacency::Permutation &perm_row, Adjacency::Permutation &perm_col)
Permutate matrix rows and columns according to the given Permutations.
void apply(DenseVectorBlocked< DT_, IT_, BlockHeight_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
SparseMatrixBCSR(std::vector< char > input)
Constructor.
static constexpr SparseLayoutId layout_id
Our used layout type.
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void axpy(const SparseMatrixBCSR &x, const DT_ alpha=DT_(1))
Calculate .
void row_norm2sqr(VectorTypeL &row_norms) const
Computes the square of the 2-norm for every row.
void convert(const SparseMatrixBCSR< DT2_, IT2_, BlockHeight_, BlockWidth_ > &other)
Conversion method.
void clone(const SparseMatrixBCSR< DT2_, IT2_, BlockHeight_, BlockWidth_ > &other, CloneMode clone_mode=CloneMode::Weak)
Clone operation.
ValueType operator()(Index row, Index col) const
Retrieve specific matrix element.
Index rows() const
Retrieve matrix row count.
DenseVector< IT_, IT_ > extract_diag_indices() const
extract main diagonal vector from matrix
friend std::ostream & operator<<(std::ostream &lhs, const SparseMatrixBCSR &b)
SparseMatrixBCSR streaming operator.
VectorTypeL extract_diag() const
extract main diagonal vector from matrix
SparseMatrixBCSR(Index rows_in, Index columns_in, Index used_elements_in)
Constructor.
void write_out(FileMode mode, const String &filename) const
Write out matrix to file.
void apply(DenseVector< DT_, IT_ > &r, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &x) const
Calculate .
static constexpr int BlockHeight
Our block height.
SparseMatrixBCSR(const Index rows_in, const Index columns_in, DenseVector< IT_, IT_ > &col_ind_in, DenseVector< DT_, IT_ > &val_in, DenseVector< IT_, IT_ > &row_ptr_in)
Constructor.
void apply_transposed(DenseVectorBlocked< DT_, IT_, BlockWidth_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
SparseMatrixBCSR(const SparseLayout< IT_, layout_id > &layout_in)
Constructor.
Intern::BCSRVectorHelper< DT_, IT_, BlockWidth_ >::VectorType VectorTypeR
Compatible R-vector type.
bool same_layout(const SparseMatrixBCSR &x) const
Checks if the structural layout of this matrix matches that of another matrix. This excludes comparis...
void extract_diag_indices(DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
void apply_transposed(DenseVectorBlocked< DT_, IT_, BlockWidth_ > &r, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &x, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
static String name()
Returns a descriptive string.
IT_ * col_ind()
Retrieve column indices array.
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
DT_ norm_frobenius() const
Calculates the Frobenius norm of this matrix.
IT_ * row_ptr()
Retrieve row start index array.
void add_double_mat_product(const LAFEM::SparseMatrixBCSR< DT_, IT_, BlockHeight_, BlockWidth_ > &d, const LAFEM::SparseMatrixBCSR< DT_, IT_, BlockHeight_, BlockWidth_ > &a, const LAFEM::SparseMatrixBCSR< DT_, IT_, BlockHeight_, BlockWidth_ > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a double-matrix product onto this matrix.
void write_out(FileMode mode, std::ostream &file) const
Write out matrix to file.
void convert(const Adjacency::Graph &graph)
Conversion method.
void lump_rows(VectorTypeL &lump) const
void apply(DenseVectorBlocked< DT_, IT_, BlockHeight_ > &r, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
VectorTypeL lump_rows() const
Returns the lumped rows vector.
static constexpr int BlockWidth
Our block width.
Index columns() const
Retrieve matrix column count.
void apply(DenseVectorBlocked< DT_, IT_, BlockHeight_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
SparseMatrixBCSR clone(CloneMode clone_mode=CloneMode::Weak) const
Clone operation.
void apply(DenseVectorBlocked< DT_, IT_, BlockHeight_ > &r, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &x) const
Calculate .
SparseLayout< IT_, layout_id > layout() const
Retrieve convenient sparse matrix layout object.
Index used_elements() const
Retrieve non zero element count.
void add_double_mat_product(const LAFEM::SparseMatrixCSR< DT_, IT_ > &d, const LAFEM::SparseMatrixBCSR< DT_, IT_, BlockHeight_, BlockWidth_ > &a, const LAFEM::SparseMatrixCSR< DT_, IT_ > &b, const DT_ alpha=DT_(1), const bool allow_incomplete=false)
Adds a double-matrix product onto this matrix.
void add_trace_double_mat_mult(typename LAFEM::SparseMatrixBCSR< DT_, IT_, BHD_, BlockHeight >::VectorTypeL &v, const LAFEM::SparseMatrixBCSR< DT_, IT_, BHD_, BlockHeight > &d, const LAFEM::DenseVectorBlocked< DT_, IT_, BlockHeight > &a, const DT_ alpha=DT_(1)) const
Adds the trace of a double matrix multiplikation to a vector.
void read_from(FileMode mode, std::istream &file)
Read in matrix from stream.
DT_ max_element() const
Retrieve the maximum value of this matrix.
SparseMatrixBCSR< DT_, IT_, BlockWidth_, BlockHeight_ > transpose() const
Calculate .
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void apply(DenseVector< DT_, IT_ > &r, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void extract_diag(VectorTypeL &diag) const
extract main diagonal vector from matrix
DT_ max_abs_element() const
Retrieve the absolute maximum value of this matrix.
ImageIterator image_begin(Index domain_node) const
DT_ max_rel_diff(const SparseMatrixBCSR &x) const
Retrieve the maximum relative difference of this matrix and another one y.max_rel_diff(x) returns .
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &x, const DenseVector< DT_, IT_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
void deserialize(std::vector< char > input)
Deserialization of complete container entity.
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
void scale_rows(const SparseMatrixBCSR &x, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &s)
Calculate .
void read_from(FileMode mode, const String &filename)
Read in matrix from file.
SparseMatrixBCSR & operator=(SparseMatrixBCSR &&other)
Move operator.
void extract_diag(VectorTypeL &diag, DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
void scale(const SparseMatrixBCSR &x, const DT_ alpha)
Calculate .
IT_ const * row_ptr() const
Retrieve row start index array.
void apply_transposed(DenseVectorBlocked< DT_, IT_, BlockWidth_ > &r, const DenseVector< DT_, IT_ > &x, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
std::vector< char > serialize(const LAFEM::SerialConfig &config=SerialConfig()) const
Serialization of complete container entity.
void apply_transposed(DenseVector< DT_, IT_ > &r, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &x) const
Calculate .
Tiny::Matrix< DataType, BlockHeight, BlockWidth > ValueType
Value type, meaning the type of each block.
auto val() const -> const typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
Retrieve non zero element array.
void apply(DenseVectorBlocked< DT_, IT_, BlockHeight_ > &r, const DenseVectorBlocked< DT_, IT_, BlockWidth_ > &x, const DenseVectorBlocked< DT_, IT_, BlockHeight_ > &y, const DT_ alpha=DT_(1)) const
Calculate .
auto val() -> typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
void transpose(const SparseMatrixBCSR< DT_, IT_, BlockWidth_, BlockHeight_ > &x)
Calculate .
SparseMatrixBCSR(FileMode mode, std::istream &file)
Constructor.
SparseMatrixBCSR(const Adjacency::Graph &graph)
Constructor.
CSR based sparse matrix.
IT_ * col_ind()
Retrieve column indices array.
DT_ * val()
Retrieve non zero element array.
Index rows() const
Retrieve matrix row count.
Index columns() const
Retrieve matrix column count.
IT_ * row_ptr()
Retrieve row start index array.
static void release_memory(void *address)
release memory or decrease reference counter
static void increase_memory(void *address)
increase memory counter
static void add_flops(Index flops)
Add an amount of flops to the global flop counter.
Definition: statistics.hpp:206
String class implementation.
Definition: string.hpp:46
Time stamp class.
Definition: time_stamp.hpp:54
double elapsed(const TimeStamp &before) const
Calculates the time elapsed between two time stamps.
Definition: time_stamp.hpp:100
Tiny Matrix class template.
CUDA_HOST_DEVICE Matrix & set_mat_mat_mult(const Matrix< T_, m_, la_, sma_, sna_ > &a, const Matrix< T_, lb_, n_, smb_, snb_ > &b)
Sets this matrix to the algebraic matrix-product of two other matrices.
CUDA_HOST_DEVICE Matrix & set_transpose(const Matrix< T_, n_, m_, sma_, sna_ > &a)
Sets this matrix to the transpose of another matrix.
constexpr std::size_t FileOutStreamBufferSize
OutStreamBufferSize.
Definition: base.hpp:124
SparseLayoutId
Definition: base.hpp:63
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
Type
bitmask for zfp header
Definition: pack.hpp:81
CUDA_HOST_DEVICE void axpy(T_ &y, const T_ &x, const T_ &alpha)
Performs an AXPY of two scalars.
FEAT namespace.
Definition: adjactor.hpp:12
String stringify(const T_ &item)
Converts an item into a String.
Definition: string.hpp:944
String stringify_fp_sci(DataType_ value, int precision=0, int width=0, bool sign=false)
Prints a floating point value to a string in scientific notation.
Definition: string.hpp:1088
std::uint64_t Index
Index data type.