FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
vanka.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
9#include <kernel/solver/base.hpp>
10#include <kernel/adjacency/graph.hpp>
11#include <kernel/lafem/dense_vector.hpp>
12#include <kernel/lafem/dense_vector_blocked.hpp>
13#include <kernel/lafem/power_vector.hpp>
14#include <kernel/lafem/tuple_vector.hpp>
15#include <kernel/lafem/sparse_matrix_csr.hpp>
16#include <kernel/lafem/sparse_matrix_bcsr.hpp>
17#include <kernel/lafem/power_diag_matrix.hpp>
18#include <kernel/lafem/power_full_matrix.hpp>
19#include <kernel/lafem/power_row_matrix.hpp>
20#include <kernel/lafem/power_col_matrix.hpp>
21#include <kernel/lafem/power_full_matrix.hpp>
22#include <kernel/lafem/saddle_point_matrix.hpp>
23#include <kernel/util/stop_watch.hpp>
24
25// includes, system
26#include <map>
27#include <set>
28#include <vector>
29
30namespace FEAT
31{
32 namespace Solver
33 {
35 namespace Intern
36 {
50 template<typename Vector_>
51 class VankaVector;
52
69 template<typename Matrix_>
70 class VankaMatrix;
71
72 template<typename DT_, typename IT_>
73 class VankaVector<LAFEM::DenseVector<DT_, IT_>>
74 {
75 public:
76 typedef LAFEM::DenseVector<DT_, IT_> VectorType;
77 static constexpr int dim = 1;
78
79 protected:
80 DT_* _vec_cor;
81
82 public:
83 explicit VankaVector(VectorType& vec_cor) :
84 _vec_cor(vec_cor.elements())
85 {
86 }
87
88 IT_ gather_def(DT_* x, const VectorType& vec, const IT_* idx, const IT_ n, const IT_ off) const
89 {
90 const DT_* vdef = vec.elements();
91
92 for(IT_ i(0); i < n; ++i)
93 {
94 x[off+i] = vdef[idx[i]];
95 }
96 return off+n;
97 }
98
99 IT_ scatter_cor(const DT_ omega, const DT_* x, const IT_* idx, const IT_ n, const IT_ off)
100 {
101 for(IT_ i(0); i < n; ++i)
102 {
103 _vec_cor[idx[i]] += omega * x[off+i];
104 }
105 return off+n;
106 }
107 };
108
109 template<typename DT_, typename IT_, int dim_>
110 class VankaVector<LAFEM::DenseVectorBlocked<DT_, IT_, dim_>>
111 {
112 public:
113 typedef LAFEM::DenseVectorBlocked<DT_, IT_, dim_> VectorType;
114 typedef typename VectorType::ValueType ValueType;
115 static constexpr int dim = dim_;
116
117 protected:
118 ValueType* _vec_cor;
119
120 public:
121 explicit VankaVector(VectorType& vec_cor) :
122 _vec_cor(vec_cor.elements())
123 {
124 }
125
126 IT_ gather_def(DT_* x, const VectorType& vec, const IT_* idx, const IT_ n, const IT_ off) const
127 {
128 const ValueType* vdef = vec.elements();
129
130 for(IT_ i(0); i < n; ++i)
131 {
132 const ValueType& vi = vdef[idx[i]];
133 for(int j(0); j < dim; ++j)
134 {
135 x[off + i*IT_(dim) + IT_(j)] = vi[j];
136 }
137 }
138 return off + IT_(dim)*n;
139 }
140
141 IT_ scatter_cor(const DT_ omega, const DT_* x, const IT_* idx, const IT_ n, const IT_ off)
142 {
143 for(IT_ i(0); i < n; ++i)
144 {
145 ValueType& vi = _vec_cor[idx[i]];
146 for(int j(0); j < dim; ++j)
147 {
148 vi[j] += omega * x[off + i*IT_(dim) + IT_(j)];
149 }
150 }
151 return off + IT_(dim)*n;
152 }
153 };
154
155 template<typename SubVector_, int dim_>
156 class VankaVector<LAFEM::PowerVector<SubVector_, dim_>>
157 {
158 public:
159 typedef LAFEM::PowerVector<SubVector_, dim_> VectorType;
160 typedef VankaVector<SubVector_> FirstClass;
161 typedef VankaVector<LAFEM::PowerVector<SubVector_, dim_-1>> RestClass;
162
163 static constexpr int dim = FirstClass::dim + RestClass::dim;
164
165 protected:
166 FirstClass _first;
167 RestClass _rest;
168
169 public:
170 explicit VankaVector(VectorType& vec_cor) :
171 _first(vec_cor.first()),
172 _rest(vec_cor.rest())
173 {
174 }
175
176 template<typename DT_, typename IT_>
177 IT_ gather_def(DT_* x, const VectorType& vec, const IT_* idx, const IT_ n, const IT_ off) const
178 {
179 IT_ noff = _first.gather_def(x, vec.first(), idx, n, off);
180 return _rest.gather_def(x, vec.rest(), idx, n, noff);
181 }
182
183 template<typename DT_, typename IT_>
184 IT_ scatter_cor(const DT_ omega, const DT_* x, const IT_* idx, const IT_ n, const IT_ off)
185 {
186 IT_ noff = _first.scatter_cor(omega, x, idx, n, off);
187 return _rest.scatter_cor(omega, x, idx, n, noff);
188 }
189 };
190
191 template<typename SubVector_>
192 class VankaVector<LAFEM::PowerVector<SubVector_, 1>>
193 {
194 public:
195 typedef LAFEM::PowerVector<SubVector_, 1> VectorType;
196 typedef VankaVector<SubVector_> FirstClass;
197
198 static constexpr int dim = FirstClass::dim;
199
200 protected:
201 FirstClass _first;
202
203 public:
204 explicit VankaVector(VectorType& vec_cor) :
205 _first(vec_cor.first())
206 {
207 }
208
209 template<typename DT_, typename IT_>
210 IT_ gather_def(DT_* x, const VectorType& vec, const IT_* idx, const IT_ n, const IT_ off) const
211 {
212 return _first.gather_def(x, vec.first(), idx, n, off);
213 }
214
215 template<typename DT_, typename IT_>
216 IT_ scatter_cor(const DT_ omega, const DT_* x, const IT_* idx, const IT_ n, const IT_ off)
217 {
218 return _first.scatter_cor(omega, x, idx, n, off);
219 }
220 };
221
222 template<typename DT_, typename IT_>
223 class VankaMatrix<LAFEM::SparseMatrixCSR<DT_, IT_>>
224 {
225 public:
226 typedef LAFEM::SparseMatrixCSR<DT_, IT_> MatrixType;
227 typedef LAFEM::DenseVector<DT_, IT_> VectorTypeR;
228
229 static constexpr int row_dim = 1;
230 static constexpr int col_dim = 1;
231
232 protected:
233 const IT_* _row_ptr;
234 const IT_* _col_idx;
235 const DT_* _mat_val;
236
237 public:
238 explicit VankaMatrix(const MatrixType& matrix) :
239 _row_ptr(matrix.row_ptr()),
240 _col_idx(matrix.col_ind()),
241 _mat_val(matrix.val())
242 {
243 }
244
245 std::pair<IT_, IT_> gather_full(
246 DT_* data, const IT_* ridx, const IT_* cidx,
247 const IT_ m, const IT_ n, const IT_ stride,
248 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
249 {
250 // empty matrix?
251 if(_mat_val == nullptr)
252 return std::make_pair(mo+m, no+n);
253
254 // loop over all local rows
255 for(IT_ i(0); i < m; ++i)
256 {
257 // get row index
258 const IT_ ri = ridx[i];
259
260 // initialize loop variable for local columns
261 IT_ j = IT_(0);
262
263 // loop over the ri row of our matrix
264 for(IT_ ai = _row_ptr[ri]; ai < _row_ptr[ri+1]; ++ai)
265 {
266 // get column index
267 const IT_ rj = _col_idx[ai];
268
269 // now search its position in our local matrix
270 while((j < n) && (cidx[j] < rj))
271 {
272 ++j;
273 }
274 // did we find our local entry?
275 if((j < n) && (cidx[j] == rj))
276 {
277 // found, so store it in our local matrix
278 data[(mo + i) * stride + no + j] = _mat_val[ai];
279 }
280 }
281 }
282 return std::make_pair(mo+m, no+n);
283 }
284
285 IT_ gather_diag(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
286 {
287 // loop over all local rows
288 for(IT_ i(0); i < m; ++i)
289 {
290 // get row index
291 const IT_ ri = idx[i];
292
293 // find diagonal entry
294 for(IT_ ai = _row_ptr[ri]; ai < _row_ptr[ri+1]; ++ai)
295 {
296 // is it our diagonal?
297 if(_col_idx[ai] == ri)
298 {
299 data[mo+i] = _mat_val[ai];
300 break;
301 }
302 }
303 }
304 return mo + m;
305 }
306
307 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
308 {
309 if(_mat_val == nullptr)
310 return off + m;
311
312 const DT_* v = vec_cor.elements();
313
314 // loop over all local rows
315 for(IT_ i(0); i < m; ++i)
316 {
317 const IT_ vi = idx[i];
318
319 // loop over all columns
320 DT_ r = DT_(0);
321 for(IT_ k = _row_ptr[vi]; k < _row_ptr[vi+1]; ++k)
322 {
323 r += _mat_val[k] * v[_col_idx[k]];
324 }
325 x[off+i] += alpha * r;
326 }
327 return off + m;
328 }
329 };
330
331 // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
332 // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
333 // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
334
335 template<typename DT_, typename IT_, int row_dim_, int col_dim_>
336 class VankaMatrix<LAFEM::SparseMatrixBCSR<DT_, IT_, row_dim_, col_dim_>>
337 {
338 public:
339 typedef LAFEM::SparseMatrixBCSR<DT_, IT_, row_dim_, col_dim_> MatrixType;
340
341 typedef typename MatrixType::ValueType MatVal;
342
343 static constexpr int row_dim = row_dim_;
344 static constexpr int col_dim = col_dim_;
345
346 protected:
347 const IT_* _row_ptr;
348 const IT_* _col_idx;
349 const MatVal* _mat_val;
350
351 public:
352 explicit VankaMatrix(const MatrixType& matrix) :
353 _row_ptr(matrix.row_ptr()),
354 _col_idx(matrix.col_ind()),
355 _mat_val(matrix.val())
356 {
357 }
358
359 std::pair<IT_, IT_> gather_full(
360 DT_* data, const IT_* ridx, const IT_* cidx,
361 const IT_ m, const IT_ n, const IT_ stride,
362 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
363 {
364 // empty matrix?
365 XASSERTM(_mat_val != nullptr, "Vanka: invalid empty BCSR matrix");
366
367 // loop over all local rows
368 for(IT_ i(0); i < m; ++i)
369 {
370 // get row index
371 const IT_ ri = ridx[i];
372
373 // initialize loop variable for local columns
374 IT_ j = IT_(0);
375
376 // loop over the ri row of our matrix
377 for(IT_ ai = _row_ptr[ri]; ai < _row_ptr[ri+1]; ++ai)
378 {
379 // get column index
380 const IT_ rj = _col_idx[ai];
381
382 // now search its position in our local matrix
383 while((j < n) && (cidx[j] < rj))
384 {
385 ++j;
386 }
387 // did we find our local entry?
388 if((j < n) && (cidx[j] == rj))
389 {
390 // found, so store it in our local matrix
391 const MatVal& mv = _mat_val[ai];
392
393 // copy matrix block
394 for(int ii(0); ii < row_dim; ++ii)
395 {
396 // compute offset
397 const IT_ lro = (mo + i * IT_(row_dim) + IT_(ii)) * stride + no + j * IT_(col_dim);
398
399 // copy block row
400 for(int jj(0); jj < col_dim; ++jj)
401 {
402 data[lro + IT_(jj)] = mv[ii][jj];
403 }
404 }
405 }
406 }
407 }
408 return std::make_pair(mo + IT_(row_dim)*m, no + IT_(col_dim)*n);
409 }
410
411 IT_ gather_diag(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
412 {
413 // loop over all local rows
414 for(IT_ i(0); i < m; ++i)
415 {
416 // get row index
417 const IT_ ri = idx[i];
418
419 // find diagonal entry
420 for(IT_ ai = _row_ptr[ri]; ai < _row_ptr[ri+1]; ++ai)
421 {
422 // is it our diagonal?
423 if(_col_idx[ai] == ri)
424 {
425 const MatVal& mv = _mat_val[ai];
426
427 // copy block diagonal
428 for(int k(0); k < row_dim; ++k)
429 {
430 data[mo + i*IT_(row_dim) + IT_(k)] = mv[k][k];
431 }
432 break;
433 }
434 }
435 }
436 return mo + m;
437 }
438
439 IT_ mult_cor(DT_* x, const DT_ alpha, const LAFEM::DenseVectorBlocked<DT_, IT_, col_dim_>& vec_cor,
440 const IT_* idx, const IT_ m, const IT_ off) const
441 {
442 if(_mat_val == nullptr)
443 return off + m;
444
445 typedef LAFEM::DenseVectorBlocked<DT_, IT_, col_dim_> VectorType;
446 typedef typename VectorType::ValueType VecVal;
447 const VecVal* v = vec_cor.elements();
448
449 // loop over all local rows
450 for(IT_ i(0); i < m; ++i)
451 {
452 const IT_ vi = idx[i];
453
454 // loop over all columns
455 for(IT_ k = _row_ptr[vi]; k < _row_ptr[vi+1]; ++k)
456 {
457 // get matrix and vector values
458 const MatVal& mv = _mat_val[k];
459 const VecVal& vv = v[_col_idx[k]];
460
461 // loop over all block rows
462 for(int ii(0); ii < row_dim; ++ii)
463 {
464 DT_ r = DT_(0);
465 // loop over all block columns
466 for(int jj(0); jj < col_dim; ++jj)
467 {
468 r += mv[ii][jj] * vv[jj];
469 }
470 x[off + i*IT_(row_dim) + IT_(ii)] += alpha * r;
471 }
472 }
473 }
474 return off + m* IT_(row_dim);
475 }
476
477 IT_ mult_cor(DT_* x, const DT_ alpha, const LAFEM::DenseVector<DT_, IT_>& vec_cor,
478 const IT_* idx, const IT_ m, const IT_ off) const
479 {
480 if(_mat_val == nullptr)
481 return off + m;
482
483 const DT_* v = vec_cor.elements();
484
485 // loop over all local rows
486 for(IT_ i(0); i < m; ++i)
487 {
488 const IT_ vi = idx[i];
489
490 // loop over all columns
491 for(IT_ k = _row_ptr[vi]; k < _row_ptr[vi+1]; ++k)
492 {
493 // get matrix and vector values
494 const MatVal& mv = _mat_val[k];
495 //const VecVal& vv = v[_col_idx[k]];
496
497 // compute vector offset
498 const IT_ vo = _col_idx[k] * IT_(col_dim);
499
500 // loop over all block rows
501 for(int ii(0); ii < row_dim; ++ii)
502 {
503 DT_ r = DT_(0);
504 // loop over all block columns
505 for(int jj(0); jj < col_dim; ++jj)
506 {
507 r += mv[ii][jj] * v[vo + IT_(jj)];
508 }
509 x[off + i*IT_(row_dim) + IT_(ii)] += alpha * r;
510 }
511 }
512 }
513 return off + m* IT_(row_dim);
514 }
515 };
516
517 // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
518 // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
519 // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
520
521 template<typename SubMatrix_, int dim_>
522 class VankaMatrix<LAFEM::PowerDiagMatrix<SubMatrix_, dim_>>
523 {
524 public:
525 typedef LAFEM::PowerDiagMatrix<SubMatrix_, dim_> MatrixType;
526 typedef typename MatrixType::VectorTypeR VectorTypeR;
527 typedef VankaMatrix<SubMatrix_> FirstClass;
528 typedef VankaMatrix<LAFEM::PowerDiagMatrix<SubMatrix_, dim_-1>> RestClass;
529
530 static constexpr int row_dim = FirstClass::row_dim + RestClass::row_dim;
531 static constexpr int col_dim = FirstClass::col_dim + RestClass::col_dim;
532
533 protected:
534 FirstClass _first;
535 RestClass _rest;
536
537 public:
538 explicit VankaMatrix(const MatrixType& matrix) :
539 _first(matrix.first()),
540 _rest(matrix.rest())
541 {
542 }
543
544 template<typename DT_, typename IT_>
545 std::pair<IT_, IT_> gather_full(
546 DT_* data, const IT_* ridx, const IT_* cidx,
547 const IT_ m, const IT_ n, const IT_ stride,
548 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
549 {
550 std::pair<IT_, IT_> mn = _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
551 return _rest.gather_full(data, ridx, cidx, m, n, stride, mn.first, mn.second);
552 }
553
554 template<typename DT_, typename IT_>
555 IT_ gather_diag(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
556 {
557 IT_ mno = _first.gather_diag(data, idx, m, mo);
558 return _rest.gather_diag(data, idx, m, mno);
559 }
560
561 template<typename DT_, typename IT_>
562 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
563 {
564 IT_ noff = _first.mult_cor(x, alpha, vec_cor.first(), idx, m, off);
565 return _rest.mult_cor(x, alpha, vec_cor.rest(), idx, m, noff);
566 }
567 };
568
569 template<typename SubMatrix_>
570 class VankaMatrix<LAFEM::PowerDiagMatrix<SubMatrix_, 1>>
571 {
572 public:
573 typedef LAFEM::PowerDiagMatrix<SubMatrix_, 1> MatrixType;
574 typedef typename MatrixType::VectorTypeR VectorTypeR;
575 typedef VankaMatrix<SubMatrix_> FirstClass;
576
577 static constexpr int row_dim = FirstClass::row_dim;
578 static constexpr int col_dim = FirstClass::col_dim;
579
580 protected:
581 FirstClass _first;
582
583 public:
584 explicit VankaMatrix(const MatrixType& matrix) :
585 _first(matrix.first())
586 {
587 }
588
589 template<typename DT_, typename IT_>
590 std::pair<IT_, IT_> gather_full(
591 DT_* data, const IT_* ridx, const IT_* cidx,
592 const IT_ m, const IT_ n, const IT_ stride,
593 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
594 {
595 return _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
596 }
597
598 template<typename DT_, typename IT_>
599 IT_ gather_diag(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
600 {
601 return _first.gather_diag(data, idx, m, mo);
602 }
603
604 template<typename DT_, typename IT_>
605 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
606 {
607 return _first.mult_cor(x, alpha, vec_cor.first(), idx, m, off);
608 }
609 };
610
611 template<typename SubMatrix_, int dim_>
612 class VankaMatrix<LAFEM::PowerColMatrix<SubMatrix_, dim_>>
613 {
614 public:
615 typedef LAFEM::PowerColMatrix<SubMatrix_, dim_> MatrixType;
616 typedef typename MatrixType::VectorTypeR VectorTypeR;
617 typedef VankaMatrix<SubMatrix_> FirstClass;
618 typedef VankaMatrix<LAFEM::PowerColMatrix<SubMatrix_, dim_-1>> RestClass;
619
620 static constexpr int row_dim = FirstClass::row_dim + RestClass::row_dim;
621 static constexpr int col_dim = FirstClass::col_dim;
622
623 protected:
624 FirstClass _first;
625 RestClass _rest;
626
627 public:
628 explicit VankaMatrix(const MatrixType& matrix) :
629 _first(matrix.first()),
630 _rest(matrix.rest())
631 {
632 }
633
634 template<typename DT_, typename IT_>
635 std::pair<IT_, IT_> gather_full(
636 DT_* data, const IT_* ridx, const IT_* cidx,
637 const IT_ m, const IT_ n, const IT_ stride,
638 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
639 {
640 std::pair<IT_, IT_> mno = _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
641 return _rest.gather_full(data, ridx, cidx, m, n, stride, mno.first, no);
642 }
643
644 template<int ro_, int co_, typename DT_, typename IT_>
645 IT_ gather_diag_pfm(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
646 {
647 IT_ mno = _first.template gather_diag_pfm<ro_, co_>(data, idx, m, mo);
648 return _rest.template gather_diag_pfm<ro_+1, co_>(data, idx, m, mno);
649 }
650
651 template<typename DT_, typename IT_>
652 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
653 {
654 IT_ noff = _first.mult_cor(x, alpha, vec_cor, idx, m, off);
655 return _rest.mult_cor(x, alpha, vec_cor, idx, m, noff);
656 }
657 };
658
659 template<typename SubMatrix_>
660 class VankaMatrix<LAFEM::PowerColMatrix<SubMatrix_, 1>>
661 {
662 public:
663 typedef LAFEM::PowerColMatrix<SubMatrix_, 1> MatrixType;
664 typedef typename MatrixType::VectorTypeR VectorTypeR;
665 typedef VankaMatrix<SubMatrix_> FirstClass;
666
667 static constexpr int row_dim = FirstClass::row_dim;
668 static constexpr int col_dim = FirstClass::col_dim;
669
670 protected:
671 VankaMatrix<SubMatrix_> _first;
672
673 public:
674 explicit VankaMatrix(const MatrixType& matrix) :
675 _first(matrix.first())
676 {
677 }
678
679 template<typename DT_, typename IT_>
680 std::pair<IT_, IT_> gather_full(
681 DT_* data, const IT_* ridx, const IT_* cidx,
682 const IT_ m, const IT_ n, const IT_ stride,
683 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
684 {
685 return _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
686 }
687
688 template<int ro_, int co_, typename DT_, typename IT_>
689 IT_ gather_diag_pfm(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
690 {
691 return _first.template gather_diag_pfm<ro_, co_>(data, idx, m, mo);
692 }
693
694 template<typename DT_, typename IT_>
695 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
696 {
697 return _first.mult_cor(x, alpha, vec_cor, idx, m, off);
698 }
699 };
700
701 template<typename SubMatrix_, int dim_>
702 class VankaMatrix<LAFEM::PowerRowMatrix<SubMatrix_, dim_>>
703 {
704 public:
705 typedef LAFEM::PowerRowMatrix<SubMatrix_, dim_> MatrixType;
706 typedef typename MatrixType::VectorTypeR VectorTypeR;
707 typedef VankaMatrix<SubMatrix_> FirstClass;
708 typedef VankaMatrix<LAFEM::PowerRowMatrix<SubMatrix_, dim_-1>> RestClass;
709
710 static constexpr int row_dim = FirstClass::row_dim;
711 static constexpr int col_dim = FirstClass::col_dim + RestClass::col_dim;
712
713 protected:
714 FirstClass _first;
715 RestClass _rest;
716
717 public:
718 explicit VankaMatrix(const MatrixType& matrix) :
719 _first(matrix.first()),
720 _rest(matrix.rest())
721 {
722 }
723
724 template<typename DT_, typename IT_>
725 std::pair<IT_, IT_> gather_full(
726 DT_* data, const IT_* ridx, const IT_* cidx,
727 const IT_ m, const IT_ n, const IT_ stride,
728 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
729 {
730 std::pair<IT_, IT_> mno = _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
731 return _rest.gather_full(data, ridx, cidx, m, n, stride, mo, mno.second);
732 }
733
734 template<int ro_, int co_, typename DT_, typename IT_>
735 IT_ gather_diag_pfm(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
736 {
737 if(ro_ == co_)
738 return _first.gather_diag(data, idx, m, mo);
739 else
740 return _rest.template gather_diag_pfm<ro_, co_+1>(data, idx, m, mo);
741 }
742
743 template<typename DT_, typename IT_>
744 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
745 {
746 _first.mult_cor(x, alpha, vec_cor.first(), idx, m, off);
747 return _rest.mult_cor(x, alpha, vec_cor.rest(), idx, m, off);
748 }
749 };
750
751 template<typename SubMatrix_>
752 class VankaMatrix<LAFEM::PowerRowMatrix<SubMatrix_, 1>>
753 {
754 public:
755 typedef LAFEM::PowerRowMatrix<SubMatrix_, 1> MatrixType;
756 typedef VankaMatrix<SubMatrix_> FirstClass;
757 typedef typename MatrixType::VectorTypeR VectorTypeR;
758
759 static constexpr int row_dim = FirstClass::row_dim;
760 static constexpr int col_dim = FirstClass::col_dim;
761
762 protected:
763 FirstClass _first;
764
765 public:
766 explicit VankaMatrix(const MatrixType& matrix) :
767 _first(matrix.first())
768 {
769 }
770
771 template<typename DT_, typename IT_>
772 std::pair<IT_, IT_> gather_full(
773 DT_* data, const IT_* ridx, const IT_* cidx,
774 const IT_ m, const IT_ n, const IT_ stride,
775 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
776 {
777 return _first.gather_full(data, ridx, cidx, m, n, stride, mo, no);
778 }
779
780 template<int ro_, int co_, typename DT_, typename IT_>
781 IT_ gather_diag_pfm(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
782 {
783 if(ro_ == co_)
784 return _first.gather_diag(data, idx, m, mo);
785 else
786 return mo;
787 }
788
789 template<typename DT_, typename IT_>
790 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
791 {
792 return _first.mult_cor(x, alpha, vec_cor.first(), idx, m, off);
793 }
794 };
795
796 template<typename SubMatrix_, int dim_w_, int dim_h_>
797 class VankaMatrix<LAFEM::PowerFullMatrix<SubMatrix_, dim_w_, dim_h_>>
798 {
799 public:
800 typedef LAFEM::PowerFullMatrix<SubMatrix_, dim_w_, dim_h_> MatrixType;
801 typedef typename MatrixType::VectorTypeR VectorTypeR;
802 typedef VankaMatrix<typename MatrixType::ContClass> ContClass;
803
804 static constexpr int row_dim = ContClass::row_dim;
805 static constexpr int col_dim = ContClass::col_dim;
806
807 protected:
808 ContClass _cont;
809
810 public:
811 explicit VankaMatrix(const MatrixType& matrix) :
812 _cont(matrix.get_container())
813 {
814 }
815
816 template<typename DT_, typename IT_>
817 std::pair<IT_, IT_> gather_full(
818 DT_* data, const IT_* ridx, const IT_* cidx,
819 const IT_ m, const IT_ n, const IT_ stride,
820 const IT_ mo = IT_(0), const IT_ no = IT_(0)) const
821 {
822 return _cont.gather_full(data, ridx, cidx, m, n, stride, mo, no);
823 }
824
825 template<typename DT_, typename IT_>
826 IT_ gather_diag(DT_* data, const IT_* idx, const IT_ m, const IT_ mo = IT_(0)) const
827 {
828 return _cont.template gather_diag_pfm<0,0>(data, idx, m, mo);
829 }
830
831 template<typename DT_, typename IT_>
832 IT_ mult_cor(DT_* x, const DT_ alpha, const VectorTypeR& vec_cor, const IT_* idx, const IT_ m, const IT_ off) const
833 {
834 return _cont.mult_cor(x, alpha, vec_cor, idx, m, off);
835 }
836 };
837
838 template<typename DT_, typename IT_>
839 std::pair<const IT_*, const IT_*> vanka_graph(const LAFEM::SparseMatrixCSR<DT_, IT_>& matrix)
840 {
841 return std::make_pair(matrix.row_ptr(), matrix.col_ind());
842 }
843
844 template<typename DT_, typename IT_, int m_, int n_>
845 std::pair<const IT_*, const IT_*> vanka_graph(const LAFEM::SparseMatrixBCSR<DT_, IT_, m_, n_>& matrix)
846 {
847 return std::make_pair(matrix.row_ptr(), matrix.col_ind());
848 }
849
850 template<typename DT_, typename IT_, int dim_>
851 std::pair<const IT_*, const IT_*> vanka_graph(
852 const LAFEM::PowerColMatrix<LAFEM::SparseMatrixCSR<DT_, IT_>, dim_>& matrix)
853 {
854 const auto& m = matrix.first();
855 return std::make_pair(m.row_ptr(), m.col_ind());
856 }
857
858 template<typename DT_, typename IT_, int dim_>
859 std::pair<const IT_*, const IT_*> vanka_graph(
860 const LAFEM::PowerRowMatrix<LAFEM::SparseMatrixCSR<DT_, IT_>, dim_>& matrix)
861 {
862 const auto& m = matrix.first();
863 return std::make_pair(m.row_ptr(), m.col_ind());
864 }
865
866 template<typename DT_, typename IT_, int dim_>
867 std::pair<const IT_*, const IT_*> vanka_graph(
868 const LAFEM::PowerDiagMatrix<LAFEM::SparseMatrixCSR<DT_, IT_>, dim_>& matrix)
869 {
870 const auto& m = matrix.first();
871 return std::make_pair(m.row_ptr(), m.col_ind());
872 }
873
874 template<typename DT_, typename IT_, int row_dim_, int col_dim_>
875 std::pair<const IT_*, const IT_*> vanka_graph(
876 const LAFEM::PowerFullMatrix<LAFEM::SparseMatrixCSR<DT_, IT_>, row_dim_, col_dim_>& matrix)
877 {
878 const auto& m = matrix.template at<0,0>();
879 return std::make_pair(m.row_ptr(), m.col_ind());
880 }
881
882 } // namespace Intern
884
891 enum class VankaType
892 {
894 nodal_diag_mult = 0x000,
896 nodal_full_mult = 0x001,
898 block_diag_mult = 0x010,
900 block_full_mult = 0x011,
902 nodal_diag_add = 0x100,
904 nodal_full_add = 0x101,
906 block_diag_add = 0x110,
908 block_full_add = 0x111
909 };
910
919 public SolverException
920 {
921 public:
922 VankaFactorError() : SolverException("Vanka Factorization Error") {}
923 };
924
933 template<typename Matrix_, typename Filter_>
934 class Vanka;
935
1008 template<typename MatrixA_, typename MatrixB_, typename MatrixD_, typename Filter_>
1009 class Vanka<LAFEM::SaddlePointMatrix<MatrixA_, MatrixB_, MatrixD_>, Filter_> :
1010 public SolverBase<LAFEM::TupleVector<typename MatrixB_::VectorTypeL, typename MatrixD_::VectorTypeL>>
1011 {
1012 public:
1016 typedef Filter_ FilterType;
1017
1024
1026 typedef typename MatrixD_::VectorTypeR VectorV;
1028 typedef typename MatrixB_::VectorTypeR VectorP;
1029
1030 protected:
1031 // our Vanka matrix types
1032 typedef Intern::VankaMatrix<MatrixA_> VankaMatrixA;
1033 typedef Intern::VankaMatrix<MatrixB_> VankaMatrixB;
1034 typedef Intern::VankaMatrix<MatrixD_> VankaMatrixD;
1035
1036 // dimension sanity checks
1037 static_assert(VankaMatrixA::row_dim == VankaMatrixA::col_dim, "Matrix A has invalid dimensions");
1038 static_assert(VankaMatrixA::row_dim == VankaMatrixB::row_dim, "Matrices A and B have incompatible dimensions");
1039 static_assert(VankaMatrixA::col_dim == VankaMatrixD::col_dim, "Matrices A and D have incompatible dimensions");
1040 static_assert(VankaMatrixB::col_dim == VankaMatrixD::row_dim, "Matrices B and D have incompatible dimensions");
1041
1042 // for now, B and D cannot have more than 1 pressure dimension...
1043 static_assert(VankaMatrixB::col_dim == 1, "Invalid pressure space dimension");
1044
1060 std::vector<IndexType> _block_v_ptr, _block_v_idx;
1062 std::vector<IndexType> _block_p_ptr, _block_p_idx;
1064 std::vector<DataType> _data;
1066 std::vector<DataType> _vdef, _vcor;
1068 VectorType _vec_scale, _vec_tmp1, _vec_tmp2;
1069
1070 // stop watch for symbolic factorization
1071 StopWatch watch_init_symbolic;
1072 // stop watch for numeric factorization
1073 StopWatch watch_init_numeric;
1074 // stop watch for apply time
1075 StopWatch watch_apply;
1076
1077 public:
1096 explicit Vanka(const MatrixType& matrix, const FilterType& filter, VankaType type,
1097 DataType omega = DataType(1), Index num_iter = Index(1)) :
1098 _matrix(matrix),
1099 _filter(filter),
1100 _type(type),
1101 _omega(omega),
1102 _num_iter(num_iter)
1103 {
1104 }
1105
1107 virtual String name() const override
1108 {
1109 return "Vanka";
1110 }
1111
1113 virtual void init_symbolic() override
1114 {
1115 watch_init_symbolic.start();
1116
1117 bool block = ((int(_type) & 0x010) != 0);
1118 bool multi = ((int(_type) & 0x100) == 0);
1119
1120 // compute pressure block graph
1121 if(block)
1122 {
1123 this->_build_p_block();
1124 }
1125 else
1126 {
1127 this->_build_p_nodal();
1128 }
1129
1130 // compute velocity block graph
1131 this->_build_v_block();
1132
1133 // allocate memory for numerical factorization
1134 this->_alloc_data();
1135
1136 // allocate temporary vector for additive
1137 if(!multi)
1138 {
1139 this->_vec_scale = this->_matrix.create_vector_r();
1140 this->_vec_tmp1 = this->_matrix.create_vector_r();
1141 this->_vec_tmp2 = this->_matrix.create_vector_r();
1142 }
1143
1144 watch_init_symbolic.stop();
1145 }
1146
1148 virtual void done_symbolic() override
1149 {
1150 _vec_tmp2.clear();
1151 _vec_tmp1.clear();
1152 _vec_scale.clear();
1153 _vdef.clear();
1154 _vcor.clear();
1155 _data.clear();
1156 _block_v_ptr.clear();
1157 _block_v_idx.clear();
1158 _block_p_ptr.clear();
1159 _block_p_idx.clear();
1160 }
1161
1163 virtual void init_numeric() override
1164 {
1165 watch_init_numeric.start();
1166
1167 bool full = ((int(_type) & 0x001) != 0);
1168 bool multi = ((int(_type) & 0x100) == 0);
1169
1170 if(full)
1171 {
1172 this->_factor_full();
1173 }
1174 else
1175 {
1176 this->_factor_diag();
1177 }
1178
1179 if(!multi)
1180 {
1181 this->_calc_scale();
1182 }
1183
1184 watch_init_numeric.stop();
1185 }
1186
1187 virtual Status apply(VectorType& vec_cor, const VectorType& vec_def) override
1188 {
1189 watch_apply.start();
1190
1191 bool full = ((int(_type) & 0x001) != 0);
1192 if(full)
1193 {
1194 this->_apply_full(vec_cor, vec_def);
1195 }
1196 else
1197 {
1198 this->_apply_diag(vec_cor, vec_def);
1199 }
1200
1201 watch_apply.stop();
1202
1203 return Status::success;
1204 }
1205
1206 std::size_t data_size() const
1207 {
1208 return _data.size();
1209 }
1210
1211 std::size_t bytes() const
1212 {
1213 return sizeof(IndexType) * (_block_v_ptr.size() + _block_v_idx.size() + _block_p_ptr.size() + _block_p_idx.size())
1214 + sizeof(DataType) * (_data.size() + _vdef.size() + _vcor.size())
1215 + _vec_scale.bytes() + _vec_tmp1.bytes() + _vec_tmp2.bytes();
1216 }
1217
1222 {
1223 watch_init_symbolic.reset();
1224 watch_init_numeric.reset();
1225 watch_apply.reset();
1226 }
1227
1231 double time_init_symbolic() const
1232 {
1233 return watch_init_symbolic.elapsed();
1234 }
1235
1239 double time_init_numeric() const
1240 {
1241 return watch_init_numeric.elapsed();
1242 }
1243
1247 double time_apply() const
1248 {
1249 return watch_apply.elapsed();
1250 }
1251
1252 protected:
1260 {
1261 // fetch matrix dimensions
1262 const IndexType m = IndexType(_matrix.block_d().rows());
1263
1264 // fetch the matrix arrays
1265 auto graph_b = Intern::vanka_graph(_matrix.block_b());
1266 const IndexType* row_ptr_b = graph_b.first;
1267 const IndexType* col_idx_b = graph_b.second;
1268 auto graph_d = Intern::vanka_graph(_matrix.block_d());
1269 const IndexType* row_ptr_d = graph_d.first;
1270 const IndexType* col_idx_d = graph_d.second;
1271
1272 // clear block arrays
1273 _block_p_ptr.clear();
1274 _block_p_idx.clear();
1275 _block_p_ptr.push_back(IndexType(0));
1276
1277 // local map
1278 std::map<IndexType, int> map_s;
1279
1280 // allocate mask vector
1281 std::vector<int> mask(std::size_t(m), 0);
1282
1283 // loop over all pressure nodes
1284 for(Index i(0); i < m; ++i)
1285 {
1286 // is this node already processed?
1287 if(mask[i] != 0)
1288 continue;
1289
1290 // clear the local map
1291 map_s.clear();
1292
1293 // okay, loop over all entries of D
1294 for(IndexType kd = row_ptr_d[i]; kd < row_ptr_d[i+1]; ++kd)
1295 {
1296 // fetch the column index of D
1297 const IndexType col_d = col_idx_d[kd];
1298
1299 // loop over the row of B
1300 for(IndexType kb = row_ptr_b[col_d]; kb < row_ptr_b[col_d+1]; ++kb)
1301 {
1302 // fetch the column index of B
1303 const IndexType col_b = col_idx_b[kb];
1304
1305 // insert into map
1306 auto ib = map_s.emplace(col_b, 1);
1307 if(!ib.second)
1308 {
1309 // node already exists, increment counter then
1310 ++(ib.first->second);
1311 }
1312 }
1313 }
1314
1315 // compute the map degree
1316 int ideg = 0;
1317 for(auto it = map_s.begin(); it != map_s.end(); ++it)
1318 ideg = Math::max(ideg, it->second);
1319
1320 // loop over all map entries with maximum degree
1321 for(auto it = map_s.begin(); it != map_s.end(); ++it)
1322 {
1323 if(ideg == it->second)
1324 {
1325 // insert into map
1326 _block_p_idx.push_back(it->first);
1327 mask[it->first] = 1;
1328 }
1329 }
1330
1331 // push row
1332 _block_p_ptr.push_back(IndexType(_block_p_idx.size()));
1333 }
1334 }
1335
1340 {
1341 const IndexType m = IndexType(_matrix.block_d().rows());
1342
1343 // clear block arrays
1344 _block_p_ptr.clear();
1345 _block_p_idx.clear();
1346 _block_p_ptr.reserve(m+1);
1347 _block_p_idx.reserve(m);
1348 for(IndexType i(0); i < m; ++i)
1349 {
1350 _block_p_ptr.push_back(i);
1351 _block_p_idx.push_back(i);
1352 }
1353 _block_p_ptr.push_back(m);
1354 }
1355
1358 {
1359 // fetch the matrix arrays
1360 auto graph_d = Intern::vanka_graph(_matrix.block_d());
1361 const IndexType* row_ptr_d = graph_d.first;
1362 const IndexType* col_idx_d = graph_d.second;
1363
1364 // fetch number of pressure blocks
1365 const IndexType m = IndexType(_block_p_ptr.size()-1);
1366
1367 // clear block arrays
1368 _block_v_ptr.clear();
1369 _block_v_idx.clear();
1370 _block_v_ptr.reserve(m+1);
1371 _block_v_ptr.push_back(IndexType(0));
1372
1373 std::set<IndexType> set_v;
1374
1375 // loop over all pressure blocks
1376 for(IndexType i(0); i < m; ++i)
1377 {
1378 // loop over all pressure dofs in the current block
1379 for(IndexType j = _block_p_ptr[i]; j < _block_p_ptr[i+1]; ++j)
1380 {
1381 // get the pressure dof index
1382 const IndexType pix = _block_p_idx[j];
1383
1384 // now loop over the corresponding row of D
1385 for(IndexType k = row_ptr_d[pix]; k < row_ptr_d[pix+1]; ++k)
1386 {
1387 // insert velocity dof into block set
1388 set_v.insert(col_idx_d[k]);
1389 }
1390 }
1391
1392 // push velocity block
1393 for(auto it = set_v.begin(); it != set_v.end(); ++it)
1394 {
1395 _block_v_idx.push_back(*it);
1396 }
1397
1398 // update pointer
1399 _block_v_ptr.push_back(IndexType(_block_v_idx.size()));
1400
1401 // clear local set
1402 set_v.clear();
1403 }
1404 }
1405
1408 {
1409 // get the dimension of our system
1410 const IndexType dim = IndexType(Intern::VankaMatrix<MatrixA_>::row_dim);
1411
1412 // use diagonal?
1413 bool diag = ((int(_type) & 1) == 0);
1414
1415 // number of non-zero entries
1416 IndexType nze = IndexType(0);
1417
1418 // reset degrees
1419 _degree_v = _degree_p = IndexType(0);
1420
1421 // loop over all blocks
1422 const IndexType m = IndexType(_block_p_ptr.size()-1);
1423 for(IndexType i(0); i < m; ++i)
1424 {
1425 // get number of velocity and pressure dofs
1426 IndexType nv = _block_v_ptr[i+1] - _block_v_ptr[i];
1427 IndexType np = _block_p_ptr[i+1] - _block_p_ptr[i];
1428
1429 // update degrees
1430 _degree_v = Math::max(nv, _degree_v);
1431 _degree_p = Math::max(np, _degree_p);
1432
1433 // update count
1434 if(diag)
1435 {
1436 // main diagonal of A
1437 nze += dim * nv;
1438 // matrices B and D
1439 nze += IndexType(2) * dim * nv * np;
1440 // Schur-Complement matrix S
1441 nze += np * np;
1442 }
1443 else
1444 {
1445 // dense local systems
1446 nze += Math::sqr(dim*nv + np);
1447 }
1448 }
1449
1450 // finally, allocate our memory
1451 _data.resize(nze);
1452 _vdef.resize(dim*_degree_v + _degree_p);
1453 _vcor.resize(dim*_degree_v + _degree_p);
1454 }
1455
1458 {
1459 // create a Vanka Vector
1460 this->_vec_scale.format();
1461 Intern::VankaVector<VectorV> vanka_v(this->_vec_scale.template at<0>());
1462 Intern::VankaVector<VectorP> vanka_p(this->_vec_scale.template at<1>());
1463
1464 // get our data arrays
1465 const IndexType* vptr = _block_v_ptr.data();
1466 const IndexType* vidx = _block_v_idx.data();
1467 const IndexType* pptr = _block_p_ptr.data();
1468 const IndexType* pidx = _block_p_idx.data();
1469
1470 // create a vector of ones
1471 std::vector<DataType> vec_one(vanka_v.dim*_degree_v + _degree_p, DataType(1));
1472 const DataType* vone = vec_one.data();
1473
1474 // loop over all blocks
1475 const IndexType nblocks = IndexType(_block_v_ptr.size() - 1);
1476 for(IndexType iblock(0); iblock < nblocks; ++iblock)
1477 {
1478 // get number of velocity and pressure dofs
1479 const IndexType nv = vptr[iblock+1] - vptr[iblock];
1480 const IndexType np = pptr[iblock+1] - pptr[iblock];
1481
1482 // scatter ones
1483 vanka_v.scatter_cor(DataType(1), vone, &vidx[vptr[iblock]], nv, IndexType(0));
1484 vanka_p.scatter_cor(DataType(1), vone, &pidx[pptr[iblock]], np, IndexType(0));
1485 }
1486
1487 // invert components
1488 this->_vec_scale.component_invert(this->_vec_scale);
1489 }
1490
1493 {
1494 Intern::VankaMatrix<MatrixA_> vanka_a(_matrix.block_a());
1495 Intern::VankaMatrix<MatrixB_> vanka_b(_matrix.block_b());
1496 Intern::VankaMatrix<MatrixD_> vanka_d(_matrix.block_d());
1497
1498 // get the dimension of our system
1499 const IndexType dim = IndexType(vanka_a.row_dim);
1500
1501 // get our data arrays
1502 DataType* data = _data.data();
1503 const IndexType* vptr = _block_v_ptr.data();
1504 const IndexType* vidx = _block_v_idx.data();
1505 const IndexType* pptr = _block_p_ptr.data();
1506 const IndexType* pidx = _block_p_idx.data();
1507
1508 // format block data
1509 ::memset(data, 0, sizeof(DataType) * _data.size());
1510
1511 // allocate pivot array
1512 std::vector<IndexType> pivot(3*(dim*_degree_v + _degree_p));
1513
1514 // current block offset
1515 IndexType block_offset = IndexType(0);
1516
1517 // loop over all blocks
1518 const IndexType nblocks = IndexType(_block_v_ptr.size() - 1);
1519 for(IndexType iblock(0); iblock < nblocks; ++iblock)
1520 {
1521 // get number of velocity and pressure dofs
1522 const IndexType nv = vptr[iblock+1] - vptr[iblock];
1523 const IndexType np = pptr[iblock+1] - pptr[iblock];
1524
1525 // get our local indices
1526 const IndexType* loc_vidx = &vidx[vptr[iblock]];
1527 const IndexType* loc_pidx = &pidx[pptr[iblock]];
1528
1529 // compute matrix stride
1530 const IndexType n = dim*nv + np;
1531 const IndexType block_size = n*n;
1532
1533 // get our block data array pointer
1534 DataType* block_data = &data[block_offset];
1535
1536 // gather matrix a
1537 std::pair<IndexType,IndexType> ao =
1538 vanka_a.gather_full(block_data, loc_vidx, loc_vidx, nv, nv, n);
1539 vanka_b.gather_full(block_data, loc_vidx, loc_pidx, nv, np, n, IndexType(0), ao.second);
1540 vanka_d.gather_full(block_data, loc_pidx, loc_vidx, np, nv, n, ao.first, IndexType(0));
1541
1542 // invert local matrix block
1543 /*DataType ret =*/ Math::invert_matrix(n, n, block_data, pivot.data());
1544 //if(!Math::isnormal(ret))
1545 //{
1546 // Don't throw any exceptions, as this often leads to false alerts
1547 //throw VankaFactorError();
1548 //}
1549
1550 // increment block data offset
1551 block_offset += block_size;
1552 }
1553 }
1554
1556 void _apply_full(VectorType& vec_cor, const VectorType& vec_def)
1557 {
1558 // format correction vector
1559 vec_cor.format();
1560
1561 // do we use the multiplicative variant?
1562 const bool multi = ((int(_type) & 0x100) == 0);
1563
1564 // additive?
1565 if(!multi)
1566 {
1567 this->_vec_tmp1.copy(vec_def);
1568 this->_vec_tmp2.format();
1569 }
1570
1571 // get our sub-vectors
1572 VectorV& vec_cv = (multi ? vec_cor.template at<0>() : this->_vec_tmp2.template at<0>());
1573 VectorP& vec_cp = (multi ? vec_cor.template at<1>() : this->_vec_tmp2.template at<1>());
1574 const VectorV& vec_dv = (multi ? vec_def.template at<0>() : this->_vec_tmp1.template at<0>());
1575 const VectorP& vec_dp = (multi ? vec_def.template at<1>() : this->_vec_tmp1.template at<1>());
1576
1577 // create our vanka matrix and vector objects
1578 Intern::VankaMatrix<MatrixA_> vanka_a(_matrix.block_a());
1579 Intern::VankaMatrix<MatrixB_> vanka_b(_matrix.block_b());
1580 Intern::VankaMatrix<MatrixD_> vanka_d(_matrix.block_d());
1581 Intern::VankaVector<VectorV> vanka_v(vec_cv);
1582 Intern::VankaVector<VectorP> vanka_p(vec_cp);
1583
1584 // get velocity vector dimension
1585 const IndexType velo_dim = IndexType(vanka_v.dim);
1586
1587 // get block count
1588 const IndexType num_blocks = IndexType(_block_v_ptr.size()-1);
1589
1590 // get block data arrays
1591 const IndexType* vptr = _block_v_ptr.data();
1592 const IndexType* vidx = _block_v_idx.data();
1593 const IndexType* pptr = _block_p_ptr.data();
1594 const IndexType* pidx = _block_p_idx.data();
1595
1596 // get local data and vectors
1597 const DataType* lmat_data = _data.data();
1598 DataType* lcor = _vcor.data();
1599 DataType* ldef = _vdef.data();
1600
1601 Index flops(0);
1602
1603 // iterate
1604 for(IndexType iter(0); iter < _num_iter; ++iter)
1605 {
1606 // additive variant?
1607 if((!multi) && (iter > IndexType(0)))
1608 {
1609 // compute current defect
1610 this->_matrix.apply(_vec_tmp1, vec_cor, vec_def, -DataType(1));
1611 _vec_tmp2.format();
1612 }
1613
1614 TimeStamp stamp_kernel;
1615
1616 // reset block offset
1617 IndexType block_offset = IndexType(0);
1618
1619 // loop over all blocks
1620 for(IndexType iblock(0); iblock < num_blocks; ++iblock)
1621 {
1622 // get local sizes
1623 const IndexType nv = vptr[iblock+1] - vptr[iblock];
1624 const IndexType np = pptr[iblock+1] - pptr[iblock];
1625
1626 // get our local indices
1627 const IndexType* loc_vidx = &vidx[vptr[iblock]];
1628 const IndexType* loc_pidx = &pidx[pptr[iblock]];
1629
1630 // compute local degree
1631 const IndexType n = velo_dim * nv + np;
1632
1633 // get our local matrix data
1634 const DataType* lmat = &lmat_data[block_offset];
1635
1636 // get local vectors
1637 DataType* lcor_v = lcor;
1638 DataType* lcor_p = &lcor[velo_dim * nv];
1639 DataType* ldef_v = ldef;
1640 DataType* ldef_p = &ldef[velo_dim * nv];
1641
1642 // gather local defect
1643 vanka_v.gather_def(ldef_v, vec_dv, loc_vidx, nv, IndexType(0));
1644 vanka_p.gather_def(ldef_p, vec_dp, loc_pidx, np, IndexType(0));
1645
1646 // multiplicative variants only:
1647 if(multi)
1648 {
1649 // subtract A*x
1650 vanka_a.mult_cor(ldef_v, -DataType(1), vec_cv, loc_vidx, nv, IndexType(0));
1651 vanka_b.mult_cor(ldef_v, -DataType(1), vec_cp, loc_vidx, nv, IndexType(0));
1652 vanka_d.mult_cor(ldef_p, -DataType(1), vec_cv, loc_pidx, np, IndexType(0));
1653 }
1654
1655 // solve local system
1656 for(IndexType i(0); i < n; ++i)
1657 {
1658 lcor[i] = DataType(0);
1659 for(IndexType j(0); j < n; ++j)
1660 {
1661 lcor[i] += lmat[i*n + j] * ldef[j];
1662 }
1663 }
1664 flops += 2*n*n;
1665
1666 // scatter result
1667 vanka_v.scatter_cor(_omega, lcor_v, loc_vidx, nv, IndexType(0));
1668 vanka_p.scatter_cor(_omega, lcor_p, loc_pidx, np, IndexType(0));
1669
1670 // update block offset
1671 block_offset += n*n;
1672 }
1673
1674 Statistics::add_time_precon(stamp_kernel.elapsed_now());
1675
1676 // additive variant?
1677 if(!multi)
1678 {
1679 // multiply by scaling vector
1680 this->_vec_tmp2.component_product(this->_vec_tmp2, this->_vec_scale);
1681
1682 // update correction vector
1683 vec_cor.axpy(this->_vec_tmp2);
1684 }
1685
1686 // apply filter
1687 _filter.filter_cor(vec_cor);
1688 }
1689
1690 Statistics::add_flops(flops);
1691 }
1692
1695 {
1696 Intern::VankaMatrix<MatrixA_> vanka_a(_matrix.block_a());
1697 Intern::VankaMatrix<MatrixB_> vanka_b(_matrix.block_b());
1698 Intern::VankaMatrix<MatrixD_> vanka_d(_matrix.block_d());
1699
1700 // get the dimension of our system
1701 const IndexType dim = IndexType(vanka_a.row_dim);
1702
1703 // get our data arrays
1704 DataType* data = _data.data();
1705 const IndexType* vptr = _block_v_ptr.data();
1706 const IndexType* vidx = _block_v_idx.data();
1707 const IndexType* pptr = _block_p_ptr.data();
1708 const IndexType* pidx = _block_p_idx.data();
1709
1710 // format block data
1711 ::memset(data, 0, sizeof(DataType) * _data.size());
1712
1713 // allocate pivot array (pressure only)
1714 std::vector<IndexType> pivot(3*_degree_p);
1715
1716 // current block offset
1717 IndexType block_offset = IndexType(0);
1718
1719 // loop over all blocks
1720 const IndexType nblocks = IndexType(_block_v_ptr.size() - 1);
1721 for(IndexType iblock(0); iblock < nblocks; ++iblock)
1722 {
1723 // get number of velocity and pressure dofs
1724 const IndexType nv = vptr[iblock+1] - vptr[iblock];
1725 const IndexType np = pptr[iblock+1] - pptr[iblock];
1726 const IndexType dnv = dim*nv;
1727
1728 // get our local indices
1729 const IndexType* loc_vidx = &vidx[vptr[iblock]];
1730 const IndexType* loc_pidx = &pidx[pptr[iblock]];
1731
1732 // compute local block size
1733 const IndexType block_size = dnv + IndexType(2) * dnv * np + np*np;
1734
1735 // get our block data array pointer
1736 DataType* block_data = &data[block_offset];
1737
1738 // get our local matrices
1739 DataType* loc_a = block_data;
1740 DataType* loc_b = &block_data[dnv];
1741 DataType* loc_d = &block_data[dnv + dnv*np];
1742 DataType* loc_s = &block_data[dnv + IndexType(2)*dnv*np];
1743
1744 // gather diag(A)
1745 vanka_a.gather_diag(loc_a, loc_vidx, nv);
1746 // gather B and D
1747 vanka_b.gather_full(loc_b, loc_vidx, loc_pidx, nv, np, np);
1748 vanka_d.gather_full(loc_d, loc_pidx, loc_vidx, np, nv, dnv);
1749
1750 // invert diag(A) and pre-multiply D by diag(A)^{-1}
1751 for(IndexType i(0); i < dnv; ++i)
1752 {
1753 // invert a_ii
1754 loc_a[i] = DataType(1) / loc_a[i];
1755
1756 // make sure we have a normal value
1757 if(!Math::isnormal(loc_a[i]))
1758 {
1759 throw VankaFactorError();
1760 }
1761
1762 // pre-multiply D by diag(A)^{-1}
1763 for(IndexType j(0); j < np; ++j)
1764 {
1765 loc_d[j*dnv + i] *= loc_a[i];
1766 }
1767 }
1768
1769 // calculate Schur-complement of A:
1770 // S := -D * diag(A)^{-1} * B
1771 for(IndexType i(0); i < np; ++i)
1772 {
1773 for(IndexType j(0); j < np; ++j)
1774 {
1775 DataType s = DataType(0);
1776 for(IndexType k(0); k < dnv; ++k)
1777 {
1778 // Note: D is already pre-multiplied by diag(A)^{-1}
1779 s += loc_d[i*dnv + k] * loc_b[k*np + j];
1780 }
1781 loc_s[i*np + j] = -s;
1782 }
1783 }
1784
1785 // invert local matrix S
1786 DataType det = Math::invert_matrix(np, np, loc_s, pivot.data());
1787 if(!Math::isnormal(det))
1788 {
1790 throw VankaFactorError();
1791 }
1792
1793 // increment block data offset
1794 block_offset += block_size;
1795 }
1796 }
1797
1799 void _apply_diag(VectorType& vec_cor, const VectorType& vec_def)
1800 {
1801 // format correction vector
1802 vec_cor.format();
1803
1804 // do we use the multiplicative variant?
1805 const bool multi = ((int(_type) & 0x100) == 0);
1806
1807 // additive?
1808 if(!multi)
1809 {
1810 this->_vec_tmp1.copy(vec_def);
1811 this->_vec_tmp2.format();
1812 }
1813
1814 // get our sub-vectors
1815 VectorV& vec_cv = (multi ? vec_cor.template at<0>() : this->_vec_tmp2.template at<0>());
1816 VectorP& vec_cp = (multi ? vec_cor.template at<1>() : this->_vec_tmp2.template at<1>());
1817 const VectorV& vec_dv = (multi ? vec_def.template at<0>() : this->_vec_tmp1.template at<0>());
1818 const VectorP& vec_dp = (multi ? vec_def.template at<1>() : this->_vec_tmp1.template at<1>());
1819
1820 // create our vanka matrix and vector objects
1821 Intern::VankaMatrix<MatrixA_> vanka_a(_matrix.block_a());
1822 Intern::VankaMatrix<MatrixB_> vanka_b(_matrix.block_b());
1823 Intern::VankaMatrix<MatrixD_> vanka_d(_matrix.block_d());
1824 Intern::VankaVector<VectorV> vanka_v(vec_cv);
1825 Intern::VankaVector<VectorP> vanka_p(vec_cp);
1826
1827 // get velocity vector dimension
1828 const IndexType velo_dim = IndexType(vanka_v.dim);
1829
1830 // get block count
1831 const IndexType num_blocks = IndexType(_block_v_ptr.size()-1);
1832
1833 // get block data arrays
1834 const IndexType* vptr = _block_v_ptr.data();
1835 const IndexType* vidx = _block_v_idx.data();
1836 const IndexType* pptr = _block_p_ptr.data();
1837 const IndexType* pidx = _block_p_idx.data();
1838
1839 // get local data and vectors
1840 const DataType* data = _data.data();
1841 DataType* lcor = _vcor.data();
1842 DataType* ldef = _vdef.data();
1843
1844 Index flops(0);
1845
1846 // iterate
1847 for(IndexType iter(0); iter < _num_iter; ++iter)
1848 {
1849 // additive variant?
1850 if((!multi) && (iter > IndexType(0)))
1851 {
1852 // compute current defect
1853 this->_matrix.apply(_vec_tmp1, vec_cor, vec_def, -DataType(1));
1854 _vec_tmp2.format();
1855 }
1856
1857 TimeStamp stamp_kernel;
1858
1859 // reset block offset
1860 IndexType block_offset = IndexType(0);
1861
1862 // loop over all blocks
1863 for(IndexType iblock(0); iblock < num_blocks; ++iblock)
1864 {
1865 // get local sizes
1866 const IndexType nv = vptr[iblock+1] - vptr[iblock];
1867 const IndexType np = pptr[iblock+1] - pptr[iblock];
1868 const IndexType dnv = velo_dim*nv;
1869
1870 // get our local indices
1871 const IndexType* loc_vidx = &vidx[vptr[iblock]];
1872 const IndexType* loc_pidx = &pidx[pptr[iblock]];
1873
1874 // compute local block size
1875 const IndexType block_size = dnv + IndexType(2) * dnv * np + np*np;
1876
1877 // get our block data array pointer
1878 const DataType* block_data = &data[block_offset];
1879
1880 // get our local matrices
1881 const DataType* loc_a = block_data;
1882 const DataType* loc_b = &block_data[dnv];
1883 const DataType* loc_d = &block_data[dnv + dnv*np];
1884 const DataType* loc_s = &block_data[dnv + IndexType(2)*dnv*np];
1885
1886 // get local vectors
1887 DataType* lcor_v = lcor;
1888 DataType* lcor_p = &lcor[dnv];
1889 DataType* ldef_v = ldef;
1890 DataType* ldef_p = &ldef[dnv];
1891
1892 // gather local defect
1893 vanka_v.gather_def(ldef_v, vec_dv, loc_vidx, nv, IndexType(0));
1894 vanka_p.gather_def(ldef_p, vec_dp, loc_pidx, np, IndexType(0));
1895
1896 // multiplicative variants only:
1897 if(multi)
1898 {
1899 // subtract A*x
1900 vanka_a.mult_cor(ldef_v, -DataType(1), vec_cv, loc_vidx, nv, IndexType(0));
1901 vanka_b.mult_cor(ldef_v, -DataType(1), vec_cp, loc_vidx, nv, IndexType(0));
1902 vanka_d.mult_cor(ldef_p, -DataType(1), vec_cv, loc_pidx, np, IndexType(0));
1903 }
1904
1905 // update pressure RHS:
1906 // g_p := f_p - D * diag(A)^{-1} * f_u
1907 for(IndexType i(0); i < np; ++i)
1908 {
1909 DataType r = DataType(0);
1910 for(IndexType j(0); j < dnv; ++j)
1911 {
1912 // Note: D is already pre-multiplied by diag(A)^{-1}
1913 r += loc_d[i*dnv + j] * ldef_v[j];
1914 }
1915 ldef_p[i] -= r;
1916 }
1917 flops += 2*np*dnv;
1918
1919 // solve pressure:
1920 // p := S^{-1} * g_p
1921 for(IndexType i(0); i < np; ++i)
1922 {
1923 DataType r = DataType(0);
1924 for(IndexType j(0); j < np; ++j)
1925 {
1926 r += loc_s[i*np + j] * ldef_p[j];
1927 }
1928 lcor_p[i] = r;
1929 }
1930 flops += 2*np*np;
1931
1932 // update velocity RHS and solve velocity
1933 for(IndexType i(0); i < dnv; ++i)
1934 {
1935 DataType xb = DataType(0);
1936 for(IndexType j(0); j < np; ++j)
1937 {
1938 xb += loc_b[i*np + j] * lcor_p[j];
1939 }
1940 // solve: u := diag(A)^{-1} * (f_u - B*p)
1941 lcor_v[i] = loc_a[i] * (ldef_v[i] - xb);
1942 }
1943 flops += dnv * (2*np + 2);
1944
1945 // scatter result
1946 vanka_v.scatter_cor(_omega, lcor_v, loc_vidx, nv, IndexType(0));
1947 vanka_p.scatter_cor(_omega, lcor_p, loc_pidx, np, IndexType(0));
1948
1949 // update block offset
1950 block_offset += block_size;
1951 }
1952
1953 Statistics::add_time_precon(stamp_kernel.elapsed_now());
1954
1955 // additive variant?
1956 if(!multi)
1957 {
1958 // multiply by scaling vector
1959 this->_vec_tmp2.component_product(this->_vec_tmp2, this->_vec_scale);
1960
1961 // update correction vector
1962 vec_cor.axpy(this->_vec_tmp2);
1963 }
1964
1965 // apply filter
1966 _filter.filter_cor(vec_cor);
1967 }
1968
1969 Statistics::add_flops(flops);
1970 }
1971 }; // class Vanka<...>
1972
1994 template<typename MatrixA_, typename MatrixB_, typename MatrixD_, typename Filter_>
1995 std::shared_ptr<Vanka<LAFEM::SaddlePointMatrix<MatrixA_, MatrixB_, MatrixD_>, Filter_>> new_vanka(
1997 const Filter_& filter,
1998 VankaType type,
1999 typename MatrixA_::DataType omega = typename MatrixA_::DataType(1),
2000 Index num_iter = Index(1))
2001 {
2002 return std::make_shared<Vanka<LAFEM::SaddlePointMatrix<MatrixA_, MatrixB_, MatrixD_>, Filter_>>
2003 (matrix, filter, type, omega, num_iter);
2004 }
2005 } // namespace Solver
2006} // namespace FEAT
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
Saddle-Point matrix meta class template.
MatrixTypeA::DataType DataType
data type
MatrixTypeA & block_a()
Returns the sub-matrix block A.
VectorTypeR create_vector_r() const
Returns a new compatible R-Vector.
void apply(VectorTypeL &r, const VectorTypeR &x) const
Applies this matrix onto a vector.
MatrixTypeA::IndexType IndexType
index type
MatrixTypeB & block_b()
Returns the sub-matrix block B.
MatrixTypeD & block_d()
Returns the sub-matrix block D.
Variadic TupleVector class template.
std::size_t bytes() const
Returns the total amount of bytes allocated.
void copy(const TupleVector &x, bool full=false)
Performs .
void format(DataType value=DataType(0))
Reset all elements of the container to a given value or zero if missing.
void clear()
Free all allocated arrays.
Polymorphic solver interface.
Definition: base.hpp:183
Base-class for solver generated exceptions.
Definition: base.hpp:127
virtual void init_numeric() override
Performs numeric factorization.
Definition: vanka.hpp:1163
void _calc_scale()
Calculate the scaling vector for additive Vanka.
Definition: vanka.hpp:1457
double time_apply() const
Returns the total accumulated time for the solver application.
Definition: vanka.hpp:1247
virtual void done_symbolic() override
Releases the symbolic factorization data.
Definition: vanka.hpp:1148
void _factor_diag()
Performs the 'diagonal' numerical factorization.
Definition: vanka.hpp:1694
void _alloc_data()
Allocates the data arrays for numerical factorization.
Definition: vanka.hpp:1407
virtual void init_symbolic() override
Performs symbolic factorization.
Definition: vanka.hpp:1113
void _apply_full(VectorType &vec_cor, const VectorType &vec_def)
Applies the 'full' Vanka iteration.
Definition: vanka.hpp:1556
double time_init_numeric() const
Returns the total accumulated time for numeric initialization.
Definition: vanka.hpp:1239
virtual String name() const override
Returns the name of the solver.
Definition: vanka.hpp:1107
void reset_timings()
Resets the internal stop watches for time measurement.
Definition: vanka.hpp:1221
Vanka(const MatrixType &matrix, const FilterType &filter, VankaType type, DataType omega=DataType(1), Index num_iter=Index(1))
Constructor.
Definition: vanka.hpp:1096
void _apply_diag(VectorType &vec_cor, const VectorType &vec_def)
Applies the 'diagonal' Vanka iteration.
Definition: vanka.hpp:1799
double time_init_symbolic() const
Returns the total accumulated time for symbolic initialization.
Definition: vanka.hpp:1231
LAFEM::SaddlePointMatrix< MatrixA_, MatrixB_, MatrixD_ > MatrixType
our matrix type
Definition: vanka.hpp:1014
Vanka Factorization Error.
Definition: vanka.hpp:920
Vanka preconditioner/smoother class template.
Definition: vanka.hpp:934
static void add_flops(Index flops)
Add an amount of flops to the global flop counter.
Definition: statistics.hpp:206
Stop-Watch class.
Definition: stop_watch.hpp:21
double elapsed() const
Returns the total elapsed time in seconds.
Definition: stop_watch.hpp:70
void start()
Starts the stop-watch.
Definition: stop_watch.hpp:43
void reset()
Resets the elapsed time.
Definition: stop_watch.hpp:36
void stop()
Stops the stop-watch and increments elapsed time.
Definition: stop_watch.hpp:51
String class implementation.
Definition: string.hpp:46
Time stamp class.
Definition: time_stamp.hpp:54
double elapsed_now() const
Calculates the time elapsed between the time stamp and now.
Definition: time_stamp.hpp:121
bool isnormal(T_ x)
Checks whether a value is normal.
T_ sqr(T_ x)
Returns the square of a value.
Definition: math.hpp:95
DT_ invert_matrix(const IT_ n, const IT_ stride, DT_ a[], IT_ p[])
Inverts a matrix and returns its determinant.
Definition: math.hpp:1292
T_ max(T_ a, T_ b)
Returns the maximum of two values.
Definition: math.hpp:137
VankaType
Vanka type enumeration.
Definition: vanka.hpp:892
@ nodal_full_mult
Nodal full Vanka type (multiplicative)
@ block_diag_add
Blocked diagonal Vanka type (additive)
@ nodal_full_add
Nodal full Vanka type (additive)
@ block_diag_mult
Blocked diagonal Vanka type (multiplicative)
@ nodal_diag_mult
Nodal diagonal Vanka type (multiplicative)
@ block_full_mult
Blocked full Vanka type (multiplicative)
@ nodal_diag_add
Nodal diagonal Vanka type (additive)
@ block_full_add
Blocked full Vanka type (additive)
@ iter
Plot every iteration (if applicable)
@ full
full Uzawa preconditioner
@ rest
restriction (multigrid)
std::shared_ptr< Vanka< LAFEM::SaddlePointMatrix< MatrixA_, MatrixB_, MatrixD_ >, Filter_ > > new_vanka(const LAFEM::SaddlePointMatrix< MatrixA_, MatrixB_, MatrixD_ > &matrix, const Filter_ &filter, VankaType type, typename MatrixA_::DataType omega=typename MatrixA_::DataType(1), Index num_iter=Index(1))
Creates a new Vanka solver object.
Definition: vanka.hpp:1995
Status
Solver status return codes enumeration.
Definition: base.hpp:47
@ success
solving successful (convergence criterion fulfilled)
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.