FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
pmdcdsc_matrix.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
9#include <kernel/global/vector.hpp>
10#include <kernel/global/matrix.hpp>
11#include <kernel/lafem/dense_vector.hpp>
12#include <kernel/lafem/dense_vector_blocked.hpp>
13#include <kernel/lafem/sparse_matrix_csr.hpp>
14#include <kernel/lafem/sparse_matrix_bcsr.hpp>
15#include <kernel/lafem/sparse_matrix_cscr.hpp>
16#include <kernel/util/stop_watch.hpp>
17
18#include <vector>
19#include <map>
20#include <set>
21
22namespace FEAT
23{
24 namespace Global
25 {
29 template<typename MatrixB_, typename MatrixD_>
31
57 template<typename DT_, typename IT_, int dim_, typename MirrorV_, typename MirrorP_>
59 Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, dim_, 1>, MirrorV_, MirrorP_>,
60 Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, 1, dim_>, MirrorP_, MirrorV_>>
61 {
62 public:
63 typedef DT_ DataType;
64 typedef IT_ IndexType;
65 static constexpr int dim = dim_;
66
67 typedef MirrorV_ MirrorTypeV;
68 typedef MirrorP_ MirrorTypeP;
69
74
77
81
84
87
90
94
99
100 static constexpr bool is_global = true;
101 static constexpr bool is_local = false;
102
103 protected:
110
118
145
147 std::vector<int> _ranks;
148
158 std::vector<MirrorTypeP> _pres_mirrors;
159
167 std::vector<MirrorTypeP> _data_mirrors;
168
175 std::vector<Adjacency::Graph> _neighbor_graphs;
176
184 std::vector<NeighMatrixTypeS> _neighbor_matrices;
185
198
209
216
217 public:
234 const GlobalVectorTypeV& diagonal_a,
235 const GlobalMatrixTypeB& matrix_b,
236 const GlobalMatrixTypeD& matrix_d) :
237 _diagonal_a(diagonal_a),
238 _matrix_b(matrix_b),
239 _matrix_d(matrix_d)
240 {
241 }
242
245 {
246 }
247
249 const Dist::Comm* get_comm() const
250 {
251 return _diagonal_a.get_comm();
252 }
253
256 {
257 return _matrix_d.create_vector_l();
258 }
259
262 {
263 return _matrix_b.create_vector_r();
264 }
265
268 {
269 return _matrix_s;
270 }
271
283 void extract_diag(GlobalVectorTypeP& vec_diag) const
284 {
285 _matrix_s.extract_diag(vec_diag.local());
286 }
287
300 {
301 GlobalVectorTypeP vec_diag = _matrix_d.create_vector_l();
302 _matrix_s.extract_diag(vec_diag.local());
303 return vec_diag;
304 }
305
308 {
309 watch_init_symbolic.reset();
310 watch_init_sym_matrix_loc.reset();
311 watch_init_sym_pres_mirror.reset();
312 watch_init_sym_reduced_b.reset();
313 watch_init_sym_data_mirror.reset();
314 watch_init_sym_neighbor_s.reset();
315 watch_init_numeric.reset();
316 watch_init_num_matrix_loc.reset();
317 watch_init_num_gather_b.reset();
318 watch_init_num_premult_da.reset();
319 watch_init_num_neighbor_s.reset();
320 watch_apply.reset();
321 watch_apply_matrix_loc.reset();
322 watch_apply_neighbor_s.reset();
323 }
324
334 {
335 static constexpr std::size_t nt = 14;
336 double tsum[nt], tmax[nt], tloc[nt] =
337 {
338 watch_init_symbolic.elapsed(),
339 watch_init_sym_matrix_loc.elapsed(),
340 watch_init_sym_pres_mirror.elapsed(),
341 watch_init_sym_reduced_b.elapsed(),
342 watch_init_sym_data_mirror.elapsed(),
343 watch_init_sym_neighbor_s.elapsed(),
344 watch_init_numeric.elapsed(),
345 watch_init_num_matrix_loc.elapsed(),
346 watch_init_num_gather_b.elapsed(),
347 watch_init_num_premult_da.elapsed(),
348 watch_init_num_neighbor_s.elapsed(),
349 watch_apply.elapsed(),
350 watch_apply_matrix_loc.elapsed(),
351 watch_apply_neighbor_s.elapsed()
352 };
353
354 this->get_comm()->allreduce(tloc, tsum, nt, Dist::op_sum);
355 this->get_comm()->allreduce(tloc, tmax, nt, Dist::op_max);
356
357 // divide sum by number of ranks to obtain mean
358 {
359 const double ds = 1.0 / double(this->get_comm()->size());
360 for(std::size_t i(0); i < nt; ++i)
361 tsum[i] *= ds;
362 }
363
364 String s;
365 s += String(34, ' ') + "Mean Time Max Time\n";
366 s += _fmt_time(tsum[0], tmax[0], "Total Symbolic Initialization");
367 s += _fmt_time(tsum[0], tmax[0], tsum[1], tmax[1], "Local Schur Matrix Structure");
368 s += _fmt_time(tsum[0], tmax[0], tsum[2], tmax[2], "Pressure Mirror");
369 s += _fmt_time(tsum[0], tmax[0], tsum[3], tmax[3], "Reduced-B Matrix Structure");
370 s += _fmt_time(tsum[0], tmax[0], tsum[4], tmax[4], "Reduced-B Data Mirror");
371 s += _fmt_time(tsum[0], tmax[0], tsum[5], tmax[5], "Neighbor Matrix Structure");
372 double tsym_other_sum = tsum[0] - tsum[1] - tsum[2] - tsum[3] - tsum[4] - tsum[5];
373 double tsym_other_max = tmax[0] - tmax[1] - tmax[2] - tmax[3] - tmax[4] - tmax[5];
374 s += _fmt_time(tsum[0], tmax[0], tsym_other_sum, tsym_other_max, "Other Symbolic");
375
376 s += _fmt_time(tsum[6], tmax[6], "Total Numeric Initialization");
377 s += _fmt_time(tsum[6], tmax[6], tsum[7], tmax[7], "Local Schur Matrix Values");
378 s += _fmt_time(tsum[6], tmax[6], tsum[8], tmax[8], "Reduced-B Gather");
379 s += _fmt_time(tsum[6], tmax[6], tsum[9], tmax[9], "Pre-Multiply D*A");
380 s += _fmt_time(tsum[6], tmax[6], tsum[10], tmax[10], "Neighbor Matrix Values");
381 double tnum_other_sum = tsum[6] - tsum[7] - tsum[8] - tsum[9] - tsum[10];
382 double tnum_other_max = tmax[6] - tmax[7] - tmax[8] - tmax[9] - tmax[10];
383 s += _fmt_time(tsum[6], tmax[6], tnum_other_sum, tnum_other_max, "Other Numeric");
384
385 s += _fmt_time(tsum[11], tmax[11], "Total Matrix Apply");
386 s += _fmt_time(tsum[11], tmax[11], tsum[12], tmax[12], "Local Schur Matrix");
387 s += _fmt_time(tsum[11], tmax[11], tsum[13], tmax[13], "Neighbor Schur Matrix");
388 double tapp_other_sum = tsum[11] - tsum[12] - tsum[13];
389 double tapp_other_max = tmax[11] - tmax[12] - tmax[13];
390 s += _fmt_time(tsum[11], tmax[11], tapp_other_sum, tapp_other_max, "Other Apply");
391 return s;
392 }
393
397 void init()
398 {
399 init_symbolic();
400 init_numeric();
401 }
402
424 {
425 watch_init_symbolic.start();
426
427 // get the velocity and pressure gates
428 const GateTypeV* gate_v = this->_matrix_b.get_row_gate();
429 const GateTypeP* gate_p = this->_matrix_d.get_row_gate();
430 XASSERT(gate_v != nullptr);
431 XASSERT(gate_p != nullptr);
432
433 // the pressure gate must be empty, otherwise the pressure space is not discontinuous
434 XASSERTM(gate_p->_ranks.empty(), "pressure space is not discontinuous");
435
436 // compute the local matrix structure of S by D * B
437 {
438 watch_init_sym_matrix_loc.start();
439
440 // compose structures of D and B
441 Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, _matrix_d.local(), _matrix_b.local());
442 // create the matrix layout of S
443 _matrix_s = LocalMatrixTypeS(graph_s);
444
445 watch_init_sym_matrix_loc.stop();
446 }
447
448 // get our communicator
449 const Dist::Comm& comm = *gate_v->get_comm();
450
451 // get neighbor ranks
452 this->_ranks = gate_v->_ranks;
453
454 // get the number of our neighbors
455 const std::size_t num_neighs = this->_ranks.size();
456 if(num_neighs <= std::size_t(0))
457 {
458 watch_init_symbolic.stop();
459 return; // no neighbors, no problems :)
460 }
461
462 // copy the layout of B into (D*A)^T
463 this->_matrix_da = this->_matrix_b.local().clone(LAFEM::CloneMode::Layout);
464
465 // resize our member arrays
466 this->_pres_mirrors.resize(num_neighs);
467 this->_data_mirrors.resize(num_neighs);
468 this->_neighbor_graphs.resize(num_neighs);
469 this->_neighbor_matrices.resize(num_neighs);
470
471 // allocate a vector of graphs for B
472 std::vector<Adjacency::Graph> my_graphs(num_neighs);
473
474 // loop over all neighbor processes
475 for(std::size_t i(0); i < num_neighs; ++i)
476 {
477 // get the velocity mirror
478 const MirrorTypeV& mirror_v = gate_v->_mirrors.at(i);
479
480 // assemble the pressure mirror for this neighbor
481 watch_init_sym_pres_mirror.start();
482 MirrorTypeP& mirror_p = this->_pres_mirrors.at(i);
483 this->_asm_pres_mirror(mirror_p, mirror_v, this->_matrix_b.local());
484 watch_init_sym_pres_mirror.stop();
485
486 // assemble reduced B-matrix graph
487 watch_init_sym_reduced_b.start();
488 my_graphs.at(i) = this->_asm_reduced_b( mirror_p, mirror_v, this->_matrix_b.local());
489 watch_init_sym_reduced_b.stop();
490
491 // assemble B' data mirror
492 watch_init_sym_data_mirror.start();
493 this->_data_mirrors.at(i) = this->_asm_data_mirror(mirror_p, mirror_v, this->_matrix_b.local(), my_graphs.at(i));
494 watch_init_sym_data_mirror.stop();
495 }
496
497 // dimension send/receive buffers and requests
498 std::vector<std::array<Index,3>> recv_dims(num_neighs), send_dims(num_neighs);
499 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
500
501 // post receive requests for dimensions
502 for(std::size_t i(0); i < num_neighs; ++i)
503 {
504 recv_reqs[i] = comm.irecv(recv_dims.at(i).data(), std::size_t(3), this->_ranks.at(i));
505 }
506
507 // send dimensions
508 for(std::size_t i(0); i < num_neighs; ++i)
509 {
510 const Adjacency::Graph& g = my_graphs.at(i);
511 auto& sdim = send_dims.at(i);
512 sdim[0] = g.get_num_nodes_domain(); // corresponds to velocity mirror index size
513 sdim[1] = g.get_num_nodes_image();
514 sdim[2] = g.get_num_indices();
515 send_reqs[i] = comm.isend(sdim.data(), std::size_t(3), this->_ranks.at(i));
516 }
517
518 // process all pending receives
519 for(std::size_t i(0u); recv_reqs.wait_any(i); )
520 {
521 // get received dimension
522 auto& rdim = recv_dims.at(i);
523
524 // the first dimension must match our velocity mirror index set size
525 XASSERT(rdim[0] == gate_v->_mirrors.at(i).num_indices());
526
527 // allocate graph of corresponding dimensions
528 this->_neighbor_graphs.at(i) = Adjacency::Graph(rdim[0], rdim[1], rdim[2]);
529 }
530
531 // post domain-pointer array receives
532 for(std::size_t i(0); i < num_neighs; ++i)
533 {
534 recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_domain_ptr(),
535 recv_dims.at(i)[0] + std::size_t(1), this->_ranks.at(i));
536 }
537
538 // wait for all previous sends to finish
539 send_reqs.wait_all();
540
541 // post domain-pointer array sends
542 for(std::size_t i(0); i < num_neighs; ++i)
543 {
544 send_reqs[i] = comm.isend(my_graphs.at(i).get_domain_ptr(),
545 send_dims.at(i)[0] + std::size_t(1), this->_ranks.at(i));
546 }
547
548 // wait for all pending receives to finish
549 recv_reqs.wait_all();
550
551 // post image-index array receives
552 for(std::size_t i(0); i < num_neighs; ++i)
553 {
554 recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_image_idx(),
555 recv_dims.at(i)[2], this->_ranks.at(i));
556 }
557
558 // wait for all previous sends to finish
559 send_reqs.wait_all();
560
561 // post image-index array sends
562 for(std::size_t i(0); i < num_neighs; ++i)
563 {
564 send_reqs[i] = comm.isend(my_graphs.at(i).get_image_idx(), send_dims.at(i)[2], this->_ranks.at(i));
565 }
566
567 // wait for all pending receives to finish
568 recv_reqs.wait_all();
569
570 // wait for all previous sends to finish
571 send_reqs.wait_all();
572
573 // compute Schur-matrix structures for neighbors
574 for(std::size_t i(0); i < num_neighs; ++i)
575 {
576 watch_init_sym_neighbor_s.start();
577
578 // D*M^T = (M*B)^T
579 Adjacency::Graph graph_dm(Adjacency::RenderType::injectify_transpose, gate_v->_mirrors.at(i), this->_matrix_b.local());
580
581 // S = (D*M^T) * B'
582 Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, graph_dm, this->_neighbor_graphs.at(i));
583
584 // allocate Schur-matrix
585 this->_neighbor_matrices.at(i).convert(NeighMatrixTypeS(graph_s));
586
587 watch_init_sym_neighbor_s.stop();
588 }
589
590 // that's it
591 watch_init_symbolic.stop();
592 }
593
606 {
607 watch_init_numeric.start();
608
609 // pre-multiply local matrix product
610 watch_init_num_matrix_loc.start();
611 this->_matrix_s.format();
612 _asm_local_schur_matrix(this->_matrix_s, this->_matrix_d.local(), this->_diagonal_a.local(), this->_matrix_b.local());
613 watch_init_num_matrix_loc.stop();
614
615 // get the number of our neighbors
616 const std::size_t num_neighs = this->_ranks.size();
617 if(num_neighs <= std::size_t(0))
618 {
619 watch_init_numeric.stop();
620 return; // no neighbors, no problems :)
621 }
622
623 // get our communicator
624 const Dist::Comm& comm = *this->_matrix_b.get_comm();
625
626 // send/receive buffers and requests
627 std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);
628 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
629
630 // allocate receive buffer matrices B' and post receives
631 for(std::size_t i(0); i < num_neighs; ++i)
632 {
633 recv_bufs.at(i) = BufferVectorType(Index(dim) * this->_neighbor_graphs.at(i).get_num_indices());
634 recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));
635 }
636
637 // extract reduced matrix data and post send
638 for(std::size_t i(0); i < num_neighs; ++i)
639 {
640 watch_init_num_gather_b.start();
641 send_bufs.at(i) = _gather_b(this->_data_mirrors.at(i), this->_matrix_b.local());
642 watch_init_num_gather_b.stop();
643 send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));
644 }
645
646 // pre-multiply D*A and store in transposed form, i.e. CSC rather than CSR
647 watch_init_num_premult_da.start();
648 _premult_da(this->_matrix_da, this->_matrix_d.local(), this->_diagonal_a.local());
649 watch_init_num_premult_da.stop();
650
651 // process receives and compute neighbor schur matrices
652 for(std::size_t i(0u); recv_reqs.wait_any(i); )
653 {
654 watch_init_num_neighbor_s.start();
655 this->_neighbor_matrices.at(i).format();
656 _asm_neighbor_schur_matrix(this->_neighbor_matrices.at(i), this->_matrix_da,
657 this->_matrix_b.get_row_gate()->_mirrors.at(i), this->_neighbor_graphs.at(i), recv_bufs.at(i));
658 watch_init_num_neighbor_s.stop();
659 }
660
661 // wait for all previous sends to finish
662 send_reqs.wait_all();
663 watch_init_numeric.stop();
664 }
665
672 void apply(VectorTypeL& r, const VectorTypeR& x) const
673 {
674 watch_apply.start();
675 this->_apply(r.local(), x.local(), r.local(), DataType(1), true);
676 watch_apply.stop();
677 }
678
687 void apply(VectorTypeL& r, const VectorTypeR& x, const VectorTypeL& y, const DataType alpha = DataType(1)) const
688 {
689 watch_apply.start();
690 this->_apply(r.local(), x.local(), y.local(), alpha, false);
691 watch_apply.stop();
692 }
693
712 LocalMatrixTypeS asm_adp_symbolic(Index& glob_dof_offset, Index& glob_dof_count) const
713 {
714 // no neighbors?
715 if(_ranks.empty())
716 {
717 glob_dof_offset = Index(0);
718 glob_dof_count = _matrix_s.rows();
719 return _matrix_s.clone(LAFEM::CloneMode::Layout);
720 }
721
722 // get our communicator
723 const Dist::Comm& comm = *this->get_comm();
724
725 const std::size_t num_neighs = this->_ranks.size();
726
727 // get number of local DOFs
728 const Index num_loc_dofs = _matrix_s.rows();
729
730 // compute our global DOF offset and count
731 comm.exscan(&num_loc_dofs, &glob_dof_offset, std::size_t(1), Dist::op_sum);
732 comm.allreduce(&num_loc_dofs, &glob_dof_count, std::size_t(1), Dist::op_sum);
733
734 // The columns of our neighbor matrices correspond to the entries in the pressure mirror.
735 // However, for the desired ADP matrix, we have to translate these into global DOF indices.
736 // For this, each process has to map the DOF in its pressure mirrors to global DOFs and then
737 // send these DOF indices to the corresponding neighbor, so that it can map the column
738 // indices of its neighbor matrix to global DOF indices.
739
740 // send/receive mirrors and requests
741 std::vector<std::vector<IndexType>> recv_dofs(num_neighs), send_dofs(num_neighs);
742 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
743
744 // allocate receive vectors and post receives
745 for(std::size_t i(0); i < num_neighs; ++i)
746 {
747 recv_dofs.at(i).resize(_neighbor_graphs.at(i).get_num_nodes_image());
748 recv_reqs[i] = comm.irecv(recv_dofs.at(i).data(), recv_dofs.at(i).size(), this->_ranks.at(i));
749 }
750
751 // translate our pressure mirrors to global DOF vectors and post sends
752 for(std::size_t i(0); i < num_neighs; ++i)
753 {
754 const Index num_idx = _pres_mirrors.at(i).num_indices();
755 const IndexType* pidx = _pres_mirrors.at(i).indices();
756 send_dofs.at(i).resize(num_idx);
757 IndexType* sidx = send_dofs.at(i).data();
758 for(Index k(0); k < num_idx; ++k)
759 sidx[k] = glob_dof_offset + pidx[k];
760 send_reqs[i] = comm.isend(send_dofs.at(i).data(), send_dofs.at(i).size(), this->_ranks.at(i));
761 }
762
763 // get local matrix stuff
764 const Index num_rows = num_loc_dofs;
765 const IndexType* row_ptr_s = _matrix_s.row_ptr();
766 const IndexType* col_idx_s = _matrix_s.col_ind();
767
768 // compute number of non-zeros per row and total
769 Index num_nzes = _matrix_s.used_elements();
770 std::vector<IndexType> row_aux(num_rows, IndexType(0));
771 for(Index i(0); i < num_rows; ++i)
772 row_aux[i] = (row_ptr_s[i+1] - row_ptr_s[i]);
773
774 for(const auto& x : _neighbor_matrices)
775 {
776 num_nzes += x.used_elements();
777 const Index used_rows = x.used_rows();
778 const IndexType* row_ptr_x = x.row_ptr();
779 const IndexType* row_idx_x = x.row_numbers();
780 for(Index i(0); i < used_rows; ++i)
781 row_aux[row_idx_x[i]] += (row_ptr_x[i+1] - row_ptr_x[i]);
782 }
783
784 // allocate our matrix
785 LocalMatrixTypeS matrix_g(num_rows, glob_dof_count, num_nzes);
786
787 // get our index arrays
788 IndexType* row_ptr_g = matrix_g.row_ptr();
789 IndexType* col_idx_g = matrix_g.col_ind();
790
791 // compute row pointer array and store backup in aux
792 row_ptr_g[0] = IndexType(0);
793 for(Index i(0); i < num_rows; ++i)
794 {
795 row_ptr_g[i+1] = row_ptr_g[i] + row_aux[i];
796 row_aux[i] = row_ptr_g[i];
797 }
798
799 // Note: For the sake of compatibility with picky third-party libraries, we want to
800 // ensure that the column indices of the output matrix are in ascending order.
801 // For this, we have to combine the matrix layout from our own local matrix and
802 // the matrices of our neighbors in rank-ascending order. So we first create two
803 // rank maps for our neighbors with lower and higher ranks, so that we can easily
804 // loop over all matrices in rank-order.
805
806 // create two neighbors maps: one of all lower ranks and one of all higher ranks
807 std::map<int, std::size_t> neigh_map_l, neigh_map_h;
808 for(std::size_t ineigh(0); ineigh < num_neighs; ++ineigh)
809 {
810 if(_ranks.at(ineigh) < comm.rank())
811 neigh_map_l.emplace(_ranks.at(ineigh), ineigh);
812 else
813 neigh_map_h.emplace(_ranks.at(ineigh), ineigh);
814 }
815
816 // wait for all receive requests to finish
817 recv_reqs.wait_all();
818
819 // first, insert all neighbor matrices with a lower rank in rank-ascending order
820 for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)
821 {
822 std::size_t ineigh = it->second;
823 const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
824 const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
825 const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
826 const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();
827 const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();
828 for(Index i(0); i < used_rows; ++i)
829 {
830 IndexType& k = row_aux[row_idx_x[i]];
831 for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
832 col_idx_g[k] = dof_idx_x[col_idx_x[j]];
833 }
834 }
835
836 // now insert our local matrix S
837 for(Index i(0); i < num_rows; ++i)
838 {
839 IndexType k = row_aux[i];
840 for(IndexType j(row_ptr_s[i]); j < row_ptr_s[i + 1]; ++j, ++k)
841 col_idx_g[k] = glob_dof_offset + col_idx_s[j];
842 row_aux[i] = k;
843 }
844
845 // finally, insert all neighbor matrices with a higher rank in rank-ascending order
846 for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)
847 {
848 std::size_t ineigh = it->second;
849 const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
850 const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
851 const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
852 const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();
853 const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();
854 for(Index i(0); i < used_rows; ++i)
855 {
856 IndexType& k = row_aux[row_idx_x[i]];
857 for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
858 col_idx_g[k] = dof_idx_x[col_idx_x[j]];
859 }
860 }
861
862 // wait for all send requests to finish
863 send_reqs.wait_all();
864
865#ifdef DEBUG
866 // sanity check: ensure that the column indices are sorted correctly
867 for(Index i = 0; i < num_rows; ++i)
868 {
869 for(IndexType j(row_ptr_g[i]); j + 1 < row_ptr_g[i + 1]; ++j)
870 {
871 ASSERT(col_idx_g[j] < col_idx_g[j+1]);
872 }
873 }
874#endif // DEBUG
875
876 // that's it
877 return matrix_g;
878 }
879
888 {
889 // no neighbors?
890 if(_ranks.empty())
891 {
892 // copy values
893 matrix.copy(_matrix_s);
894 return;
895 }
896
897 // get number of local DOFs
898 const Index num_rows = _matrix_s.rows();
899 XASSERT(matrix.rows() == _matrix_s.rows());
900
901 // get row pointer arrays
902 const IndexType* row_ptr_s = _matrix_s.row_ptr();
903 const IndexType* row_ptr_g = matrix.row_ptr();
904 const DataType* val_s = _matrix_s.val();
905 DataType* val_g = matrix.val();
906
907 // make a copy of the row pointer
908 std::vector<IndexType> row_aux(num_rows);
909
910 // copy the row-pointer array
911 for(Index i(0); i < num_rows; ++i)
912 row_aux[i] = row_ptr_g[i];
913
914 // get this process's rank
915 const int my_rank = this->get_comm()->rank();
916
917 // create two neighbors maps: one of all lower ranks and one of all higher ranks
918 std::map<int, std::size_t> neigh_map_l, neigh_map_h;
919 for(std::size_t ineigh(0); ineigh < _ranks.size(); ++ineigh)
920 {
921 if(_ranks.at(ineigh) < my_rank)
922 neigh_map_l.emplace(_ranks.at(ineigh), ineigh);
923 else
924 neigh_map_h.emplace(_ranks.at(ineigh), ineigh);
925 }
926
927 // first, copy all neighbor matrices with a lower rank in rank-ascending order
928 for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)
929 {
930 std::size_t ineigh = it->second;
931 const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
932 const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
933 const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
934 const DataType* val_x = _neighbor_matrices.at(ineigh).val();
935 for(Index i(0); i < used_rows; ++i)
936 {
937 IndexType& k = row_aux[row_idx_x[i]];
938 for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
939 val_g[k] = val_x[j];
940 }
941 }
942
943 // now copy our own matrix
944 for(Index i(0); i < num_rows; ++i)
945 {
946 Index k = row_aux[i];
947 for(Index j(row_ptr_s[i]); j < row_ptr_s[i+1]; ++j, ++k)
948 val_g[k] = val_s[j];
949 row_aux[i] = k;
950 }
951
952 // finally, copy all neighbor matrices with a higher rank in rank-ascending order
953 for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)
954 {
955 std::size_t ineigh = it->second;
956 const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
957 const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
958 const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
959 const DataType* val_x = _neighbor_matrices.at(ineigh).val();
960 for(Index i(0); i < used_rows; ++i)
961 {
962 IndexType& k = row_aux[row_idx_x[i]];
963 for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
964 val_g[k] = val_x[j];
965 }
966 }
967
968#ifdef DEBUG
969 // sanity check: ensure that all entries have been processed
970 for(Index i(0); i < num_rows; ++i)
971 {
972 ASSERT(row_aux[i] == row_ptr_g[i+1]);
973 }
974#endif // DEBUG
975 }
976
977 protected:
979 static String _fmt_time(double tsum_total, double tmax_total, String st)
980 {
981 String s = st.pad_back(30, '.') + ":";
982 s += stringify_fp_fix(tsum_total, 6, 12) + " :";
983 s += stringify_fp_fix(tmax_total, 6, 12) + "\n";
984 return s;
985 }
987 static String _fmt_time(double tsum_total, double tmax_total, double tsum, double tmax, String st)
988 {
989 String s = st.pad_back(30, '.') + ":";
990 s += stringify_fp_fix(tsum, 6, 12) + " :";
991 s += stringify_fp_fix(tmax, 6, 12) + " [";
992 if(tsum_total > 1E-8 * Math::abs(tsum))
993 s += stringify_fp_fix(100.0*tsum/tsum_total, 2, 6) + "% :";
994 else
995 s += " --- :";
996 if(tmax_total > 1E-8 * Math::abs(tmax))
997 s += stringify_fp_fix(100.0*tmax/tmax_total, 2, 6) + "% ]\n";
998 else
999 s += " --- ]\n";
1000 return s;
1001 }
1002
1016 static void _asm_pres_mirror(MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v, const LocalMatrixTypeB& matrix_b)
1017 {
1018 const Index num_dof_mir_v = mirror_v.num_indices();
1019 const IndexType* velo_idx = mirror_v.indices();
1020 const IndexType* row_ptr = matrix_b.row_ptr();
1021 const IndexType* col_idx = matrix_b.col_ind();
1022
1023 // loop over all rows of B, which are indexed in the velocity mirror,
1024 // and add all column indices (pressure DOFs) into the pressure DOF set
1025 std::set<IndexType> dof_set;
1026 for(Index i(0); i < num_dof_mir_v; ++i)
1027 {
1028 const Index irow = velo_idx[i];
1029 for(IndexType j(row_ptr[irow]); j < row_ptr[irow+1]; ++j)
1030 dof_set.insert(col_idx[j]);
1031 }
1032
1033 // convert DOF set into a mirror
1034 mirror_p = MirrorTypeP(matrix_b.columns(), Index(dof_set.size()));
1035 IndexType* pidx = mirror_p.indices();
1036 for(auto it = dof_set.begin(); it != dof_set.end(); ++it, ++pidx)
1037 *pidx = *it;
1038 }
1039
1055 static Adjacency::Graph _asm_reduced_b(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,
1056 const LocalMatrixTypeB& matrix_b)
1057 {
1058 const Index num_dof_mir_v = mirror_v.num_indices();
1059 const Index num_dof_mir_p = mirror_p.num_indices();
1060 const IndexType* velo_idx = mirror_v.indices();
1061 const IndexType* pres_idx = mirror_p.indices();
1062 const IndexType* row_ptr = matrix_b.row_ptr();
1063 const IndexType* col_idx = matrix_b.col_ind();
1064
1065 // count number of non-zeros in indexed rows of B = non-zeros in B'
1066 Index num_red_nzes = Index(0);
1067 for(Index i(0); i < num_dof_mir_v; ++i)
1068 {
1069 const Index irow = velo_idx[i];
1070 num_red_nzes += Index(row_ptr[irow+1] - row_ptr[irow]);
1071 }
1072
1073 // allocate matrix graph for reduced part B' of B
1074 Adjacency::Graph graph(num_dof_mir_v, num_dof_mir_p, num_red_nzes);
1075 Index* dom_ptr = graph.get_domain_ptr();
1076 Index* img_idx = graph.get_image_idx();
1077
1078 // loop over all rows of reduced part B'
1079 dom_ptr[0] = Index(0);
1080 for(Index i(0); i < num_dof_mir_v; ++i)
1081 {
1082 // get the B-row index of the i-th row of B'
1083 const IndexType irow = velo_idx[i];
1084 // loop over all non-zeroes in the row of B / B'
1085 for(IndexType j(row_ptr[irow]), k(dom_ptr[i]); j < row_ptr[irow+1]; ++j, ++k)
1086 {
1087 // initialize invalid index for assertion below
1088 img_idx[k] = ~IndexType(0);
1089
1090 // try to find this column index (=pressure DOF) in our pressure DOF mirror
1091 for(Index l(0); l < num_dof_mir_p; ++l)
1092 {
1093 if(col_idx[j] == pres_idx[l])
1094 {
1095 // that's our pressure DOF, so store its index as column index of B'
1096 img_idx[k] = l;
1097 break;
1098 }
1099 }
1100 ASSERT(img_idx[k] != ~IndexType(0));
1101 }
1102 // set next row pointer of B'
1103 dom_ptr[i+1] = dom_ptr[i] + (row_ptr[irow+1] - row_ptr[irow]);
1104 }
1105
1106 // sort indices of B' and return graph
1107 graph.sort_indices();
1108 return graph;
1109 }
1110
1129 static MirrorTypeP _asm_data_mirror(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,
1130 const LocalMatrixTypeB& matrix_b, const Adjacency::Graph& graph)
1131 {
1132 const Index num_dof_mir_v = mirror_v.num_indices();
1133 const IndexType* velo_idx = mirror_v.indices();
1134 const IndexType* pres_idx = mirror_p.indices();
1135 const IndexType* row_ptr = matrix_b.row_ptr();
1136 const IndexType* col_idx = matrix_b.col_ind();
1137 const Index* dom_ptr = graph.get_domain_ptr();
1138 const Index* img_idx = graph.get_image_idx();
1139
1140 // allocate mirror for data array of B'
1141 MirrorTypeP data_mirror(matrix_b.used_elements(), graph.get_num_indices());
1142 IndexType* dat_idx = data_mirror.indices();
1143
1144 // loop over all rows of B' again
1145 for(Index i(0); i < num_dof_mir_v; ++i)
1146 {
1147 // get the B-row index (=velocity DOF index)
1148 const IndexType irow = velo_idx[i];
1149
1150 // loop over all non-zeroes in the row of B / B'
1151 for(Index k(dom_ptr[i]); k < dom_ptr[i+1]; ++k)
1152 {
1153 // initialize invalid index for assertion below
1154 dat_idx[k] = ~IndexType(0);
1155
1156 // get the B-column index (=pressure DOF index)
1157 const Index jcol = pres_idx[img_idx[k]];
1158
1159 // try to find that column in our matrix
1160 for(IndexType j(row_ptr[irow]); j < row_ptr[irow + 1]; ++j)
1161 {
1162 if(jcol == col_idx[j])
1163 {
1164 // that's the entry we were looking for
1165 dat_idx[k] = j;
1166 break;
1167 }
1168 }
1169 ASSERT(dat_idx[k] != ~IndexType(0));
1170 }
1171 }
1172
1173 // that's it
1174 return data_mirror;
1175 }
1176
1189 static BufferVectorType _gather_b(const MirrorTypeP& data_mirror, const LocalMatrixTypeB& matrix_b)
1190 {
1191 const Index num_idx = data_mirror.num_indices();
1192 const IndexType* idx = data_mirror.indices();
1193 const ValueTypeB* mat_val = matrix_b.val();
1194
1195 BufferVectorType buf(Index(dim)*num_idx);
1196 ValueTypeB* buf_val = reinterpret_cast<ValueTypeB*>(buf.elements());
1197
1198 for(Index i(0); i < num_idx; ++i)
1199 {
1200 buf_val[i] = mat_val[idx[i]];
1201 }
1202
1203 return buf;
1204 }
1205
1207 inline static ValueTypeD _mat_mult_d_a(const ValueTypeD& val_d, const ValueTypeA& val_a)
1208 {
1209 ValueTypeD da;
1210 for(int i(0); i < dim; ++i)
1211 da(0, i) = val_d(0, i) * val_a(i);
1212 return da;
1213 }
1214
1216 inline static DataType _mat_mult_da_b(const ValueTypeD& val_da, const ValueTypeB& val_b)
1217 {
1218 DataType s = DataType(0);
1219 for(int i(0); i < dim; ++i)
1220 s += val_da(0, i) * val_b(i, 0);
1221 return s;
1222 }
1223
1225 inline static void _mat_mult_d_a(ValueTypeB& val_da, const ValueTypeD& val_d, const ValueTypeA& val_a)
1226 {
1227 for(int i(0); i < dim; ++i)
1228 val_da(i, 0) = val_d(0, i) * val_a(i);
1229 }
1230
1232 inline static DataType _mat_mult_da_b(const ValueTypeB& val_da, const ValueTypeB& val_b)
1233 {
1234 DataType s = DataType(0);
1235 for(int i(0); i < dim; ++i)
1236 s += val_da(i, 0) * val_b(i, 0);
1237 return s;
1238 }
1239
1252 static void _premult_da(LocalMatrixTypeB& mat_da, const LocalMatrixTypeD& mat_d, const LocalVectorTypeV& mat_a)
1253 {
1254 const Index num_rows = mat_d.rows();
1255 const Index num_cols = mat_d.columns();
1256
1257 const IndexType* row_ptr = mat_d.row_ptr();
1258 const IndexType* col_idx = mat_d.col_ind();
1259 const IndexType* row_ptr_da = mat_da.row_ptr();
1260
1261 ValueTypeB* val_da = mat_da.val();
1262 const ValueTypeD* val_d = mat_d.val();
1263 const ValueTypeA* val_a = mat_a.elements();
1264
1265 // create a temporary copy of the row-pointer of D^T
1266 std::vector<Index> ptr(num_cols);
1267 for(Index i(0); i < num_cols; ++i)
1268 ptr[i] = row_ptr_da[i];
1269
1270 // transpose matrix
1271 for(Index i(0); i < num_rows; ++i)
1272 {
1273 for(Index j(row_ptr[i]); j < row_ptr[i + 1]; ++j)
1274 {
1275 const Index col = col_idx[j];
1276 _mat_mult_d_a(val_da[ptr[col]++], val_d[j], val_a[col]);
1277 }
1278 }
1279 }
1280
1291 const LocalVectorTypeV& a, const LocalMatrixTypeB& b)
1292 {
1293 // Note: this is a modified version of SparseMatrixCSR::add_double_mat_product for BCSR multiplicands
1294
1295 // validate matrix dimensions
1296 XASSERT(s.rows() == d.rows());
1297 XASSERT(d.columns() == a.size());
1298 XASSERT(a.size() == b.rows());
1299 XASSERT(b.columns() == s.columns());
1300
1301 // fetch matrix arrays:
1302 DataType* data_s = s.val();
1303 const ValueTypeD* data_d = d.val();
1304 const ValueTypeA* data_a = a.elements();
1305 const ValueTypeB* data_b = b.val();
1306 const IndexType* row_ptr_s = s.row_ptr();
1307 const IndexType* col_idx_s = s.col_ind();
1308 const IndexType* row_ptr_d = d.row_ptr();
1309 const IndexType* col_idx_d = d.col_ind();
1310 const IndexType* row_ptr_b = b.row_ptr();
1311 const IndexType* col_idx_b = b.col_ind();
1312
1313 // loop over all rows of D and S, resp.
1314 for(IndexType i(0); i < IndexType(s.rows()); ++i)
1315 {
1316 // loop over all non-zeros D_ik in row i of D
1317 for(IndexType ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
1318 {
1319 // get column index k
1320 const IndexType k = col_idx_d[ik];
1321
1322 // pre-compute (D_ik * A_kk)
1323 const ValueTypeD val_da = _mat_mult_d_a(data_d[ik], data_a[k]);
1324
1325 // S_i. += (D_ik * A_kk) * B_k.
1326 for(IndexType ij(row_ptr_s[i]), kj(row_ptr_b[k]); kj < row_ptr_b[k+1]; ++ij)
1327 {
1328 ASSERT(ij < row_ptr_s[i+1]);
1329 ASSERT(col_idx_s[ij] <= col_idx_b[kj]);
1330 if(col_idx_s[ij] == col_idx_b[kj])
1331 {
1332 data_s[ij] += _mat_mult_da_b(val_da, data_b[kj]);
1333 ++kj;
1334 }
1335 }
1336 }
1337 }
1338 }
1339
1359 const MirrorTypeV& mirror_v, const Adjacency::Graph& graph_b, const BufferVectorType& buffer_b)
1360 {
1361 // validate matrix dimensions
1362 XASSERT(s.rows() == da.columns());
1363 XASSERT(da.rows() == mirror_v.size());
1364 XASSERT(graph_b.get_num_nodes_domain() == mirror_v.num_indices());
1365 XASSERT(graph_b.get_num_nodes_image() == s.columns());
1366 XASSERT(mirror_v.size() == da.rows());
1367
1368 // fetch matrix arrays:
1369 DataType* data_s = s.val();
1370 const IndexType* row_ptr_s = s.row_ptr();
1371 const IndexType* row_idx_s = s.row_numbers();
1372 const IndexType* col_idx_s = s.col_ind();
1373 const Index used_rows_s = s.used_rows();
1374
1375 // we use CSR for (D*A)^T here, which is effectively a CSC storage of (D*A),
1376 // thus rows and columns swap their meaning here
1377 const ValueTypeB* data_da = da.val();
1378 const IndexType* col_ptr_da = da.row_ptr();
1379 const IndexType* row_idx_da = da.col_ind();
1380
1381 const Index num_mir_idx = mirror_v.num_indices();
1382 const IndexType* mir_idx = mirror_v.indices();
1383
1384 const ValueTypeB* data_b = reinterpret_cast<const ValueTypeB*>(buffer_b.elements());
1385 const Index* dom_ptr_b = graph_b.get_domain_ptr();
1386 const Index* img_idx_b = graph_b.get_image_idx();
1387
1388 // loop over all velocity mirror entries, which correspond to the rows of B and the columns of D
1389 for(Index l(0); l < num_mir_idx; ++l)
1390 {
1391 // get velocity DOF index k = column index of D = row index of B
1392 const Index k = mir_idx[l];
1393
1394 // loop over all columns k of D
1395 for(Index ik(col_ptr_da[k]), si(0); (ik < col_ptr_da[k + 1]) && (si < used_rows_s); )
1396 {
1397 // check if we have found a common row of S and the k-th column of D
1398 if(row_idx_da[ik] < row_idx_s[si])
1399 {
1400 // row i exists in column k of D, but not in matrix S
1401 ++ik;
1402 continue;
1403 }
1404 if(row_idx_s[si] < row_idx_da[ik])
1405 {
1406 // row i exists in matrix S, but not in column k of D
1407 ++si;
1408 continue;
1409 }
1410
1411 // S_i. += (D_ik * A_kk) * B_l.
1412 for(IndexType ij(row_ptr_s[si]), kj(dom_ptr_b[l]); kj < dom_ptr_b[l+1]; ++ij)
1413 {
1414 ASSERT(ij < row_ptr_s[si+1]);
1415 ASSERT(col_idx_s[ij] <= img_idx_b[kj]);
1416 if(col_idx_s[ij] == img_idx_b[kj])
1417 {
1418 data_s[ij] += _mat_mult_da_b(data_da[ik], data_b[kj]);
1419 ++kj;
1420 }
1421 }
1422
1423 // okay, continue with next row of S and D
1424 ++ik;
1425 ++si;
1426 }
1427 }
1428 }
1429
1448 void _apply(LocalVectorTypeP& r, const LocalVectorTypeP& x, const LocalVectorTypeP& y, const DataType alpha, bool only_Ax) const
1449 {
1450 // get the number of our neighbors
1451 const std::size_t num_neighs = this->_ranks.size();
1452
1453 // no neighbors?
1454 if(num_neighs <= std::size_t(0))
1455 {
1456 // multiply by local schur matrix and return
1457 watch_apply_matrix_loc.start();
1458 if(only_Ax)
1459 this->_matrix_s.apply(r, x);
1460 else
1461 this->_matrix_s.apply(r, x, y, alpha);
1462 watch_apply_matrix_loc.stop();
1463 return;
1464 }
1465
1466 // get our communicator
1467 const Dist::Comm& comm = *this->_matrix_b.get_comm();
1468
1469 // send/receive buffers and requests
1470 std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);
1471 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
1472
1473 // allocate receive buffer vectors and post receives
1474 for(std::size_t i(0); i < num_neighs; ++i)
1475 {
1476 recv_bufs.at(i) = BufferVectorType(this->_neighbor_matrices.at(i).columns());
1477 recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));
1478 }
1479
1480 // extract pressure dofs and post sends
1481 for(std::size_t i(0); i < num_neighs; ++i)
1482 {
1483 send_bufs.at(i) = BufferVectorType(this->_pres_mirrors.at(i).num_indices());
1484 this->_pres_mirrors.at(i).gather(send_bufs.at(i), x);
1485 send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));
1486 }
1487
1488 // multiply by local schur matrix
1489 watch_apply_matrix_loc.start();
1490 if(only_Ax)
1491 this->_matrix_s.apply(r, x);
1492 else
1493 this->_matrix_s.apply(r, x, y, alpha);
1494 watch_apply_matrix_loc.stop();
1495
1496 // process receives and multiply by neighbor schur matrices
1497 for(std::size_t i(0u); recv_reqs.wait_any(i); )
1498 {
1499 watch_apply_neighbor_s.start();
1500 this->_neighbor_matrices.at(i).apply(r, recv_bufs.at(i), r, (only_Ax ? DataType(1) : alpha));
1501 watch_apply_neighbor_s.stop();
1502 }
1503
1504 // wait for all previous sends to finish
1505 send_reqs.wait_all();
1506 }
1507 }; // class PMDCDSCMatrix<...>
1508 } // namespace Global
1509} // namespace FEAT
#define ASSERT(expr)
Debug-Assertion macro definition.
Definition: assertion.hpp:229
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
Adjacency Graph implementation.
Definition: graph.hpp:34
void sort_indices()
Sorts the image indices to non-descending order.
Definition: graph.cpp:206
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374
Index get_num_indices() const
Returns the total number indices.
Definition: graph.hpp:390
Communicator class.
Definition: dist.hpp:1349
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
Definition: dist.cpp:655
void exscan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Exclusive Scan.
Definition: dist.cpp:683
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Definition: dist.cpp:716
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
Definition: dist.cpp:704
int rank() const
Returns the rank of this process in this communicator.
Definition: dist.hpp:1494
Communication Request vector class.
Definition: dist.hpp:640
void wait_all()
Blocks until all active requests are fulfilled.
Definition: dist.cpp:324
Global gate implementation.
Definition: gate.hpp:51
const Dist::Comm * get_comm() const
Returns a const pointer to the underlying communicator.
Definition: gate.hpp:138
std::vector< Mirror_ > _mirrors
vector mirrors
Definition: gate.hpp:73
std::vector< int > _ranks
communication ranks
Definition: gate.hpp:71
Global Matrix wrapper class template.
Definition: matrix.hpp:40
VectorTypeR create_vector_r() const
Creates and returns a new R-compatible global vector object.
Definition: matrix.hpp:207
VectorTypeL create_vector_l() const
Creates and returns a new L-compatible global vector object.
Definition: matrix.hpp:197
const GateRowType * get_row_gate() const
Returns a const pointer to the internal row gate of the matrix.
Definition: matrix.hpp:154
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gates of the matrix.
Definition: matrix.hpp:174
LocalMatrix_ & local()
Returns a reference to the internal local LAFEM matrix object.
Definition: matrix.hpp:126
static ValueTypeD _mat_mult_d_a(const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_local_schur_matrix: multiply two values D*A
static MirrorTypeP _asm_data_mirror(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b, const Adjacency::Graph &graph)
Auxiliary Function: computes a data mirror from B to B'.
static Adjacency::Graph _asm_reduced_b(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: reduces the matrix B to B'.
LocalMatrixTypeS asm_adp_symbolic(Index &glob_dof_offset, Index &glob_dof_count) const
Assembles the matrix structure of an algebraic DOF partitioned matrix.
static void _premult_da(LocalMatrixTypeB &mat_da, const LocalMatrixTypeD &mat_d, const LocalVectorTypeV &mat_a)
Auxiliary Function: Computes the product (D*A) and stores the result in a CSC matrix.
static void _asm_pres_mirror(MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary function: assembles a pressure mirror from a velocity mirror and the B-matrix.
static void _asm_local_schur_matrix(LocalMatrixTypeS &s, const LocalMatrixTypeD &d, const LocalVectorTypeV &a, const LocalMatrixTypeB &b)
Assembles the local Schur-Matrix S = (D*A*B)
static DataType _mat_mult_da_b(const ValueTypeB &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values DA*B with DA in transposed for...
static DataType _mat_mult_da_b(const ValueTypeD &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_local_schur_matrix: multiply two values DA*B
static void _mat_mult_d_a(ValueTypeB &val_da, const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values D*A and store in transposed fo...
static void _asm_neighbor_schur_matrix(NeighMatrixTypeS &s, const LocalMatrixTypeB &da, const MirrorTypeV &mirror_v, const Adjacency::Graph &graph_b, const BufferVectorType &buffer_b)
Assembles a neighbor Schur-Matrix S_k = (D*A*M_k^T*B_k)
void _apply(LocalVectorTypeP &r, const LocalVectorTypeP &x, const LocalVectorTypeP &y, const DataType alpha, bool only_Ax) const
Applies this matrix onto a vector.
static BufferVectorType _gather_b(const MirrorTypeP &data_mirror, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: Gathers the data array from B to B' using the data mirror.
Pre-Multiplied Discontinuous Diagonal Schur-Complement Matrix.
Global vector wrapper class template.
Definition: vector.hpp:68
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gate of the vector.
Definition: vector.hpp:158
LocalVector_ & local()
Returns a reference to the internal local LAFEM vector object.
Definition: vector.hpp:121
void format(DT_ value=DT_(0))
Reset all elements of the container to a given value or zero if missing.
Definition: container.hpp:851
Blocked Dense data vector class template.
auto elements() const -> const typename Intern::DenseVectorBlockedPerspectiveHelper< DT_, BlockSize_, perspective_ >::Type *
Retrieve a pointer to the data array.
Index size() const
The number of elements.
Dense data vector class template.
DT_ * elements()
Get a pointer to the data array.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Index used_elements() const
Retrieve non zero element count.
auto val() const -> const typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
Retrieve non zero element array.
CSCR based sparse matrix.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_numbers()
Retrieve row numbers of non zero rows.
Index columns() const
Retrieve matrix column count.
Index rows() const
Retrieve matrix row count.
DT_ * val()
Retrieve non zero element array.
Index used_rows() const
Retrieve used matrix non zero row count.
IT_ * row_ptr()
Retrieve row start index array.
CSR based sparse matrix.
void copy(const SparseMatrixCSR &x, bool full=false)
Performs .
IT_ * col_ind()
Retrieve column indices array.
DT_ * val()
Retrieve non zero element array.
Index rows() const
Retrieve matrix row count.
SparseMatrixCSR clone(CloneMode clone_mode=CloneMode::Weak) const
Clone operation.
Index columns() const
Retrieve matrix column count.
void extract_diag(VectorTypeL &diag, DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
Index used_elements() const
Retrieve non zero element count.
IT_ * row_ptr()
Retrieve row start index array.
Stop-Watch class.
Definition: stop_watch.hpp:21
double elapsed() const
Returns the total elapsed time in seconds.
Definition: stop_watch.hpp:70
void start()
Starts the stop-watch.
Definition: stop_watch.hpp:43
void reset()
Resets the elapsed time.
Definition: stop_watch.hpp:36
void stop()
Stops the stop-watch and increments elapsed time.
Definition: stop_watch.hpp:51
String class implementation.
Definition: string.hpp:46
String pad_back(size_type len, char c=' ') const
Pads the back of the string up to a desired length.
Definition: string.hpp:415
Tiny Matrix class template.
Tiny Vector class template.
@ injectify_sorted
Render-Injectified mode, sort image indices.
@ injectify_transpose
Render-Injectified-Transpose mode.
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
Definition: dist.hpp:271
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
FEAT namespace.
Definition: adjactor.hpp:12
String stringify_fp_fix(DataType_ value, int precision=0, int width=0, bool sign=false)
Prints a floating point value to a string in fixed-point notation.
Definition: string.hpp:1142
std::uint64_t Index
Index data type.