FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
pmdcdsc_matrix.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
8// includes, FEAT
9#include <kernel/global/vector.hpp>
10#include <kernel/global/matrix.hpp>
11#include <kernel/lafem/dense_vector.hpp>
12#include <kernel/lafem/dense_vector_blocked.hpp>
13#include <kernel/lafem/sparse_matrix_csr.hpp>
14#include <kernel/lafem/sparse_matrix_bcsr.hpp>
15#include <kernel/lafem/sparse_matrix_cscr.hpp>
16#include <kernel/util/stop_watch.hpp>
17
18#include <vector>
19#include <map>
20#include <set>
21
22namespace FEAT
23{
24 namespace Global
25 {
29 template<typename MatrixB_, typename MatrixD_>
31
57 template<typename DT_, typename IT_, int dim_, typename MirrorV_, typename MirrorP_>
59 Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, dim_, 1>, MirrorV_, MirrorP_>,
60 Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, 1, dim_>, MirrorP_, MirrorV_>>
61 {
62 public:
63 typedef DT_ DataType;
64 typedef IT_ IndexType;
65 static constexpr int dim = dim_;
66
67 typedef MirrorV_ MirrorTypeV;
68 typedef MirrorP_ MirrorTypeP;
69
74
77
81
84
87
90
94
99
100 static constexpr bool is_global = true;
101 static constexpr bool is_local = false;
102
103 protected:
110
118
145
147 std::vector<int> _ranks;
148
158 std::vector<MirrorTypeP> _pres_mirrors;
159
167 std::vector<MirrorTypeP> _data_mirrors;
168
175 std::vector<Adjacency::Graph> _neighbor_graphs;
176
184 std::vector<NeighMatrixTypeS> _neighbor_matrices;
185
198
209
216
217 public:
234 const GlobalVectorTypeV& diagonal_a,
235 const GlobalMatrixTypeB& matrix_b,
236 const GlobalMatrixTypeD& matrix_d) :
237 _diagonal_a(diagonal_a),
238 _matrix_b(matrix_b),
239 _matrix_d(matrix_d)
240 {
241 }
242
245 {
246 }
247
249 const Dist::Comm* get_comm() const
250 {
251 return _diagonal_a.get_comm();
252 }
253
256 {
257 return _matrix_d.create_vector_l();
258 }
259
262 {
263 return _matrix_b.create_vector_r();
264 }
265
268 {
269 return _matrix_s;
270 }
271
283 void extract_diag(GlobalVectorTypeP& vec_diag) const
284 {
285 _matrix_s.extract_diag(vec_diag.local());
286 }
287
300 {
301 GlobalVectorTypeP vec_diag = _matrix_d.create_vector_l();
302 _matrix_s.extract_diag(vec_diag.local());
303 return vec_diag;
304 }
305
308 {
309 watch_init_symbolic.reset();
310 watch_init_sym_matrix_loc.reset();
311 watch_init_sym_pres_mirror.reset();
312 watch_init_sym_reduced_b.reset();
313 watch_init_sym_data_mirror.reset();
314 watch_init_sym_neighbor_s.reset();
315 watch_init_numeric.reset();
316 watch_init_num_matrix_loc.reset();
317 watch_init_num_gather_b.reset();
318 watch_init_num_premult_da.reset();
319 watch_init_num_neighbor_s.reset();
320 watch_apply.reset();
321 watch_apply_matrix_loc.reset();
322 watch_apply_neighbor_s.reset();
323 }
324
334 {
335 static constexpr std::size_t nt = 14;
336 double tsum[nt], tmax[nt], tloc[nt] =
337 {
338 watch_init_symbolic.elapsed(),
339 watch_init_sym_matrix_loc.elapsed(),
340 watch_init_sym_pres_mirror.elapsed(),
341 watch_init_sym_reduced_b.elapsed(),
342 watch_init_sym_data_mirror.elapsed(),
343 watch_init_sym_neighbor_s.elapsed(),
344 watch_init_numeric.elapsed(),
345 watch_init_num_matrix_loc.elapsed(),
346 watch_init_num_gather_b.elapsed(),
347 watch_init_num_premult_da.elapsed(),
348 watch_init_num_neighbor_s.elapsed(),
349 watch_apply.elapsed(),
350 watch_apply_matrix_loc.elapsed(),
351 watch_apply_neighbor_s.elapsed()
352 };
353
354 this->get_comm()->allreduce(tloc, tsum, nt, Dist::op_sum);
355 this->get_comm()->allreduce(tloc, tmax, nt, Dist::op_max);
356
357 // divide sum by number of ranks to obtain mean
358 {
359 const double ds = 1.0 / double(this->get_comm()->size());
360 for(std::size_t i(0); i < nt; ++i)
361 tsum[i] *= ds;
362 }
363
364 String s;
365 s += String(34, ' ') + "Mean Time Max Time\n";
366 s += _fmt_time(tsum[0], tmax[0], "Total Symbolic Initialization");
367 s += _fmt_time(tsum[0], tmax[0], tsum[1], tmax[1], "Local Schur Matrix Structure");
368 s += _fmt_time(tsum[0], tmax[0], tsum[2], tmax[2], "Pressure Mirror");
369 s += _fmt_time(tsum[0], tmax[0], tsum[3], tmax[3], "Reduced-B Matrix Structure");
370 s += _fmt_time(tsum[0], tmax[0], tsum[4], tmax[4], "Reduced-B Data Mirror");
371 s += _fmt_time(tsum[0], tmax[0], tsum[5], tmax[5], "Neighbor Matrix Structure");
372 double tsym_other_sum = tsum[0] - tsum[1] - tsum[2] - tsum[3] - tsum[4] - tsum[5];
373 double tsym_other_max = tmax[0] - tmax[1] - tmax[2] - tmax[3] - tmax[4] - tmax[5];
374 s += _fmt_time(tsum[0], tmax[0], tsym_other_sum, tsym_other_max, "Other Symbolic");
375
376 s += _fmt_time(tsum[6], tmax[6], "Total Numeric Initialization");
377 s += _fmt_time(tsum[6], tmax[6], tsum[7], tmax[7], "Local Schur Matrix Values");
378 s += _fmt_time(tsum[6], tmax[6], tsum[8], tmax[8], "Reduced-B Gather");
379 s += _fmt_time(tsum[6], tmax[6], tsum[9], tmax[9], "Pre-Multiply D*A");
380 s += _fmt_time(tsum[6], tmax[6], tsum[10], tmax[10], "Neighbor Matrix Values");
381 double tnum_other_sum = tsum[6] - tsum[7] - tsum[8] - tsum[9] - tsum[10];
382 double tnum_other_max = tmax[6] - tmax[7] - tmax[8] - tmax[9] - tmax[10];
383 s += _fmt_time(tsum[6], tmax[6], tnum_other_sum, tnum_other_max, "Other Numeric");
384
385 s += _fmt_time(tsum[11], tmax[11], "Total Matrix Apply");
386 s += _fmt_time(tsum[11], tmax[11], tsum[12], tmax[12], "Local Schur Matrix");
387 s += _fmt_time(tsum[11], tmax[11], tsum[13], tmax[13], "Neighbor Schur Matrix");
388 double tapp_other_sum = tsum[11] - tsum[12] - tsum[13];
389 double tapp_other_max = tmax[11] - tmax[12] - tmax[13];
390 s += _fmt_time(tsum[11], tmax[11], tapp_other_sum, tapp_other_max, "Other Apply");
391 return s;
392 }
393
      /// \brief Performs the full initialization: symbolic first, then numeric (the order is mandatory).
      void init()
      {
        init_symbolic();
        init_numeric();
      }
402
424 {
425 watch_init_symbolic.start();
426
427 // get the velocity and pressure gates
428 const GateTypeV* gate_v = this->_matrix_b.get_row_gate();
429 const GateTypeP* gate_p = this->_matrix_d.get_row_gate();
430 XASSERT(gate_v != nullptr);
431 XASSERT(gate_p != nullptr);
432
433 // the pressure gate must be empty, otherwise the pressure space is not discontinuous
434 XASSERTM(gate_p->_ranks.empty(), "pressure space is not discontinuous");
435
436 // compute the local matrix structure of S by D * B
437 {
438 watch_init_sym_matrix_loc.start();
439
440 // compose structures of D and B
441 Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, _matrix_d.local(), _matrix_b.local());
442 // create the matrix layout of S
443 _matrix_s = LocalMatrixTypeS(graph_s);
444
445 watch_init_sym_matrix_loc.stop();
446 }
447
448 // get our communicator
449 const Dist::Comm& comm = *gate_v->get_comm();
450
451 // get neighbor ranks
452 this->_ranks = gate_v->_ranks;
453
454 // get the number of our neighbors
455 const std::size_t num_neighs = this->_ranks.size();
456 if(num_neighs <= std::size_t(0))
457 {
458 watch_init_symbolic.stop();
459 return; // no neighbors, no problems :)
460 }
461
462 // copy the layout of B into (D*A)^T
463 this->_matrix_da = this->_matrix_b.local().clone(LAFEM::CloneMode::Layout);
464
465 // resize our member arrays
466 this->_pres_mirrors.resize(num_neighs);
467 this->_data_mirrors.resize(num_neighs);
468 this->_neighbor_graphs.resize(num_neighs);
469 this->_neighbor_matrices.resize(num_neighs);
470
471 // allocate a vector of graphs for B
472 std::vector<Adjacency::Graph> my_graphs(num_neighs);
473
474 // loop over all neighbor processes
475 for(std::size_t i(0); i < num_neighs; ++i)
476 {
477 // get the velocity mirror
478 const MirrorTypeV& mirror_v = gate_v->_mirrors.at(i);
479
480 // assemble the pressure mirror for this neighbor
481 watch_init_sym_pres_mirror.start();
482 MirrorTypeP& mirror_p = this->_pres_mirrors.at(i);
483 this->_asm_pres_mirror(mirror_p, mirror_v, this->_matrix_b.local());
484 watch_init_sym_pres_mirror.stop();
485
486 // assemble reduced B-matrix graph
487 watch_init_sym_reduced_b.start();
488 my_graphs.at(i) = this->_asm_reduced_b( mirror_p, mirror_v, this->_matrix_b.local());
489 watch_init_sym_reduced_b.stop();
490
491 // assemble B' data mirror
492 watch_init_sym_data_mirror.start();
493 this->_data_mirrors.at(i) = this->_asm_data_mirror(mirror_p, mirror_v, this->_matrix_b.local(), my_graphs.at(i));
494 watch_init_sym_data_mirror.stop();
495 }
496
497 // dimension send/receive buffers and requests
498 std::vector<std::array<Index,3>> recv_dims(num_neighs), send_dims(num_neighs);
499 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
500
501 // post receive requests for dimensions
502 for(std::size_t i(0); i < num_neighs; ++i)
503 {
504 recv_reqs[i] = comm.irecv(recv_dims.at(i).data(), std::size_t(3), this->_ranks.at(i));
505 }
506
507 // send dimensions
508 for(std::size_t i(0); i < num_neighs; ++i)
509 {
510 const Adjacency::Graph& g = my_graphs.at(i);
511 auto& sdim = send_dims.at(i);
512 sdim[0] = g.get_num_nodes_domain(); // corresponds to velocity mirror index size
513 sdim[1] = g.get_num_nodes_image();
514 sdim[2] = g.get_num_indices();
515 send_reqs[i] = comm.isend(sdim.data(), std::size_t(3), this->_ranks.at(i));
516 }
517
518 // process all pending receives
519 for(std::size_t i(0u); recv_reqs.wait_any(i); )
520 {
521 // get received dimension
522 auto& rdim = recv_dims.at(i);
523
524 // the first dimension must match our velocity mirror index set size
525 XASSERT(rdim[0] == gate_v->_mirrors.at(i).num_indices());
526
527 // allocate graph of corresponding dimensions
528 this->_neighbor_graphs.at(i) = Adjacency::Graph(rdim[0], rdim[1], rdim[2]);
529 }
530
531 // post domain-pointer array receives
532 for(std::size_t i(0); i < num_neighs; ++i)
533 {
534 recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_domain_ptr(),
535 recv_dims.at(i)[0] + std::size_t(1), this->_ranks.at(i));
536 }
537
538 // wait for all previous sends to finish
539 send_reqs.wait_all();
540
541 // post domain-pointer array sends
542 for(std::size_t i(0); i < num_neighs; ++i)
543 {
544 send_reqs[i] = comm.isend(my_graphs.at(i).get_domain_ptr(),
545 send_dims.at(i)[0] + std::size_t(1), this->_ranks.at(i));
546 }
547
548 // wait for all pending receives to finish
549 recv_reqs.wait_all();
550
551 // post image-index array receives
552 for(std::size_t i(0); i < num_neighs; ++i)
553 {
554 recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_image_idx(),
555 recv_dims.at(i)[2], this->_ranks.at(i));
556 }
557
558 // wait for all previous sends to finish
559 send_reqs.wait_all();
560
561 // post image-index array sends
562 for(std::size_t i(0); i < num_neighs; ++i)
563 {
564 send_reqs[i] = comm.isend(my_graphs.at(i).get_image_idx(), send_dims.at(i)[2], this->_ranks.at(i));
565 }
566
567 // wait for all pending receives to finish
568 recv_reqs.wait_all();
569
570 // wait for all previous sends to finish
571 send_reqs.wait_all();
572
573 // compute Schur-matrix structures for neighbors
574 for(std::size_t i(0); i < num_neighs; ++i)
575 {
576 watch_init_sym_neighbor_s.start();
577
578 // D*M^T = (M*B)^T
579 Adjacency::Graph graph_dm(Adjacency::RenderType::injectify_transpose, gate_v->_mirrors.at(i), this->_matrix_b.local());
580
581 // S = (D*M^T) * B'
582 Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, graph_dm, this->_neighbor_graphs.at(i));
583
584 // allocate Schur-matrix
585 this->_neighbor_matrices.at(i).convert(NeighMatrixTypeS(graph_s));
586
587 watch_init_sym_neighbor_s.stop();
588 }
589
590 // that's it
591 watch_init_symbolic.stop();
592 }
593
606 {
607 watch_init_numeric.start();
608
609 // pre-multiply local matrix product
610 watch_init_num_matrix_loc.start();
611 this->_matrix_s.format();
612 _asm_local_schur_matrix(this->_matrix_s, this->_matrix_d.local(), this->_diagonal_a.local(), this->_matrix_b.local());
613 watch_init_num_matrix_loc.stop();
614
615 // get the number of our neighbors
616 const std::size_t num_neighs = this->_ranks.size();
617 if(num_neighs <= std::size_t(0))
618 {
619 watch_init_numeric.stop();
620 return; // no neighbors, no problems :)
621 }
622
623 // get our communicator
624 const Dist::Comm& comm = *this->_matrix_b.get_comm();
625
626 // send/receive buffers and requests
627 std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);
628 Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);
629
630 // allocate receive buffer matrices B' and post receives
631 for(std::size_t i(0); i < num_neighs; ++i)
632 {
633 recv_bufs.at(i) = BufferVectorType(Index(dim) * this->_neighbor_graphs.at(i).get_num_indices());
634 recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));
635 }
636
637 // extract reduced matrix data and post send
638 for(std::size_t i(0); i < num_neighs; ++i)
639 {
640 watch_init_num_gather_b.start();
641 send_bufs.at(i) = _gather_b(this->_data_mirrors.at(i), this->_matrix_b.local());
642 watch_init_num_gather_b.stop();
643 send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));
644 }
645
646 // pre-multiply D*A and store in transposed form, i.e. CSC rather than CSR
647 watch_init_num_premult_da.start();
648 _premult_da(this->_matrix_da, this->_matrix_d.local(), this->_diagonal_a.local());
649 watch_init_num_premult_da.stop();
650
651 // process receives and compute neighbor schur matrices
652 for(std::size_t i(0u); recv_reqs.wait_any(i); )
653 {
654 watch_init_num_neighbor_s.start();
655 this->_neighbor_matrices.at(i).format();
656 _asm_neighbor_schur_matrix(this->_neighbor_matrices.at(i), this->_matrix_da,
657 this->_matrix_b.get_row_gate()->_mirrors.at(i), this->_neighbor_graphs.at(i), recv_bufs.at(i));
658 watch_init_num_neighbor_s.stop();
659 }
660
661 // wait for all previous sends to finish
662 send_reqs.wait_all();
663 watch_init_numeric.stop();
664 }
665
      /// \brief Computes r := S*x, where S is this virtual Schur-complement matrix.
      ///
      /// \param[out] r The vector receiving the product.
      /// \param[in] x The multiplicand vector.
      void apply(VectorTypeL& r, const VectorTypeR& x) const
      {
        watch_apply.start();
        // NOTE(review): the trailing 'true' presumably selects the pure "r := S*x" variant of
        // _apply (y is passed as r here and alpha as 1); _apply is not visible in this chunk -- confirm there
        this->_apply(r.local(), x.local(), r.local(), DataType(1), true);
        watch_apply.stop();
      }
678
      /// \brief Computes r := y + alpha*S*x (presumably; see note below).
      ///
      /// \param[out] r The vector receiving the result.
      /// \param[in] x The multiplicand vector.
      /// \param[in] y The additive summand vector.
      /// \param[in] alpha The scaling factor for the product, defaults to 1.
      void apply(VectorTypeL& r, const VectorTypeR& x, const VectorTypeL& y, const DataType alpha = DataType(1)) const
      {
        watch_apply.start();
        // NOTE(review): _apply is not visible in this chunk; the 'false' flag presumably
        // selects the additive "r := y + alpha*S*x" variant -- confirm at its definition
        this->_apply(r.local(), x.local(), y.local(), alpha, false);
        watch_apply.stop();
      }
693
700 void adp_compute_counts(Index& global_dof_offset, Index& global_dof_count,
701 Index& owned_dof_count, Index& owned_num_nzes, Index& global_num_nzes) const
702 {
703 // initialize values for serial case
704 global_dof_offset = Index(0);
705 global_dof_count = owned_dof_count = _matrix_s.rows();
706
707 // compute number of non-zero entries
708 owned_num_nzes = _matrix_s.used_elements();
709 for(const auto& x : _neighbor_matrices)
710 owned_num_nzes += x.used_elements();
711 global_num_nzes = owned_num_nzes;
712
713 // compute our global DOF offset and count
714 if(!_ranks.empty())
715 {
716 this->get_comm()->exscan(&owned_dof_count, &global_dof_offset, std::size_t(1), Dist::op_sum);
717 this->get_comm()->allreduce(&owned_dof_count, &global_dof_count, std::size_t(1), Dist::op_sum);
718 this->get_comm()->allreduce(&owned_num_nzes, &global_num_nzes, std::size_t(1), Dist::op_sum);
719 }
720 }
721
      /// \brief Uploads the symbolic structure (row-pointer and column-index arrays) of the
      /// distributed Schur-complement matrix in ADP CSR format with globally sorted column indices.
      ///
      /// \param[out] row_ptr The CSR row-pointer array to be filled; must hold (local rows + 1) entries.
      /// \param[out] col_idx The CSR column-index array to be filled with global DOF column indices.
      /// \param[in] global_dof_offset This process's global DOF offset, as computed by adp_compute_counts().
      template<typename RPT_, typename CIT_>
      void adp_upload_symbolic(RPT_* row_ptr, CIT_* col_idx, Index global_dof_offset) const
      {
        // maximum allowed row-pointer/column index values assuming signed int types
        static constexpr std::uint64_t max_rpt = 1ull << (8*sizeof(RPT_) - 1);
        static constexpr std::uint64_t max_cit = 1ull << (8*sizeof(CIT_) - 1);

        // prevent "unused variable" warnings in non-debug builds
        (void)max_rpt;
        (void)max_cit;

        // no neighbors?
        if(_ranks.empty())
        {
          // simply copy our local matrix S
          const Index n = _matrix_s.rows();
          const Index m = _matrix_s.used_elements();
          const IndexType* row_ptr_s = _matrix_s.row_ptr();
          const IndexType* col_idx_s = _matrix_s.col_ind();

          FEAT_PRAGMA_OMP(parallel for)
          for(Index i = 0; i <= n; ++i)
          {
            ASSERTM(std::uint64_t(row_ptr_s[i]) < max_rpt, "row-pointer exceeds RPT_ type range!");
            row_ptr[i] = RPT_(row_ptr_s[i]);
          }

          FEAT_PRAGMA_OMP(parallel for)
          for(Index i = 0; i < m; ++i)
          {
            ASSERTM(std::uint64_t(col_idx_s[i]) < max_cit, "column-index exceeds CIT_ type range!");
            col_idx[i] = CIT_(col_idx_s[i]);
          }

          return;
        }

        // get our communicator
        const Dist::Comm& comm = *this->get_comm();

        const std::size_t num_neighs = this->_ranks.size();

        // The columns of our neighbor matrices correspond to the entries in the pressure mirror.
        // However, for the desired ADP matrix, we have to translate these into global DOF indices.
        // For this, each process has to map the DOF in its pressure mirrors to global DOFs and then
        // send these DOF indices to the corresponding neighbor, so that it can map the column
        // indices of its neighbor matrix to global DOF indices.

        // send/receive mirrors and requests
        std::vector<std::vector<IndexType>> recv_dofs(num_neighs), send_dofs(num_neighs);
        Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);

        // allocate receive vectors and post receives
        for(std::size_t i(0); i < num_neighs; ++i)
        {
          recv_dofs.at(i).resize(_neighbor_graphs.at(i).get_num_nodes_image());
          recv_reqs[i] = comm.irecv(recv_dofs.at(i).data(), recv_dofs.at(i).size(), this->_ranks.at(i));
        }

        // translate our pressure mirrors to global DOF vectors and post sends
        for(std::size_t i(0); i < num_neighs; ++i)
        {
          const Index num_idx = _pres_mirrors.at(i).num_indices();
          const IndexType* pidx = _pres_mirrors.at(i).indices();
          send_dofs.at(i).resize(num_idx);
          IndexType* sidx = send_dofs.at(i).data();
          for(Index k(0); k < num_idx; ++k)
            sidx[k] = global_dof_offset + pidx[k];
          send_reqs[i] = comm.isend(send_dofs.at(i).data(), send_dofs.at(i).size(), this->_ranks.at(i));
        }

        // get local matrix stuff
        const Index num_rows = _matrix_s.rows();
        const IndexType* row_ptr_s = _matrix_s.row_ptr();
        const IndexType* col_idx_s = _matrix_s.col_ind();

        // compute number of non-zeros per row and total
        std::vector<IndexType> row_aux(num_rows, IndexType(0));
        for(Index i(0); i < num_rows; ++i)
          row_aux[i] = (row_ptr_s[i+1] - row_ptr_s[i]);

        // add the couplings contributed by each neighbor matrix; these matrices store only
        // their used rows explicitly, addressed via row_numbers()
        for(const auto& x : _neighbor_matrices)
        {
          const Index used_rows = x.used_rows();
          const IndexType* row_ptr_x = x.row_ptr();
          const IndexType* row_idx_x = x.row_numbers();
          for(Index i(0); i < used_rows; ++i)
            row_aux[row_idx_x[i]] += (row_ptr_x[i+1] - row_ptr_x[i]);
        }

        // compute row pointer array and store backup in aux
        // (afterwards, row_aux[i] acts as the current write position inside row i)
        row_ptr[0] = RPT_(0);
        for(Index i(0); i < num_rows; ++i)
        {
          row_ptr[i+1] = row_ptr[i] + row_aux[i];
          row_aux[i] = IndexType(row_ptr[i]);
        }

        // Note: For the sake of compatibility with picky third-party libraries, we want to
        // ensure that the column indices of the output matrix are in ascending order.
        // For this, we have to combine the matrix layout from our own local matrix and
        // the matrices of our neighbors in rank-ascending order. So we first create two
        // rank maps for our neighbors with lower and higher ranks, so that we can easily
        // loop over all matrices in rank-order.

        // create two neighbors maps: one of all lower ranks and one of all higher ranks
        std::map<int, std::size_t> neigh_map_l, neigh_map_h;
        for(std::size_t ineigh(0); ineigh < num_neighs; ++ineigh)
        {
          if(_ranks.at(ineigh) < comm.rank())
            neigh_map_l.emplace(_ranks.at(ineigh), ineigh);
          else
            neigh_map_h.emplace(_ranks.at(ineigh), ineigh);
        }

        // wait for all receive requests to finish
        recv_reqs.wait_all();

        // first, insert all neighbor matrices with a lower rank in rank-ascending order
        for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)
        {
          std::size_t ineigh = it->second;
          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
          const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();
          const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();
          for(Index i(0); i < used_rows; ++i)
          {
            // k references the write cursor of this row and advances it in place
            IndexType& k = row_aux[row_idx_x[i]];
            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
              col_idx[k] = CIT_(dof_idx_x[col_idx_x[j]]);
          }
        }

        // now insert our local matrix S
        for(Index i(0); i < num_rows; ++i)
        {
          IndexType k = row_aux[i];
          for(IndexType j(row_ptr_s[i]); j < row_ptr_s[i + 1]; ++j, ++k)
            col_idx[k] = CIT_(global_dof_offset + col_idx_s[j]);
          row_aux[i] = k;
        }

        // finally, insert all neighbor matrices with a higher rank in rank-ascending order
        for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)
        {
          std::size_t ineigh = it->second;
          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
          const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();
          const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();
          for(Index i(0); i < used_rows; ++i)
          {
            IndexType& k = row_aux[row_idx_x[i]];
            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
              col_idx[k] = CIT_(dof_idx_x[col_idx_x[j]]);
          }
        }

        // wait for all send requests to finish
        send_reqs.wait_all();

#ifdef DEBUG
        // sanity check: ensure that the column indices are sorted correctly
        for(Index i = 0; i < num_rows; ++i)
        {
          for(RPT_ j(row_ptr[i]); j + 1 < row_ptr[i + 1]; ++j)
          {
            ASSERT(col_idx[j] < col_idx[j+1]);
          }
        }
#endif // DEBUG
      }
916
      /// \brief Uploads the numeric values of the distributed Schur-complement matrix in ADP CSR
      /// format, matching the layout previously created by adp_upload_symbolic().
      ///
      /// \param[out] val The CSR value array to be filled.
      /// \param[in] row_ptr The CSR row-pointer array as created by adp_upload_symbolic().
      /// \param[in] col_idx Unused here; the DOXY macro strips the name outside documentation builds.
      template<typename DTV_, typename RPT_, typename CIT_>
      void adp_upload_numeric(DTV_* val, const RPT_* row_ptr, const CIT_* DOXY(col_idx)) const
      {
        // no neighbors?
        if(_ranks.empty())
        {
          // simply copy the values of our local matrix S
          const Index num_nzes = _matrix_s.used_elements();
          const DataType* val_s = _matrix_s.val();

          FEAT_PRAGMA_OMP(parallel for)
          for(Index i = 0; i < num_nzes; ++i)
            val[i] = DTV_(val_s[i]);

          return;
        }

        // get number of local DOFs
        const Index num_rows = _matrix_s.rows();

        // get row pointer arrays
        const IndexType* row_ptr_s = _matrix_s.row_ptr();
        const DataType* val_s = _matrix_s.val();

        // make a copy of the row pointer
        // (row_aux[i] acts as the current write position inside row i below)
        std::vector<IndexType> row_aux(num_rows);

        // copy the row-pointer array
        for(Index i(0); i < num_rows; ++i)
          row_aux[i] = IndexType(row_ptr[i]);

        // get this process's rank
        const int my_rank = this->get_comm()->rank();

        // create two neighbors maps: one of all lower ranks and one of all higher ranks;
        // the values must be written in rank-ascending order to match the column ordering
        // that adp_upload_symbolic() produced
        std::map<int, std::size_t> neigh_map_l, neigh_map_h;
        for(std::size_t ineigh(0); ineigh < _ranks.size(); ++ineigh)
        {
          if(_ranks.at(ineigh) < my_rank)
            neigh_map_l.emplace(_ranks.at(ineigh), ineigh);
          else
            neigh_map_h.emplace(_ranks.at(ineigh), ineigh);
        }

        // first, copy all neighbor matrices with a lower rank in rank-ascending order
        for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)
        {
          std::size_t ineigh = it->second;
          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
          const DataType* val_x = _neighbor_matrices.at(ineigh).val();
          for(Index i(0); i < used_rows; ++i)
          {
            // k references the write cursor of this row and advances it in place
            IndexType& k = row_aux[row_idx_x[i]];
            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
              val[k] = DTV_(val_x[j]);
          }
        }

        // now copy our own matrix
        for(Index i(0); i < num_rows; ++i)
        {
          Index k = row_aux[i];
          for(Index j(row_ptr_s[i]); j < row_ptr_s[i+1]; ++j, ++k)
            val[k] = DTV_(val_s[j]);
          row_aux[i] = k;
        }

        // finally, copy all neighbor matrices with a higher rank in rank-ascending order
        for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)
        {
          std::size_t ineigh = it->second;
          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();
          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();
          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();
          const DataType* val_x = _neighbor_matrices.at(ineigh).val();
          for(Index i(0); i < used_rows; ++i)
          {
            IndexType& k = row_aux[row_idx_x[i]];
            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)
              val[k] = DTV_(val_x[j]);
          }
        }

#ifdef DEBUG
        // sanity check: ensure that all entries have been processed
        for(Index i(0); i < num_rows; ++i)
        {
          ASSERT(row_aux[i] == IndexType(row_ptr[i+1]));
        }
#endif // DEBUG
      }
1024
1025 protected:
1027 static String _fmt_time(double tsum_total, double tmax_total, String st)
1028 {
1029 String s = st.pad_back(30, '.') + ":";
1030 s += stringify_fp_fix(tsum_total, 6, 12) + " :";
1031 s += stringify_fp_fix(tmax_total, 6, 12) + "\n";
1032 return s;
1033 }
1035 static String _fmt_time(double tsum_total, double tmax_total, double tsum, double tmax, String st)
1036 {
1037 String s = st.pad_back(30, '.') + ":";
1038 s += stringify_fp_fix(tsum, 6, 12) + " :";
1039 s += stringify_fp_fix(tmax, 6, 12) + " [";
1040 if(tsum_total > 1E-8 * Math::abs(tsum))
1041 s += stringify_fp_fix(100.0*tsum/tsum_total, 2, 6) + "% :";
1042 else
1043 s += " --- :";
1044 if(tmax_total > 1E-8 * Math::abs(tmax))
1045 s += stringify_fp_fix(100.0*tmax/tmax_total, 2, 6) + "% ]\n";
1046 else
1047 s += " --- ]\n";
1048 return s;
1049 }
1050
1064 static void _asm_pres_mirror(MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v, const LocalMatrixTypeB& matrix_b)
1065 {
1066 const Index num_dof_mir_v = mirror_v.num_indices();
1067 const IndexType* velo_idx = mirror_v.indices();
1068 const IndexType* row_ptr = matrix_b.row_ptr();
1069 const IndexType* col_idx = matrix_b.col_ind();
1070
1071 // loop over all rows of B, which are indexed in the velocity mirror,
1072 // and add all column indices (pressure DOFs) into the pressure DOF set
1073 std::set<IndexType> dof_set;
1074 for(Index i(0); i < num_dof_mir_v; ++i)
1075 {
1076 const Index irow = velo_idx[i];
1077 for(IndexType j(row_ptr[irow]); j < row_ptr[irow+1]; ++j)
1078 dof_set.insert(col_idx[j]);
1079 }
1080
1081 // convert DOF set into a mirror
1082 mirror_p = MirrorTypeP(matrix_b.columns(), Index(dof_set.size()));
1083 IndexType* pidx = mirror_p.indices();
1084 for(auto it = dof_set.begin(); it != dof_set.end(); ++it, ++pidx)
1085 *pidx = *it;
1086 }
1087
1103 static Adjacency::Graph _asm_reduced_b(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,
1104 const LocalMatrixTypeB& matrix_b)
1105 {
1106 const Index num_dof_mir_v = mirror_v.num_indices();
1107 const Index num_dof_mir_p = mirror_p.num_indices();
1108 const IndexType* velo_idx = mirror_v.indices();
1109 const IndexType* pres_idx = mirror_p.indices();
1110 const IndexType* row_ptr = matrix_b.row_ptr();
1111 const IndexType* col_idx = matrix_b.col_ind();
1112
1113 // count number of non-zeros in indexed rows of B = non-zeros in B'
1114 Index num_red_nzes = Index(0);
1115 for(Index i(0); i < num_dof_mir_v; ++i)
1116 {
1117 const Index irow = velo_idx[i];
1118 num_red_nzes += Index(row_ptr[irow+1] - row_ptr[irow]);
1119 }
1120
1121 // allocate matrix graph for reduced part B' of B
1122 Adjacency::Graph graph(num_dof_mir_v, num_dof_mir_p, num_red_nzes);
1123 Index* dom_ptr = graph.get_domain_ptr();
1124 Index* img_idx = graph.get_image_idx();
1125
1126 // loop over all rows of reduced part B'
1127 dom_ptr[0] = Index(0);
1128 for(Index i(0); i < num_dof_mir_v; ++i)
1129 {
1130 // get the B-row index of the i-th row of B'
1131 const IndexType irow = velo_idx[i];
1132 // loop over all non-zeroes in the row of B / B'
1133 for(IndexType j(row_ptr[irow]), k(dom_ptr[i]); j < row_ptr[irow+1]; ++j, ++k)
1134 {
1135 // initialize invalid index for assertion below
1136 img_idx[k] = ~IndexType(0);
1137
1138 // try to find this column index (=pressure DOF) in our pressure DOF mirror
1139 for(Index l(0); l < num_dof_mir_p; ++l)
1140 {
1141 if(col_idx[j] == pres_idx[l])
1142 {
1143 // that's our pressure DOF, so store its index as column index of B'
1144 img_idx[k] = l;
1145 break;
1146 }
1147 }
1148 ASSERT(img_idx[k] != ~IndexType(0));
1149 }
1150 // set next row pointer of B'
1151 dom_ptr[i+1] = dom_ptr[i] + (row_ptr[irow+1] - row_ptr[irow]);
1152 }
1153
1154 // sort indices of B' and return graph
1155 graph.sort_indices();
1156 return graph;
1157 }
1158
1177 static MirrorTypeP _asm_data_mirror(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,
1178 const LocalMatrixTypeB& matrix_b, const Adjacency::Graph& graph)
1179 {
1180 const Index num_dof_mir_v = mirror_v.num_indices();
1181 const IndexType* velo_idx = mirror_v.indices();
1182 const IndexType* pres_idx = mirror_p.indices();
1183 const IndexType* row_ptr = matrix_b.row_ptr();
1184 const IndexType* col_idx = matrix_b.col_ind();
1185 const Index* dom_ptr = graph.get_domain_ptr();
1186 const Index* img_idx = graph.get_image_idx();
1187
1188 // allocate mirror for data array of B'
1189 MirrorTypeP data_mirror(matrix_b.used_elements(), graph.get_num_indices());
1190 IndexType* dat_idx = data_mirror.indices();
1191
1192 // loop over all rows of B' again
1193 for(Index i(0); i < num_dof_mir_v; ++i)
1194 {
1195 // get the B-row index (=velocity DOF index)
1196 const IndexType irow = velo_idx[i];
1197
1198 // loop over all non-zeroes in the row of B / B'
1199 for(Index k(dom_ptr[i]); k < dom_ptr[i+1]; ++k)
1200 {
1201 // initialize invalid index for assertion below
1202 dat_idx[k] = ~IndexType(0);
1203
1204 // get the B-column index (=pressure DOF index)
1205 const Index jcol = pres_idx[img_idx[k]];
1206
1207 // try to find that column in our matrix
1208 for(IndexType j(row_ptr[irow]); j < row_ptr[irow + 1]; ++j)
1209 {
1210 if(jcol == col_idx[j])
1211 {
1212 // that's the entry we were looking for
1213 dat_idx[k] = j;
1214 break;
1215 }
1216 }
1217 ASSERT(dat_idx[k] != ~IndexType(0));
1218 }
1219 }
1220
1221 // that's it
1222 return data_mirror;
1223 }
1224
1237 static BufferVectorType _gather_b(const MirrorTypeP& data_mirror, const LocalMatrixTypeB& matrix_b)
1238 {
1239 const Index num_idx = data_mirror.num_indices();
1240 const IndexType* idx = data_mirror.indices();
1241 const ValueTypeB* mat_val = matrix_b.val();
1242
1243 BufferVectorType buf(Index(dim)*num_idx);
1244 ValueTypeB* buf_val = reinterpret_cast<ValueTypeB*>(buf.elements());
1245
1246 for(Index i(0); i < num_idx; ++i)
1247 {
1248 buf_val[i] = mat_val[idx[i]];
1249 }
1250
1251 return buf;
1252 }
1253
1255 inline static ValueTypeD _mat_mult_d_a(const ValueTypeD& val_d, const ValueTypeA& val_a)
1256 {
1257 ValueTypeD da;
1258 for(int i(0); i < dim; ++i)
1259 da(0, i) = val_d(0, i) * val_a(i);
1260 return da;
1261 }
1262
1264 inline static DataType _mat_mult_da_b(const ValueTypeD& val_da, const ValueTypeB& val_b)
1265 {
1266 DataType s = DataType(0);
1267 for(int i(0); i < dim; ++i)
1268 s += val_da(0, i) * val_b(i, 0);
1269 return s;
1270 }
1271
1273 inline static void _mat_mult_d_a(ValueTypeB& val_da, const ValueTypeD& val_d, const ValueTypeA& val_a)
1274 {
1275 for(int i(0); i < dim; ++i)
1276 val_da(i, 0) = val_d(0, i) * val_a(i);
1277 }
1278
1280 inline static DataType _mat_mult_da_b(const ValueTypeB& val_da, const ValueTypeB& val_b)
1281 {
1282 DataType s = DataType(0);
1283 for(int i(0); i < dim; ++i)
1284 s += val_da(i, 0) * val_b(i, 0);
1285 return s;
1286 }
1287
1300 static void _premult_da(LocalMatrixTypeB& mat_da, const LocalMatrixTypeD& mat_d, const LocalVectorTypeV& mat_a)
1301 {
1302 const Index num_rows = mat_d.rows();
1303 const Index num_cols = mat_d.columns();
1304
1305 const IndexType* row_ptr = mat_d.row_ptr();
1306 const IndexType* col_idx = mat_d.col_ind();
1307 const IndexType* row_ptr_da = mat_da.row_ptr();
1308
1309 ValueTypeB* val_da = mat_da.val();
1310 const ValueTypeD* val_d = mat_d.val();
1311 const ValueTypeA* val_a = mat_a.elements();
1312
1313 // create a temporary copy of the row-pointer of D^T
1314 std::vector<Index> ptr(num_cols);
1315 for(Index i(0); i < num_cols; ++i)
1316 ptr[i] = row_ptr_da[i];
1317
1318 // transpose matrix
1319 for(Index i(0); i < num_rows; ++i)
1320 {
1321 for(Index j(row_ptr[i]); j < row_ptr[i + 1]; ++j)
1322 {
1323 const Index col = col_idx[j];
1324 _mat_mult_d_a(val_da[ptr[col]++], val_d[j], val_a[col]);
1325 }
1326 }
1327 }
1328
1339 const LocalVectorTypeV& a, const LocalMatrixTypeB& b)
1340 {
1341 // Note: this is a modified version of SparseMatrixCSR::add_double_mat_product for BCSR multiplicands
1342
1343 // validate matrix dimensions
1344 XASSERT(s.rows() == d.rows());
1345 XASSERT(d.columns() == a.size());
1346 XASSERT(a.size() == b.rows());
1347 XASSERT(b.columns() == s.columns());
1348
1349 // fetch matrix arrays:
1350 DataType* data_s = s.val();
1351 const ValueTypeD* data_d = d.val();
1352 const ValueTypeA* data_a = a.elements();
1353 const ValueTypeB* data_b = b.val();
1354 const IndexType* row_ptr_s = s.row_ptr();
1355 const IndexType* col_idx_s = s.col_ind();
1356 const IndexType* row_ptr_d = d.row_ptr();
1357 const IndexType* col_idx_d = d.col_ind();
1358 const IndexType* row_ptr_b = b.row_ptr();
1359 const IndexType* col_idx_b = b.col_ind();
1360
1361 // loop over all rows of D and S, resp.
1362 for(IndexType i(0); i < IndexType(s.rows()); ++i)
1363 {
1364 // loop over all non-zeros D_ik in row i of D
1365 for(IndexType ik(row_ptr_d[i]); ik < row_ptr_d[i+1]; ++ik)
1366 {
1367 // get column index k
1368 const IndexType k = col_idx_d[ik];
1369
1370 // pre-compute (D_ik * A_kk)
1371 const ValueTypeD val_da = _mat_mult_d_a(data_d[ik], data_a[k]);
1372
1373 // S_i. += (D_ik * A_kk) * B_k.
1374 for(IndexType ij(row_ptr_s[i]), kj(row_ptr_b[k]); kj < row_ptr_b[k+1]; ++ij)
1375 {
1376 ASSERT(ij < row_ptr_s[i+1]);
1377 ASSERT(col_idx_s[ij] <= col_idx_b[kj]);
1378 if(col_idx_s[ij] == col_idx_b[kj])
1379 {
1380 data_s[ij] += _mat_mult_da_b(val_da, data_b[kj]);
1381 ++kj;
1382 }
1383 }
1384 }
1385 }
1386 }
1387
1407 const MirrorTypeV& mirror_v, const Adjacency::Graph& graph_b, const BufferVectorType& buffer_b)
1408 {
1409 // validate matrix dimensions
1410 XASSERT(s.rows() == da.columns());
1411 XASSERT(da.rows() == mirror_v.size());
1412 XASSERT(graph_b.get_num_nodes_domain() == mirror_v.num_indices());
1413 XASSERT(graph_b.get_num_nodes_image() == s.columns());
1414 XASSERT(mirror_v.size() == da.rows());
1415
1416 // fetch matrix arrays:
1417 DataType* data_s = s.val();
1418 const IndexType* row_ptr_s = s.row_ptr();
1419 const IndexType* row_idx_s = s.row_numbers();
1420 const IndexType* col_idx_s = s.col_ind();
1421 const Index used_rows_s = s.used_rows();
1422
1423 // we use CSR for (D*A)^T here, which is effectively a CSC storage of (D*A),
1424 // thus rows and columns swap their meaning here
1425 const ValueTypeB* data_da = da.val();
1426 const IndexType* col_ptr_da = da.row_ptr();
1427 const IndexType* row_idx_da = da.col_ind();
1428
1429 const Index num_mir_idx = mirror_v.num_indices();
1430 const IndexType* mir_idx = mirror_v.indices();
1431
1432 const ValueTypeB* data_b = reinterpret_cast<const ValueTypeB*>(buffer_b.elements());
1433 const Index* dom_ptr_b = graph_b.get_domain_ptr();
1434 const Index* img_idx_b = graph_b.get_image_idx();
1435
1436 // loop over all velocity mirror entries, which correspond to the rows of B and the columns of D
1437 for(Index l(0); l < num_mir_idx; ++l)
1438 {
1439 // get velocity DOF index k = column index of D = row index of B
1440 const Index k = mir_idx[l];
1441
1442 // loop over all columns k of D
1443 for(Index ik(col_ptr_da[k]), si(0); (ik < col_ptr_da[k + 1]) && (si < used_rows_s); )
1444 {
1445 // check if we have found a common row of S and the k-th column of D
1446 if(row_idx_da[ik] < row_idx_s[si])
1447 {
1448 // row i exists in column k of D, but not in matrix S
1449 ++ik;
1450 continue;
1451 }
1452 if(row_idx_s[si] < row_idx_da[ik])
1453 {
1454 // row i exists in matrix S, but not in column k of D
1455 ++si;
1456 continue;
1457 }
1458
1459 // S_i. += (D_ik * A_kk) * B_l.
1460 for(IndexType ij(row_ptr_s[si]), kj(dom_ptr_b[l]); kj < dom_ptr_b[l+1]; ++ij)
1461 {
1462 ASSERT(ij < row_ptr_s[si+1]);
1463 ASSERT(col_idx_s[ij] <= img_idx_b[kj]);
1464 if(col_idx_s[ij] == img_idx_b[kj])
1465 {
1466 data_s[ij] += _mat_mult_da_b(data_da[ik], data_b[kj]);
1467 ++kj;
1468 }
1469 }
1470
1471 // okay, continue with next row of S and D
1472 ++ik;
1473 ++si;
1474 }
1475 }
1476 }
1477
/**
 * \brief Applies this matrix onto a local pressure vector.
 *
 * Computes r := S*x (if only_Ax is true) or r := y + alpha*S*x (otherwise),
 * where S includes both the local Schur-matrix and the neighbor Schur-matrix
 * contributions gathered via non-blocking MPI communication.
 * NOTE(review): the r = y + alpha*A*x convention for apply(r,x,y,alpha) is
 * assumed from the LAFEM matrix interface — confirm against SparseMatrixCSR docs.
 *
 * \param[out] r       receives the result
 * \param[in] x        the pressure vector to be multiplied
 * \param[in] y        the summand vector (unused if only_Ax is true)
 * \param[in] alpha    scaling factor for S*x (unused if only_Ax is true)
 * \param[in] only_Ax  selects plain product vs. scaled product-plus-summand
 */
void _apply(LocalVectorTypeP& r, const LocalVectorTypeP& x, const LocalVectorTypeP& y, const DataType alpha, bool only_Ax) const
{
  // get the number of our neighbors
  const std::size_t num_neighs = this->_ranks.size();

  // no neighbors? then there is no communication to overlap with
  if(num_neighs <= std::size_t(0))
  {
    // multiply by local schur matrix and return
    watch_apply_matrix_loc.start();
    if(only_Ax)
      this->_matrix_s.apply(r, x);
    else
      this->_matrix_s.apply(r, x, y, alpha);
    watch_apply_matrix_loc.stop();
    return;
  }

  // get our communicator
  const Dist::Comm& comm = *this->_matrix_b.get_comm();

  // send/receive buffers and requests
  std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);
  Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);

  // allocate receive buffer vectors and post receives
  // (receives are posted before the sends so matching messages can land immediately)
  for(std::size_t i(0); i < num_neighs; ++i)
  {
    recv_bufs.at(i) = BufferVectorType(this->_neighbor_matrices.at(i).columns());
    recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));
  }

  // extract pressure dofs and post sends
  for(std::size_t i(0); i < num_neighs; ++i)
  {
    send_bufs.at(i) = BufferVectorType(this->_pres_mirrors.at(i).num_indices());
    this->_pres_mirrors.at(i).gather(send_bufs.at(i), x);
    send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));
  }

  // multiply by local schur matrix while the communication is in flight
  watch_apply_matrix_loc.start();
  if(only_Ax)
    this->_matrix_s.apply(r, x);
  else
    this->_matrix_s.apply(r, x, y, alpha);
  watch_apply_matrix_loc.stop();

  // process receives and multiply by neighbor schur matrices;
  // r already holds the local contribution, so each neighbor product is
  // accumulated onto r (scaled by alpha unless only_Ax is set)
  for(std::size_t i(0u); recv_reqs.wait_any(i); )
  {
    watch_apply_neighbor_s.start();
    this->_neighbor_matrices.at(i).apply(r, recv_bufs.at(i), r, (only_Ax ? DataType(1) : alpha));
    watch_apply_neighbor_s.stop();
  }

  // wait for all previous sends to finish before the send buffers go out of scope
  send_reqs.wait_all();
}
1555 }; // class PMDCDSCMatrix<...>
1556 } // namespace Global
1557} // namespace FEAT
#define ASSERT(expr)
Debug-Assertion macro definition.
Definition: assertion.hpp:229
#define ASSERTM(expr, msg)
Debug-Assertion macro definition with custom message.
Definition: assertion.hpp:230
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
Adjacency Graph implementation.
Definition: graph.hpp:34
void sort_indices()
Sorts the image indices to non-descending order.
Definition: graph.cpp:206
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374
Index get_num_indices() const
Returns the total number indices.
Definition: graph.hpp:390
Communicator class.
Definition: dist.hpp:1349
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Definition: dist.cpp:716
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
Definition: dist.cpp:704
int rank() const
Returns the rank of this process in this communicator.
Definition: dist.hpp:1494
Communication Request vector class.
Definition: dist.hpp:640
void wait_all()
Blocks until all active requests are fulfilled.
Definition: dist.cpp:324
Global gate implementation.
Definition: gate.hpp:51
const Dist::Comm * get_comm() const
Returns a const pointer to the underlying communicator.
Definition: gate.hpp:138
std::vector< Mirror_ > _mirrors
vector mirrors
Definition: gate.hpp:73
std::vector< int > _ranks
communication ranks
Definition: gate.hpp:71
Global Matrix wrapper class template.
Definition: matrix.hpp:40
VectorTypeR create_vector_r() const
Creates and returns a new R-compatible global vector object.
Definition: matrix.hpp:207
VectorTypeL create_vector_l() const
Creates and returns a new L-compatible global vector object.
Definition: matrix.hpp:197
const GateRowType * get_row_gate() const
Returns a const pointer to the internal row gate of the matrix.
Definition: matrix.hpp:154
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gates of the matrix.
Definition: matrix.hpp:174
LocalMatrix_ & local()
Returns a reference to the internal local LAFEM matrix object.
Definition: matrix.hpp:126
void adp_upload_symbolic(RPT_ *row_ptr, CIT_ *col_idx, Index global_dof_offset) const
Assembles the matrix structure of an algebraic DOF partitioned matrix.
static ValueTypeD _mat_mult_d_a(const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_local_schur_matrix: multiply two values D*A
static MirrorTypeP _asm_data_mirror(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b, const Adjacency::Graph &graph)
Auxiliary Function: computes a data mirror from B to B'.
static Adjacency::Graph _asm_reduced_b(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: reduces the matrix B to B'.
static void _premult_da(LocalMatrixTypeB &mat_da, const LocalMatrixTypeD &mat_d, const LocalVectorTypeV &mat_a)
Auxiliary Function: Computes the product (D*A) and stores the result in a CSC matrix.
static void _asm_pres_mirror(MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary function: assembles a pressure mirror from a velocity mirror and the B-matrix.
static void _asm_local_schur_matrix(LocalMatrixTypeS &s, const LocalMatrixTypeD &d, const LocalVectorTypeV &a, const LocalMatrixTypeB &b)
Assembles the local Schur-Matrix S = (D*A*B)
static DataType _mat_mult_da_b(const ValueTypeB &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values DA*B with DA in transposed form.
static DataType _mat_mult_da_b(const ValueTypeD &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_local_schur_matrix: multiply two values DA*B
static void _mat_mult_d_a(ValueTypeB &val_da, const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values D*A and store in transposed form.
static void _asm_neighbor_schur_matrix(NeighMatrixTypeS &s, const LocalMatrixTypeB &da, const MirrorTypeV &mirror_v, const Adjacency::Graph &graph_b, const BufferVectorType &buffer_b)
Assembles a neighbor Schur-Matrix S_k = (D*A*M_k^T*B_k)
void adp_compute_counts(Index &global_dof_offset, Index &global_dof_count, Index &owned_dof_count, Index &owned_num_nzes, Index &global_num_nzes) const
Compute the counts required for an algebraic dof partitioning of this matrix.
void _apply(LocalVectorTypeP &r, const LocalVectorTypeP &x, const LocalVectorTypeP &y, const DataType alpha, bool only_Ax) const
Applies this matrix onto a vector.
static BufferVectorType _gather_b(const MirrorTypeP &data_mirror, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: Gathers the data array from B to B' using the data mirror.
Pre-Multiplied Discontinuous Diagonal Schur-Complement Matrix.
Global vector wrapper class template.
Definition: vector.hpp:68
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gate of the vector.
Definition: vector.hpp:159
LocalVector_ & local()
Returns a reference to the internal local LAFEM vector object.
Definition: vector.hpp:122
void format(DT_ value=DT_(0))
Reset all elements of the container to a given value or zero if missing.
Definition: container.hpp:851
Blocked Dense data vector class template.
auto elements() const -> const typename Intern::DenseVectorBlockedPerspectiveHelper< DT_, BlockSize_, perspective_ >::Type *
Retrieve a pointer to the data array.
Index size() const
The number of elements.
Dense data vector class template.
DT_ * elements()
Get a pointer to the data array.
CSR based blocked sparse matrix.
Index rows() const
Retrieve matrix row count.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_ptr()
Retrieve row start index array.
Index columns() const
Retrieve matrix column count.
Index used_elements() const
Retrieve non zero element count.
auto val() const -> const typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
Retrieve non zero element array.
CSCR based sparse matrix.
IT_ * col_ind()
Retrieve column indices array.
IT_ * row_numbers()
Retrieve row numbers of non zero rows.
Index columns() const
Retrieve matrix column count.
Index rows() const
Retrieve matrix row count.
DT_ * val()
Retrieve non zero element array.
Index used_rows() const
Retrieve used matrix non zero row count.
IT_ * row_ptr()
Retrieve row start index array.
CSR based sparse matrix.
IT_ * col_ind()
Retrieve column indices array.
DT_ * val()
Retrieve non zero element array.
Index rows() const
Retrieve matrix row count.
Index columns() const
Retrieve matrix column count.
void extract_diag(VectorTypeL &diag, DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
Index used_elements() const
Retrieve non zero element count.
IT_ * row_ptr()
Retrieve row start index array.
Stop-Watch class.
Definition: stop_watch.hpp:21
double elapsed() const
Returns the total elapsed time in seconds.
Definition: stop_watch.hpp:70
void start()
Starts the stop-watch.
Definition: stop_watch.hpp:43
void reset()
Resets the elapsed time.
Definition: stop_watch.hpp:36
void stop()
Stops the stop-watch and increments elapsed time.
Definition: stop_watch.hpp:51
String class implementation.
Definition: string.hpp:47
String pad_back(size_type len, char c=' ') const
Pads the back of the string up to a desired length.
Definition: string.hpp:416
Tiny Matrix class template.
Tiny Vector class template.
@ injectify_sorted
Render-Injectified mode, sort image indices.
@ injectify_transpose
Render-Injectified-Transpose mode.
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
Definition: dist.hpp:271
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275
FEAT namespace.
Definition: adjactor.hpp:12
String stringify_fp_fix(DataType_ value, int precision=0, int width=0, bool sign=false)
Prints a floating point value to a string in fixed-point notation.
Definition: string.hpp:1191
std::uint64_t Index
Index data type.