feat3/pmdcdsc__matrix_8hpp_source.html

// FEAT3: Finite Element Analysis Toolbox, Version 3

// Copyright (C) 2010 by Stefan Turek & the FEAT group

// FEAT3 is released under the GNU General Public License version 3,

// see the file 'copyright.txt' in the top level directory for details.


#pragma once


// includes, FEAT

#include <kernel/global/vector.hpp>

#include <kernel/global/matrix.hpp>

#include <kernel/lafem/dense_vector.hpp>

#include <kernel/lafem/dense_vector_blocked.hpp>

#include <kernel/lafem/sparse_matrix_csr.hpp>

#include <kernel/lafem/sparse_matrix_bcsr.hpp>

#include <kernel/lafem/sparse_matrix_cscr.hpp>

#include <kernel/util/stop_watch.hpp>


#include <vector>

#include <map>

#include <set>


namespace FEAT

{

  namespace Global

  {

    template<typename MatrixB_, typename MatrixD_>

    class PMDCDSCMatrix;


    template<typename DT_, typename IT_, int dim_, typename MirrorV_, typename MirrorP_>

    class PMDCDSCMatrix<

      Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, dim_, 1>, MirrorV_, MirrorP_>,

      Global::Matrix<LAFEM::SparseMatrixBCSR<DT_, IT_, 1, dim_>, MirrorP_, MirrorV_>>

    {

    public:

      typedef DT_ DataType;

      typedef IT_ IndexType;

      static constexpr int dim = dim_;


      typedef MirrorV_ MirrorTypeV;

      typedef MirrorP_ MirrorTypeP;


      typedef LAFEM::SparseMatrixBCSR<DataType, IndexType, dim, 1> LocalMatrixTypeB;

      typedef LAFEM::SparseMatrixBCSR<DataType, IndexType, 1, dim> LocalMatrixTypeD;

      typedef LAFEM::SparseMatrixCSR<DataType, IndexType> LocalMatrixTypeS;

      typedef LAFEM::SparseMatrixCSCR<DataType, IndexType> NeighMatrixTypeS;


      typedef Global::Matrix<LocalMatrixTypeB, MirrorTypeV, MirrorTypeP> GlobalMatrixTypeB;

      typedef Global::Matrix<LocalMatrixTypeD, MirrorTypeP, MirrorTypeV> GlobalMatrixTypeD;


      typedef LAFEM::DenseVectorBlocked<DataType, IndexType, dim> LocalVectorTypeV;

      typedef LAFEM::DenseVector<DataType, IndexType> LocalVectorTypeP;

      typedef LAFEM::DenseVector<DataType, IndexType> BufferVectorType;


      typedef Global::Vector<LocalVectorTypeV, MirrorTypeV> GlobalVectorTypeV;

      typedef Global::Vector<LocalVectorTypeP, MirrorTypeP> GlobalVectorTypeP;


      typedef Global::Gate<LocalVectorTypeV, MirrorTypeV> GateTypeV;

      typedef Global::Gate<LocalVectorTypeP, MirrorTypeP> GateTypeP;


      typedef GlobalVectorTypeP VectorTypeL;

      typedef GlobalVectorTypeP VectorTypeR;


      typedef typename LocalVectorTypeV::ValueType ValueTypeA;

      typedef typename LocalMatrixTypeB::ValueType ValueTypeB;

      typedef typename LocalMatrixTypeD::ValueType ValueTypeD;


      typedef typename GlobalMatrixTypeD::GateRowType GateRowType;

      typedef typename GlobalMatrixTypeB::GateColType GateColType;


      static constexpr bool is_global = true;

      static constexpr bool is_local = false;


    protected:

      const GlobalVectorTypeV& _diagonal_a;

      const GlobalMatrixTypeB& _matrix_b;

      const GlobalMatrixTypeD& _matrix_d;


      LocalMatrixTypeS _matrix_s;


      LocalMatrixTypeB _matrix_da;


      std::vector<int> _ranks;


      std::vector<MirrorTypeP> _pres_mirrors;


      std::vector<MirrorTypeP> _data_mirrors;


      std::vector<Adjacency::Graph> _neighbor_graphs;


      std::vector<NeighMatrixTypeS> _neighbor_matrices;


      StopWatch watch_init_symbolic;

      StopWatch watch_init_sym_matrix_loc;

      StopWatch watch_init_sym_pres_mirror;

      StopWatch watch_init_sym_reduced_b;

      StopWatch watch_init_sym_data_mirror;

      StopWatch watch_init_sym_neighbor_s;


      StopWatch watch_init_numeric;

      StopWatch watch_init_num_matrix_loc;

      StopWatch watch_init_num_gather_b;

      StopWatch watch_init_num_premult_da;

      StopWatch watch_init_num_neighbor_s;


      mutable StopWatch watch_apply;

      mutable StopWatch watch_apply_matrix_loc;

      mutable StopWatch watch_apply_neighbor_s;


    public:

      explicit PMDCDSCMatrix(

        const GlobalVectorTypeV& diagonal_a,

        const GlobalMatrixTypeB& matrix_b,

        const GlobalMatrixTypeD& matrix_d) :

        _diagonal_a(diagonal_a),

        _matrix_b(matrix_b),

        _matrix_d(matrix_d)

      {

      }


      virtual ~PMDCDSCMatrix()

      {

      }


      const Dist::Comm* get_comm() const

      {

        return _diagonal_a.get_comm();

      }


      VectorTypeL create_vector_l() const

      {

        return _matrix_d.create_vector_l();

      }


      VectorTypeR create_vector_r() const

      {

        return _matrix_b.create_vector_r();

      }


      const LocalMatrixTypeS& get_local_schur_matrix() const

      {

        return _matrix_s;

      }


      void extract_diag(GlobalVectorTypeP& vec_diag) const

      {

        _matrix_s.extract_diag(vec_diag.local());

      }


      GlobalVectorTypeP extract_diag() const

      {

        GlobalVectorTypeP vec_diag = _matrix_d.create_vector_l();

        _matrix_s.extract_diag(vec_diag.local());

        return vec_diag;

      }


      void reset_timings()

      {

        watch_init_symbolic.reset();

        watch_init_sym_matrix_loc.reset();

        watch_init_sym_pres_mirror.reset();

        watch_init_sym_reduced_b.reset();

        watch_init_sym_data_mirror.reset();

        watch_init_sym_neighbor_s.reset();

        watch_init_numeric.reset();

        watch_init_num_matrix_loc.reset();

        watch_init_num_gather_b.reset();

        watch_init_num_premult_da.reset();

        watch_init_num_neighbor_s.reset();

        watch_apply.reset();

        watch_apply_matrix_loc.reset();

        watch_apply_neighbor_s.reset();

      }


      String format_timings() const

      {

        static constexpr std::size_t nt = 14;

        double tsum[nt], tmax[nt], tloc[nt] =

        {

          watch_init_symbolic.elapsed(),

          watch_init_sym_matrix_loc.elapsed(),

          watch_init_sym_pres_mirror.elapsed(),

          watch_init_sym_reduced_b.elapsed(),

          watch_init_sym_data_mirror.elapsed(),

          watch_init_sym_neighbor_s.elapsed(),

          watch_init_numeric.elapsed(),

          watch_init_num_matrix_loc.elapsed(),

          watch_init_num_gather_b.elapsed(),

          watch_init_num_premult_da.elapsed(),

          watch_init_num_neighbor_s.elapsed(),

          watch_apply.elapsed(),

          watch_apply_matrix_loc.elapsed(),

          watch_apply_neighbor_s.elapsed()

        };


        this->get_comm()->allreduce(tloc, tsum, nt, Dist::op_sum);

        this->get_comm()->allreduce(tloc, tmax, nt, Dist::op_max);


        // divide sum by number of ranks to obtain mean

        {

          const double ds = 1.0 / double(this->get_comm()->size());

          for(std::size_t i(0); i < nt; ++i)

            tsum[i] *= ds;

        }


        String s;

        s += String(34, ' ') + "Mean Time      Max Time\n";

        s += _fmt_time(tsum[0], tmax[0], "Total Symbolic Initialization");

        s += _fmt_time(tsum[0], tmax[0], tsum[1], tmax[1], "Local Schur Matrix Structure");

        s += _fmt_time(tsum[0], tmax[0], tsum[2], tmax[2], "Pressure Mirror");

        s += _fmt_time(tsum[0], tmax[0], tsum[3], tmax[3], "Reduced-B Matrix Structure");

        s += _fmt_time(tsum[0], tmax[0], tsum[4], tmax[4], "Reduced-B Data Mirror");

        s += _fmt_time(tsum[0], tmax[0], tsum[5], tmax[5], "Neighbor Matrix Structure");

        double tsym_other_sum = tsum[0] - tsum[1] - tsum[2] - tsum[3] - tsum[4] - tsum[5];

        double tsym_other_max = tmax[0] - tmax[1] - tmax[2] - tmax[3] - tmax[4] - tmax[5];

        s += _fmt_time(tsum[0], tmax[0], tsym_other_sum, tsym_other_max, "Other Symbolic");


        s += _fmt_time(tsum[6], tmax[6], "Total Numeric Initialization");

        s += _fmt_time(tsum[6], tmax[6], tsum[7], tmax[7], "Local Schur Matrix Values");

        s += _fmt_time(tsum[6], tmax[6], tsum[8], tmax[8], "Reduced-B Gather");

        s += _fmt_time(tsum[6], tmax[6], tsum[9], tmax[9], "Pre-Multiply D*A");

        s += _fmt_time(tsum[6], tmax[6], tsum[10], tmax[10], "Neighbor Matrix Values");

        double tnum_other_sum = tsum[6] - tsum[7] - tsum[8] - tsum[9] - tsum[10];

        double tnum_other_max = tmax[6] - tmax[7] - tmax[8] - tmax[9] - tmax[10];

        s += _fmt_time(tsum[6], tmax[6], tnum_other_sum, tnum_other_max, "Other Numeric");


        s += _fmt_time(tsum[11], tmax[11], "Total Matrix Apply");

        s += _fmt_time(tsum[11], tmax[11], tsum[12], tmax[12], "Local Schur Matrix");

        s += _fmt_time(tsum[11], tmax[11], tsum[13], tmax[13], "Neighbor Schur Matrix");

        double tapp_other_sum = tsum[11] - tsum[12] - tsum[13];

        double tapp_other_max = tmax[11] - tmax[12] - tmax[13];

        s += _fmt_time(tsum[11], tmax[11], tapp_other_sum, tapp_other_max, "Other Apply");

        return s;

      }


      void init()

      {

        init_symbolic();

        init_numeric();

      }


      void init_symbolic()

      {

        watch_init_symbolic.start();


        // get the velocity and pressure gates

        const GateTypeV* gate_v = this->_matrix_b.get_row_gate();

        const GateTypeP* gate_p = this->_matrix_d.get_row_gate();

        XASSERT(gate_v != nullptr);

        XASSERT(gate_p != nullptr);


        // the pressure gate must be empty, otherwise the pressure space is not discontinuous

        XASSERTM(gate_p->_ranks.empty(), "pressure space is not discontinuous");


        // compute the local matrix structure of S by D * B

        {

          watch_init_sym_matrix_loc.start();


          // compose structures of D and B

          Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, _matrix_d.local(), _matrix_b.local());

          // create the matrix layout of S

          _matrix_s = LocalMatrixTypeS(graph_s);


          watch_init_sym_matrix_loc.stop();

        }


        // get our communicator

        const Dist::Comm& comm = *gate_v->get_comm();


        // get neighbor ranks

        this->_ranks = gate_v->_ranks;


        // get the number of our neighbors

        const std::size_t num_neighs = this->_ranks.size();

        if(num_neighs <= std::size_t(0))

        {

          watch_init_symbolic.stop();

          return; // no neighbors, no problems :)

        }


        // copy the layout of B into (D*A)^T

        this->_matrix_da = this->_matrix_b.local().clone(LAFEM::CloneMode::Layout);


        // resize our member arrays

        this->_pres_mirrors.resize(num_neighs);

        this->_data_mirrors.resize(num_neighs);

        this->_neighbor_graphs.resize(num_neighs);

        this->_neighbor_matrices.resize(num_neighs);


        // allocate a vector of graphs for B

        std::vector<Adjacency::Graph> my_graphs(num_neighs);


        // loop over all neighbor processes

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          // get the velocity mirror

          const MirrorTypeV& mirror_v = gate_v->_mirrors.at(i);


          // assemble the pressure mirror for this neighbor

          watch_init_sym_pres_mirror.start();

          MirrorTypeP& mirror_p = this->_pres_mirrors.at(i);

          this->_asm_pres_mirror(mirror_p, mirror_v, this->_matrix_b.local());

          watch_init_sym_pres_mirror.stop();


          // assemble reduced B-matrix graph

          watch_init_sym_reduced_b.start();

          my_graphs.at(i) = this->_asm_reduced_b( mirror_p, mirror_v, this->_matrix_b.local());

          watch_init_sym_reduced_b.stop();


          // assemble B' data mirror

          watch_init_sym_data_mirror.start();

          this->_data_mirrors.at(i) = this->_asm_data_mirror(mirror_p, mirror_v, this->_matrix_b.local(), my_graphs.at(i));

          watch_init_sym_data_mirror.stop();

        }


        // dimension send/receive buffers and requests

        std::vector<std::array<Index,3>> recv_dims(num_neighs), send_dims(num_neighs);

        Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);


        // post receive requests for dimensions

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_reqs[i] = comm.irecv(recv_dims.at(i).data(), std::size_t(3), this->_ranks.at(i));

        }


        // send dimensions

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          const Adjacency::Graph& g = my_graphs.at(i);

          auto& sdim = send_dims.at(i);

          sdim[0] = g.get_num_nodes_domain(); // corresponds to velocity mirror index size

          sdim[1] = g.get_num_nodes_image();

          sdim[2] = g.get_num_indices();

          send_reqs[i] = comm.isend(sdim.data(), std::size_t(3), this->_ranks.at(i));

        }


        // process all pending receives

        for(std::size_t i(0u); recv_reqs.wait_any(i); )

        {

          // get received dimension

          auto& rdim = recv_dims.at(i);


          // the first dimension must match our velocity mirror index set size

          XASSERT(rdim[0] == gate_v->_mirrors.at(i).num_indices());


          // allocate graph of corresponding dimensions

          this->_neighbor_graphs.at(i) = Adjacency::Graph(rdim[0], rdim[1], rdim[2]);

        }


        // post domain-pointer array receives

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_domain_ptr(),

            recv_dims.at(i)[0] + std::size_t(1),  this->_ranks.at(i));

        }


        // wait for all previous sends to finish

        send_reqs.wait_all();


        // post domain-pointer array sends

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          send_reqs[i] = comm.isend(my_graphs.at(i).get_domain_ptr(),

            send_dims.at(i)[0] + std::size_t(1), this->_ranks.at(i));

        }


        // wait for all pending receives to finish

        recv_reqs.wait_all();


        // post image-index array receives

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_reqs[i] = comm.irecv(this->_neighbor_graphs.at(i).get_image_idx(),

            recv_dims.at(i)[2],  this->_ranks.at(i));

        }


        // wait for all previous sends to finish

        send_reqs.wait_all();


        // post image-index array sends

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          send_reqs[i] = comm.isend(my_graphs.at(i).get_image_idx(), send_dims.at(i)[2], this->_ranks.at(i));

        }


        // wait for all pending receives to finish

        recv_reqs.wait_all();


        // wait for all previous sends to finish

        send_reqs.wait_all();


        // compute Schur-matrix structures for neighbors

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          watch_init_sym_neighbor_s.start();


          // D*M^T = (M*B)^T

          Adjacency::Graph graph_dm(Adjacency::RenderType::injectify_transpose, gate_v->_mirrors.at(i), this->_matrix_b.local());


          // S = (D*M^T) * B'

          Adjacency::Graph graph_s(Adjacency::RenderType::injectify_sorted, graph_dm, this->_neighbor_graphs.at(i));


          // allocate Schur-matrix

          this->_neighbor_matrices.at(i).convert(NeighMatrixTypeS(graph_s));


          watch_init_sym_neighbor_s.stop();

        }


        // that's it

        watch_init_symbolic.stop();

      }


      void init_numeric()

      {

        watch_init_numeric.start();


        // pre-multiply local matrix product

        watch_init_num_matrix_loc.start();

        this->_matrix_s.format();

        _asm_local_schur_matrix(this->_matrix_s, this->_matrix_d.local(), this->_diagonal_a.local(), this->_matrix_b.local());

        watch_init_num_matrix_loc.stop();


        // get the number of our neighbors

        const std::size_t num_neighs = this->_ranks.size();

        if(num_neighs <= std::size_t(0))

        {

          watch_init_numeric.stop();

          return; // no neighbors, no problems :)

        }


        // get our communicator

        const Dist::Comm& comm = *this->_matrix_b.get_comm();


        // send/receive buffers and requests

        std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);

        Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);


        // allocate receive buffer matrices B' and post receives

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_bufs.at(i) = BufferVectorType(Index(dim) * this->_neighbor_graphs.at(i).get_num_indices());

          recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));

        }


        // extract reduced matrix data and post send

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          watch_init_num_gather_b.start();

          send_bufs.at(i) = _gather_b(this->_data_mirrors.at(i), this->_matrix_b.local());

          watch_init_num_gather_b.stop();

          send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));

        }


        // pre-multiply D*A and store in transposed form, i.e. CSC rather than CSR

        watch_init_num_premult_da.start();

        _premult_da(this->_matrix_da, this->_matrix_d.local(), this->_diagonal_a.local());

        watch_init_num_premult_da.stop();


        // process receives and compute neighbor schur matrices

        for(std::size_t i(0u); recv_reqs.wait_any(i); )

        {

          watch_init_num_neighbor_s.start();

          this->_neighbor_matrices.at(i).format();

          _asm_neighbor_schur_matrix(this->_neighbor_matrices.at(i), this->_matrix_da,

            this->_matrix_b.get_row_gate()->_mirrors.at(i), this->_neighbor_graphs.at(i), recv_bufs.at(i));

          watch_init_num_neighbor_s.stop();

        }


        // wait for all previous sends to finish

        send_reqs.wait_all();

        watch_init_numeric.stop();

      }


      void apply(VectorTypeL& r, const VectorTypeR& x) const

      {

        watch_apply.start();

        this->_apply(r.local(), x.local(), r.local(), DataType(1), true);

        watch_apply.stop();

      }


      void apply(VectorTypeL& r, const VectorTypeR& x, const VectorTypeL& y, const DataType alpha = DataType(1)) const

      {

        watch_apply.start();

        this->_apply(r.local(), x.local(), y.local(), alpha, false);

        watch_apply.stop();

      }


      LocalMatrixTypeS asm_adp_symbolic(Index& glob_dof_offset, Index& glob_dof_count) const

      {

        // no neighbors?

        if(_ranks.empty())

        {

          glob_dof_offset = Index(0);

          glob_dof_count = _matrix_s.rows();

          return _matrix_s.clone(LAFEM::CloneMode::Layout);

        }


        // get our communicator

        const Dist::Comm& comm = *this->get_comm();


        const std::size_t num_neighs = this->_ranks.size();


        // get number of local DOFs

        const Index num_loc_dofs = _matrix_s.rows();


        // compute our global DOF offset and count

        comm.exscan(&num_loc_dofs, &glob_dof_offset, std::size_t(1), Dist::op_sum);

        comm.allreduce(&num_loc_dofs, &glob_dof_count, std::size_t(1), Dist::op_sum);


        // The columns of our neighbor matrices correspond to the entries in the pressure mirror.

        // However, for the desired ADP matrix, we have to translate these into global DOF indices.

        // For this, each process has to map the DOF in its pressure mirrors to global DOFs and then

        // send these DOF indices to the corresponding neighbor, so that it can map the column

        // indices of its neighbor matrix to global DOF indices.


        // send/receive mirrors and requests

        std::vector<std::vector<IndexType>> recv_dofs(num_neighs), send_dofs(num_neighs);

        Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);


        // allocate receive vectors and post receives

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_dofs.at(i).resize(_neighbor_graphs.at(i).get_num_nodes_image());

          recv_reqs[i] = comm.irecv(recv_dofs.at(i).data(), recv_dofs.at(i).size(), this->_ranks.at(i));

        }


        // translate our pressure mirrors to global DOF vectors and post sends

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          const Index num_idx = _pres_mirrors.at(i).num_indices();

          const IndexType* pidx = _pres_mirrors.at(i).indices();

          send_dofs.at(i).resize(num_idx);

          IndexType* sidx = send_dofs.at(i).data();

          for(Index k(0); k < num_idx; ++k)

            sidx[k] = glob_dof_offset + pidx[k];

          send_reqs[i] = comm.isend(send_dofs.at(i).data(), send_dofs.at(i).size(), this->_ranks.at(i));

        }


        // get local matrix stuff

        const Index num_rows = num_loc_dofs;

        const IndexType* row_ptr_s = _matrix_s.row_ptr();

        const IndexType* col_idx_s = _matrix_s.col_ind();


        // compute number of non-zeros per row and total

        Index num_nzes = _matrix_s.used_elements();

        std::vector<IndexType> row_aux(num_rows, IndexType(0));

        for(Index i(0); i  < num_rows; ++i)

          row_aux[i] = (row_ptr_s[i+1] - row_ptr_s[i]);


        for(const auto& x : _neighbor_matrices)

        {

          num_nzes += x.used_elements();

          const Index used_rows = x.used_rows();

          const IndexType* row_ptr_x = x.row_ptr();

          const IndexType* row_idx_x = x.row_numbers();

          for(Index i(0); i < used_rows; ++i)

            row_aux[row_idx_x[i]] += (row_ptr_x[i+1] - row_ptr_x[i]);

        }


        // allocate our matrix

        LocalMatrixTypeS matrix_g(num_rows, glob_dof_count, num_nzes);


        // get our index arrays

        IndexType* row_ptr_g = matrix_g.row_ptr();

        IndexType* col_idx_g = matrix_g.col_ind();


        // compute row pointer array and store backup in aux

        row_ptr_g[0] = IndexType(0);

        for(Index i(0); i  < num_rows; ++i)

        {

          row_ptr_g[i+1] = row_ptr_g[i] + row_aux[i];

          row_aux[i] = row_ptr_g[i];

        }


        // Note: For the sake of compatibility with picky third-party libraries, we want to

        // ensure that the column indices of the output matrix are in ascending order.

        // For this, we have to combine the matrix layout from our own local matrix and

        // the matrices of our neighbors in rank-ascending order. So we first create two

        // rank maps for our neighbors with lower and higher ranks, so that we can easily

        // loop over all matrices in rank-order.


        // create two neighbors maps: one of all lower ranks and one of all higher ranks

        std::map<int, std::size_t> neigh_map_l, neigh_map_h;

        for(std::size_t ineigh(0); ineigh < num_neighs; ++ineigh)

        {

          if(_ranks.at(ineigh) < comm.rank())

            neigh_map_l.emplace(_ranks.at(ineigh), ineigh);

          else

            neigh_map_h.emplace(_ranks.at(ineigh), ineigh);

        }


        // wait for all receive requests to finish

        recv_reqs.wait_all();


        // first, insert all neighbor matrices with a lower rank in rank-ascending order

        for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)

        {

          std::size_t ineigh = it->second;

          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();

          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();

          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();

          const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();

          const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();

          for(Index i(0); i < used_rows; ++i)

          {

            IndexType& k = row_aux[row_idx_x[i]];

            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)

              col_idx_g[k] = dof_idx_x[col_idx_x[j]];

          }

        }


        // now insert our local matrix S

        for(Index i(0); i < num_rows; ++i)

        {

          IndexType k = row_aux[i];

          for(IndexType j(row_ptr_s[i]); j < row_ptr_s[i + 1]; ++j, ++k)

            col_idx_g[k] = glob_dof_offset + col_idx_s[j];

          row_aux[i] = k;

        }


        // finally, insert all neighbor matrices with a higher rank in rank-ascending order

        for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)

        {

          std::size_t ineigh = it->second;

          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();

          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();

          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();

          const IndexType* col_idx_x = _neighbor_matrices.at(ineigh).col_ind();

          const IndexType* dof_idx_x = recv_dofs.at(ineigh).data();

          for(Index i(0); i < used_rows; ++i)

          {

            IndexType& k = row_aux[row_idx_x[i]];

            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)

              col_idx_g[k] = dof_idx_x[col_idx_x[j]];

          }

        }


        // wait for all send requests to finish

        send_reqs.wait_all();


#ifdef DEBUG

        // sanity check: ensure that the column indices are sorted correctly

        for(Index i = 0; i < num_rows; ++i)

        {

          for(IndexType j(row_ptr_g[i]); j + 1 < row_ptr_g[i + 1]; ++j)

          {

            ASSERT(col_idx_g[j] < col_idx_g[j+1]);

          }

        }

#endif // DEBUG


        // that's it

        return matrix_g;

      }


      void asm_adp_numeric(LocalMatrixTypeS& matrix) const

      {

        // no neighbors?

        if(_ranks.empty())

        {

          // copy values

          matrix.copy(_matrix_s);

          return;

        }


        // get number of local DOFs

        const Index num_rows = _matrix_s.rows();

        XASSERT(matrix.rows() == _matrix_s.rows());


        // get row pointer arrays

        const IndexType* row_ptr_s = _matrix_s.row_ptr();

        const IndexType* row_ptr_g = matrix.row_ptr();

        const DataType* val_s = _matrix_s.val();

        DataType* val_g = matrix.val();


        // make a copy of the row pointer

        std::vector<IndexType> row_aux(num_rows);


        // copy the row-pointer array

        for(Index i(0); i < num_rows; ++i)

          row_aux[i] = row_ptr_g[i];


        // get this process's rank

        const int my_rank = this->get_comm()->rank();


        // create two neighbors maps: one of all lower ranks and one of all higher ranks

        std::map<int, std::size_t> neigh_map_l, neigh_map_h;

        for(std::size_t ineigh(0); ineigh < _ranks.size(); ++ineigh)

        {

          if(_ranks.at(ineigh) < my_rank)

            neigh_map_l.emplace(_ranks.at(ineigh), ineigh);

          else

            neigh_map_h.emplace(_ranks.at(ineigh), ineigh);

        }


        // first, copy all neighbor matrices with a lower rank in rank-ascending order

        for(auto it = neigh_map_l.begin(); it != neigh_map_l.end(); ++it)

        {

          std::size_t ineigh = it->second;

          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();

          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();

          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();

          const DataType* val_x = _neighbor_matrices.at(ineigh).val();

          for(Index i(0); i < used_rows; ++i)

          {

            IndexType& k = row_aux[row_idx_x[i]];

            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)

              val_g[k] = val_x[j];

          }

        }


        // now copy our own matrix

        for(Index i(0); i < num_rows; ++i)

        {

          Index k = row_aux[i];

          for(Index j(row_ptr_s[i]); j < row_ptr_s[i+1]; ++j, ++k)

            val_g[k] = val_s[j];

          row_aux[i] = k;

        }


        // finally, copy all neighbor matrices with a higher rank in rank-ascending order

        for(auto it = neigh_map_h.begin(); it != neigh_map_h.end(); ++it)

        {

          std::size_t ineigh = it->second;

          const Index used_rows = _neighbor_matrices.at(ineigh).used_rows();

          const IndexType* row_ptr_x = _neighbor_matrices.at(ineigh).row_ptr();

          const IndexType* row_idx_x = _neighbor_matrices.at(ineigh).row_numbers();

          const DataType* val_x = _neighbor_matrices.at(ineigh).val();

          for(Index i(0); i < used_rows; ++i)

          {

            IndexType& k = row_aux[row_idx_x[i]];

            for(IndexType j(row_ptr_x[i]); j < row_ptr_x[i + 1]; ++j, ++k)

              val_g[k] = val_x[j];

          }

        }


#ifdef DEBUG

        // sanity check: ensure that all entries have been processed

        for(Index i(0); i < num_rows; ++i)

        {

          ASSERT(row_aux[i] == row_ptr_g[i+1]);

        }

#endif // DEBUG

      }


    protected:

      static String _fmt_time(double tsum_total, double tmax_total, String st)

      {

        String s = st.pad_back(30, '.') + ":";

        s += stringify_fp_fix(tsum_total, 6, 12) + " :";

        s += stringify_fp_fix(tmax_total, 6, 12) + "\n";

        return s;

      }

      static String _fmt_time(double tsum_total, double tmax_total, double tsum, double tmax, String st)

      {

        String s = st.pad_back(30, '.') + ":";

        s += stringify_fp_fix(tsum, 6, 12) + " :";

        s += stringify_fp_fix(tmax, 6, 12) + " [";

        if(tsum_total > 1E-8 * Math::abs(tsum))

          s += stringify_fp_fix(100.0*tsum/tsum_total, 2, 6) + "% :";

        else

          s += "    --- :";

        if(tmax_total > 1E-8 * Math::abs(tmax))

          s += stringify_fp_fix(100.0*tmax/tmax_total, 2, 6) + "% ]\n";

        else

          s += "    --- ]\n";

        return s;

      }


      static void _asm_pres_mirror(MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v, const LocalMatrixTypeB& matrix_b)

      {

        const Index num_dof_mir_v = mirror_v.num_indices();

        const IndexType* velo_idx = mirror_v.indices();

        const IndexType* row_ptr = matrix_b.row_ptr();

        const IndexType* col_idx = matrix_b.col_ind();


        // loop over all rows of B, which are indexed in the velocity mirror,

        // and add all column indices (pressure DOFs) into the pressure DOF set

        std::set<IndexType> dof_set;

        for(Index i(0); i < num_dof_mir_v; ++i)

        {

          const Index irow = velo_idx[i];

          for(IndexType j(row_ptr[irow]); j < row_ptr[irow+1]; ++j)

            dof_set.insert(col_idx[j]);

        }


        // convert DOF set into a mirror

        mirror_p = MirrorTypeP(matrix_b.columns(), Index(dof_set.size()));

        IndexType* pidx = mirror_p.indices();

        for(auto it = dof_set.begin(); it != dof_set.end(); ++it, ++pidx)

          *pidx = *it;

      }


      static Adjacency::Graph _asm_reduced_b(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,

        const LocalMatrixTypeB& matrix_b)

      {

        const Index num_dof_mir_v = mirror_v.num_indices();

        const Index num_dof_mir_p = mirror_p.num_indices();

        const IndexType* velo_idx = mirror_v.indices();

        const IndexType* pres_idx = mirror_p.indices();

        const IndexType* row_ptr = matrix_b.row_ptr();

        const IndexType* col_idx = matrix_b.col_ind();


        // count number of non-zeros in indexed rows of B = non-zeros in B'

        Index num_red_nzes = Index(0);

        for(Index i(0); i < num_dof_mir_v; ++i)

        {

          const Index irow = velo_idx[i];

          num_red_nzes += Index(row_ptr[irow+1] - row_ptr[irow]);

        }


        // allocate matrix graph for reduced part B' of B

        Adjacency::Graph graph(num_dof_mir_v, num_dof_mir_p, num_red_nzes);

        Index* dom_ptr = graph.get_domain_ptr();

        Index* img_idx = graph.get_image_idx();


        // loop over all rows of reduced part B'

        dom_ptr[0] = Index(0);

        for(Index i(0); i < num_dof_mir_v; ++i)

        {

          // get the B-row index of the i-th row of B'

          const IndexType irow = velo_idx[i];

          // loop over all non-zeroes in the row of B / B'

          for(IndexType j(row_ptr[irow]), k(dom_ptr[i]); j < row_ptr[irow+1]; ++j, ++k)

          {

            // initialize invalid index for assertion below

            img_idx[k] = ~IndexType(0);


            // try to find this column index (=pressure DOF) in our pressure DOF mirror

            for(Index l(0); l < num_dof_mir_p; ++l)

            {

              if(col_idx[j] == pres_idx[l])

              {

                // that's our pressure DOF, so store its index as column index of B'

                img_idx[k] = l;

                break;

              }

            }

            ASSERT(img_idx[k] != ~IndexType(0));

          }

          // set next row pointer of B'

          dom_ptr[i+1] = dom_ptr[i] + (row_ptr[irow+1] - row_ptr[irow]);

        }


        // sort indices of B' and return graph

        graph.sort_indices();

        return graph;

      }


      static MirrorTypeP _asm_data_mirror(const MirrorTypeP& mirror_p, const MirrorTypeV& mirror_v,

        const LocalMatrixTypeB& matrix_b, const Adjacency::Graph& graph)

      {

        const Index num_dof_mir_v = mirror_v.num_indices();

        const IndexType* velo_idx = mirror_v.indices();

        const IndexType* pres_idx = mirror_p.indices();

        const IndexType* row_ptr = matrix_b.row_ptr();

        const IndexType* col_idx = matrix_b.col_ind();

        const Index* dom_ptr = graph.get_domain_ptr();

        const Index* img_idx = graph.get_image_idx();


        // allocate mirror for data array of B'

        MirrorTypeP data_mirror(matrix_b.used_elements(), graph.get_num_indices());

        IndexType* dat_idx = data_mirror.indices();


        // loop over all rows of B' again

        for(Index i(0); i < num_dof_mir_v; ++i)

        {

          // get the B-row index (=velocity DOF index)

          const IndexType irow = velo_idx[i];


          // loop over all non-zeroes in the row of B / B'

          for(Index k(dom_ptr[i]); k < dom_ptr[i+1]; ++k)

          {

            // initialize invalid index for assertion below

            dat_idx[k] = ~IndexType(0);


            // get the B-column index (=pressure DOF index)

            const Index jcol = pres_idx[img_idx[k]];


            // try to find that column in our matrix

            for(IndexType j(row_ptr[irow]); j < row_ptr[irow + 1]; ++j)

            {

              if(jcol == col_idx[j])

              {

                // that's the entry we were looking for

                dat_idx[k] = j;

                break;

              }

            }

            ASSERT(dat_idx[k] != ~IndexType(0));

          }

        }


        // that's it

        return data_mirror;

      }


      static BufferVectorType _gather_b(const MirrorTypeP& data_mirror, const LocalMatrixTypeB& matrix_b)

      {

        const Index num_idx = data_mirror.num_indices();

        const IndexType* idx = data_mirror.indices();

        const ValueTypeB* mat_val = matrix_b.val();


        BufferVectorType buf(Index(dim)*num_idx);

        ValueTypeB* buf_val = reinterpret_cast<ValueTypeB*>(buf.elements());


        for(Index i(0); i < num_idx; ++i)

        {

          buf_val[i] = mat_val[idx[i]];

        }


        return buf;

      }


      inline static ValueTypeD _mat_mult_d_a(const ValueTypeD& val_d, const ValueTypeA& val_a)

      {

        ValueTypeD da;

        for(int i(0); i < dim; ++i)

          da(0, i) = val_d(0, i) * val_a(i);

        return da;

      }


      inline static DataType _mat_mult_da_b(const ValueTypeD& val_da, const ValueTypeB& val_b)

      {

        DataType s = DataType(0);

        for(int i(0); i < dim; ++i)

          s += val_da(0, i) * val_b(i, 0);

        return s;

      }


      inline static void _mat_mult_d_a(ValueTypeB& val_da, const ValueTypeD& val_d, const ValueTypeA& val_a)

      {

        for(int i(0); i < dim; ++i)

          val_da(i, 0) = val_d(0, i) * val_a(i);

      }


      inline static DataType _mat_mult_da_b(const ValueTypeB& val_da, const ValueTypeB& val_b)

      {

        DataType s = DataType(0);

        for(int i(0); i < dim; ++i)

          s += val_da(i, 0) * val_b(i, 0);

        return s;

      }


      static void _premult_da(LocalMatrixTypeB& mat_da, const LocalMatrixTypeD& mat_d, const LocalVectorTypeV& mat_a)

      {

        const Index num_rows = mat_d.rows();

        const Index num_cols = mat_d.columns();


        const IndexType* row_ptr = mat_d.row_ptr();

        const IndexType* col_idx = mat_d.col_ind();

        const IndexType* row_ptr_da = mat_da.row_ptr();


        ValueTypeB* val_da = mat_da.val();

        const ValueTypeD* val_d = mat_d.val();

        const ValueTypeA* val_a = mat_a.elements();


        // create a temporary copy of the row-pointer of D^T

        std::vector<Index> ptr(num_cols);

        for(Index i(0); i < num_cols; ++i)

          ptr[i] = row_ptr_da[i];


        // transpose matrix

        for(Index i(0); i < num_rows; ++i)

        {

          for(Index j(row_ptr[i]); j < row_ptr[i + 1]; ++j)

          {

            const Index col = col_idx[j];

            _mat_mult_d_a(val_da[ptr[col]++], val_d[j], val_a[col]);

          }

        }

      }


      static void _asm_local_schur_matrix(LocalMatrixTypeS& s, const LocalMatrixTypeD& d,

        const LocalVectorTypeV& a, const LocalMatrixTypeB& b)

      {

        // Note: this is a modified version of SparseMatrixCSR::add_double_mat_product for BCSR multiplicands


        // validate matrix dimensions

        XASSERT(s.rows() == d.rows());

        XASSERT(d.columns() == a.size());

        XASSERT(a.size() == b.rows());

        XASSERT(b.columns() == s.columns());


        // fetch matrix arrays:

        DataType* data_s = s.val();

        const ValueTypeD* data_d = d.val();

        const ValueTypeA* data_a = a.elements();

        const ValueTypeB* data_b = b.val();

        const IndexType* row_ptr_s = s.row_ptr();

        const IndexType* col_idx_s = s.col_ind();

        const IndexType* row_ptr_d = d.row_ptr();

        const IndexType* col_idx_d = d.col_ind();

        const IndexType* row_ptr_b = b.row_ptr();

        const IndexType* col_idx_b = b.col_ind();


        // loop over all rows of D and S, resp.

        for(IndexType i(0); i < IndexType(s.rows()); ++i)

        {

          // loop over all non-zeros D_ik in row i of D

          for(IndexType ik(row_ptr_d[i]); ik  < row_ptr_d[i+1]; ++ik)

          {

            // get column index k

            const IndexType k = col_idx_d[ik];


            // pre-compute (D_ik * A_kk)

            const ValueTypeD val_da = _mat_mult_d_a(data_d[ik], data_a[k]);


            //   S_i. += (D_ik * A_kk) * B_k.

            for(IndexType ij(row_ptr_s[i]), kj(row_ptr_b[k]); kj < row_ptr_b[k+1]; ++ij)

            {

              ASSERT(ij < row_ptr_s[i+1]);

              ASSERT(col_idx_s[ij] <= col_idx_b[kj]);

              if(col_idx_s[ij] == col_idx_b[kj])

              {

                data_s[ij] += _mat_mult_da_b(val_da, data_b[kj]);

                ++kj;

              }

            }

          }

        }

      }


      static void _asm_neighbor_schur_matrix(NeighMatrixTypeS& s, const LocalMatrixTypeB& da,

        const MirrorTypeV& mirror_v, const Adjacency::Graph& graph_b, const BufferVectorType& buffer_b)

      {

        // validate matrix dimensions

        XASSERT(s.rows() == da.columns());

        XASSERT(da.rows() == mirror_v.size());

        XASSERT(graph_b.get_num_nodes_domain() == mirror_v.num_indices());

        XASSERT(graph_b.get_num_nodes_image() == s.columns());

        XASSERT(mirror_v.size() == da.rows());


        // fetch matrix arrays:

        DataType* data_s = s.val();

        const IndexType* row_ptr_s = s.row_ptr();

        const IndexType* row_idx_s = s.row_numbers();

        const IndexType* col_idx_s = s.col_ind();

        const Index used_rows_s = s.used_rows();


        // we use CSR for (D*A)^T here, which is effectively a CSC storage of (D*A),

        // thus rows and columns swap their meaning here

        const ValueTypeB* data_da = da.val();

        const IndexType* col_ptr_da = da.row_ptr();

        const IndexType* row_idx_da = da.col_ind();


        const Index num_mir_idx = mirror_v.num_indices();

        const IndexType* mir_idx = mirror_v.indices();


        const ValueTypeB* data_b = reinterpret_cast<const ValueTypeB*>(buffer_b.elements());

        const Index* dom_ptr_b = graph_b.get_domain_ptr();

        const Index* img_idx_b = graph_b.get_image_idx();


        // loop over all velocity mirror entries, which correspond to the rows of B and the columns of D

        for(Index l(0); l < num_mir_idx; ++l)

        {

          // get velocity DOF index k = column index of D = row index of B

          const Index k = mir_idx[l];


          // loop over all columns k of D

          for(Index ik(col_ptr_da[k]), si(0); (ik < col_ptr_da[k + 1]) && (si < used_rows_s); )

          {

            // check if we have found a common row of S and the k-th column of D

            if(row_idx_da[ik] < row_idx_s[si])

            {

              // row i exists in column k of D, but not in matrix S

              ++ik;

              continue;

            }

            if(row_idx_s[si] < row_idx_da[ik])

            {

              // row i exists in matrix S, but not in column k of D

              ++si;

              continue;

            }


            //   S_i. += (D_ik * A_kk) * B_l.

            for(IndexType ij(row_ptr_s[si]), kj(dom_ptr_b[l]); kj < dom_ptr_b[l+1]; ++ij)

            {

              ASSERT(ij < row_ptr_s[si+1]);

              ASSERT(col_idx_s[ij] <= img_idx_b[kj]);

              if(col_idx_s[ij] == img_idx_b[kj])

              {

                data_s[ij] += _mat_mult_da_b(data_da[ik], data_b[kj]);

                ++kj;

              }

            }


            // okay, continue with next row of S and D

            ++ik;

            ++si;

          }

        }

      }


      void _apply(LocalVectorTypeP& r, const LocalVectorTypeP& x, const LocalVectorTypeP& y, const DataType alpha, bool only_Ax) const

      {

        // get the number of our neighbors

        const std::size_t num_neighs = this->_ranks.size();


        // no neighbors?

        if(num_neighs <= std::size_t(0))

        {

          // multiply by local schur matrix and return

          watch_apply_matrix_loc.start();

          if(only_Ax)

            this->_matrix_s.apply(r, x);

          else

            this->_matrix_s.apply(r, x, y, alpha);

          watch_apply_matrix_loc.stop();

          return;

        }


        // get our communicator

        const Dist::Comm& comm = *this->_matrix_b.get_comm();


        // send/receive buffers and requests

        std::vector<BufferVectorType> recv_bufs(num_neighs), send_bufs(num_neighs);

        Dist::RequestVector recv_reqs(num_neighs), send_reqs(num_neighs);


        // allocate receive buffer vectors and post receives

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          recv_bufs.at(i) = BufferVectorType(this->_neighbor_matrices.at(i).columns());

          recv_reqs[i] = comm.irecv(recv_bufs.at(i).elements(), recv_bufs.at(i).size(), this->_ranks.at(i));

        }


        // extract pressure dofs and post sends

        for(std::size_t i(0); i < num_neighs; ++i)

        {

          send_bufs.at(i) = BufferVectorType(this->_pres_mirrors.at(i).num_indices());

          this->_pres_mirrors.at(i).gather(send_bufs.at(i), x);

          send_reqs[i] = comm.isend(send_bufs.at(i).elements(), send_bufs.at(i).size(), this->_ranks.at(i));

        }


        // multiply by local schur matrix

        watch_apply_matrix_loc.start();

        if(only_Ax)

          this->_matrix_s.apply(r, x);

        else

          this->_matrix_s.apply(r, x, y, alpha);

        watch_apply_matrix_loc.stop();


        // process receives and multiply by neighbor schur matrices

        for(std::size_t i(0u); recv_reqs.wait_any(i); )

        {

          watch_apply_neighbor_s.start();

          this->_neighbor_matrices.at(i).apply(r, recv_bufs.at(i), r, (only_Ax ? DataType(1) : alpha));

          watch_apply_neighbor_s.stop();

        }


        // wait for all previous sends to finish

        send_reqs.wait_all();

      }

    }; // class PMDCDSCMatrix<...>

  } // namespace Global

} // namespace FEAT

ASSERT
#define ASSERT(expr)
Debug-Assertion macro definition.
Definition: assertion.hpp:229

XASSERT
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262

XASSERTM
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263

FEAT::Adjacency::Graph
Adjacency Graph implementation.
Definition: graph.hpp:34

FEAT::Adjacency::Graph::sort_indices
void sort_indices()
Sorts the image indices to non-descending order.
Definition: graph.cpp:206

FEAT::Adjacency::Graph::get_domain_ptr
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359

FEAT::Adjacency::Graph::get_image_idx
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374

FEAT::Adjacency::Graph::get_num_indices
Index get_num_indices() const
Returns the total number indices.
Definition: graph.hpp:390

FEAT::Dist::Comm
Communicator class.
Definition: dist.hpp:1349

FEAT::Dist::Comm::allreduce
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
Definition: dist.cpp:655

FEAT::Dist::Comm::exscan
void exscan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Exclusive Scan.
Definition: dist.cpp:683

FEAT::Dist::Comm::irecv
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Definition: dist.cpp:716

FEAT::Dist::Comm::isend
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
Definition: dist.cpp:704

FEAT::Dist::Comm::rank
int rank() const
Returns the rank of this process in this communicator.
Definition: dist.hpp:1494

FEAT::Dist::RequestVector
Communication Request vector class.
Definition: dist.hpp:640

FEAT::Dist::RequestVector::wait_all
void wait_all()
Blocks until all active requests are fulfilled.
Definition: dist.cpp:324

FEAT::Global::Gate
Global gate implementation.
Definition: gate.hpp:51

FEAT::Global::Gate::get_comm
const Dist::Comm * get_comm() const
Returns a const pointer to the underlying communicator.
Definition: gate.hpp:138

FEAT::Global::Gate::_mirrors
std::vector< Mirror_ > _mirrors
vector mirrors
Definition: gate.hpp:73

FEAT::Global::Gate::_ranks
std::vector< int > _ranks
communication ranks
Definition: gate.hpp:71

FEAT::Global::Matrix
Global Matrix wrapper class template.
Definition: matrix.hpp:40

FEAT::Global::Matrix::create_vector_r
VectorTypeR create_vector_r() const
Creates and returns a new R-compatible global vector object.
Definition: matrix.hpp:207

FEAT::Global::Matrix::create_vector_l
VectorTypeL create_vector_l() const
Creates and returns a new L-compatible global vector object.
Definition: matrix.hpp:197

FEAT::Global::Matrix::get_row_gate
const GateRowType * get_row_gate() const
Returns a const pointer to the internal row gate of the matrix.
Definition: matrix.hpp:154

FEAT::Global::Matrix::get_comm
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gates of the matrix.
Definition: matrix.hpp:174

FEAT::Global::Matrix::local
LocalMatrix_ & local()
Returns a reference to the internal local LAFEM matrix object.
Definition: matrix.hpp:126

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_mat_mult_d_a
static ValueTypeD _mat_mult_d_a(const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_local_schur_matrix: multiply two values D*A
Definition: pmdcdsc_matrix.hpp:1207

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::asm_adp_numeric
void asm_adp_numeric(LocalMatrixTypeS &matrix) const
Copies the numeric values of the matrix into an ADP matrix.
Definition: pmdcdsc_matrix.hpp:887

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_neighbor_graphs
std::vector< Adjacency::Graph > _neighbor_graphs
Neighbor B-matrix adjacency graphs.
Definition: pmdcdsc_matrix.hpp:175

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_asm_data_mirror
static MirrorTypeP _asm_data_mirror(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b, const Adjacency::Graph &graph)
Auxiliary Function: computes a data mirror from B to B'.
Definition: pmdcdsc_matrix.hpp:1129

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_fmt_time
static String _fmt_time(double tsum_total, double tmax_total, double tsum, double tmax, String st)
auxiliary function: format a time line
Definition: pmdcdsc_matrix.hpp:987

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_asm_reduced_b
static Adjacency::Graph _asm_reduced_b(const MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: reduces the matrix B to B'.
Definition: pmdcdsc_matrix.hpp:1055

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_matrix_d
const GlobalMatrixTypeD & _matrix_d
the velocity-divergence matrix D
Definition: pmdcdsc_matrix.hpp:109

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::asm_adp_symbolic
LocalMatrixTypeS asm_adp_symbolic(Index &glob_dof_offset, Index &glob_dof_count) const
Assembles the matrix structure of an algebraic DOF partitioned matrix.
Definition: pmdcdsc_matrix.hpp:712

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_data_mirrors
std::vector< MirrorTypeP > _data_mirrors
B-matrix data mirrors.
Definition: pmdcdsc_matrix.hpp:167

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_apply_matrix_loc
StopWatch watch_apply_matrix_loc
local Schur-Matrix apply time
Definition: pmdcdsc_matrix.hpp:213

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_premult_da
static void _premult_da(LocalMatrixTypeB &mat_da, const LocalMatrixTypeD &mat_d, const LocalVectorTypeV &mat_a)
Auxiliary Function: Computes the product (D*A) and stores the result in a CSC matrix.
Definition: pmdcdsc_matrix.hpp:1252

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::init_numeric
void init_numeric()
Performs the numeric initialization of the matrix.
Definition: pmdcdsc_matrix.hpp:605

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_asm_pres_mirror
static void _asm_pres_mirror(MirrorTypeP &mirror_p, const MirrorTypeV &mirror_v, const LocalMatrixTypeB &matrix_b)
Auxiliary function: assembles a pressure mirror from a velocity mirror and the B-matrix.
Definition: pmdcdsc_matrix.hpp:1016

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::~PMDCDSCMatrix
virtual ~PMDCDSCMatrix()
virtual destructor
Definition: pmdcdsc_matrix.hpp:244

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_matrix_s
LocalMatrixTypeS _matrix_s
The local Schur-complement matrix.
Definition: pmdcdsc_matrix.hpp:117

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::PMDCDSCMatrix
PMDCDSCMatrix(const GlobalVectorTypeV &diagonal_a, const GlobalMatrixTypeB &matrix_b, const GlobalMatrixTypeD &matrix_d)
Constructor.
Definition: pmdcdsc_matrix.hpp:233

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_apply
StopWatch watch_apply
total apply time
Definition: pmdcdsc_matrix.hpp:211

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::init_symbolic
void init_symbolic()
Performs the symbolic initialization of the matrix.
Definition: pmdcdsc_matrix.hpp:423

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_matrix_b
const GlobalMatrixTypeB & _matrix_b
the pressure-gradient matrix B
Definition: pmdcdsc_matrix.hpp:107

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::get_comm
const Dist::Comm * get_comm() const
Returns a pointer to the underlying communicator.
Definition: pmdcdsc_matrix.hpp:249

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_asm_local_schur_matrix
static void _asm_local_schur_matrix(LocalMatrixTypeS &s, const LocalMatrixTypeD &d, const LocalVectorTypeV &a, const LocalMatrixTypeB &b)
Assembles the local Schur-Matrix S = (D*A*B)
Definition: pmdcdsc_matrix.hpp:1290

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_mat_mult_da_b
static DataType _mat_mult_da_b(const ValueTypeB &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values DA*B with DA in transposed for...
Definition: pmdcdsc_matrix.hpp:1232

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::apply
void apply(VectorTypeL &r, const VectorTypeR &x) const
Calculate .
Definition: pmdcdsc_matrix.hpp:672

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::format_timings
String format_timings() const
Returns a string that contains the formatted timing statistics.
Definition: pmdcdsc_matrix.hpp:333

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_mat_mult_da_b
static DataType _mat_mult_da_b(const ValueTypeD &val_da, const ValueTypeB &val_b)
auxiliary function for _asm_local_schur_matrix: multiply two values DA*B
Definition: pmdcdsc_matrix.hpp:1216

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_num_neighbor_s
StopWatch watch_init_num_neighbor_s
neighbor Schur-matrix numeric assembly time
Definition: pmdcdsc_matrix.hpp:208

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_mat_mult_d_a
static void _mat_mult_d_a(ValueTypeB &val_da, const ValueTypeD &val_d, const ValueTypeA &val_a)
auxiliary function for _asm_neighbor_schur_matrix: multiply two values D*A and store in transposed fo...
Definition: pmdcdsc_matrix.hpp:1225

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::create_vector_l
VectorTypeL create_vector_l() const
Definition: pmdcdsc_matrix.hpp:255

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_sym_neighbor_s
StopWatch watch_init_sym_neighbor_s
neighbor Schur-Matrix symbolic assembly time
Definition: pmdcdsc_matrix.hpp:197

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::GateColType
GlobalMatrixTypeB::GateColType GateColType
The column-gate type (used by SFINAE)
Definition: pmdcdsc_matrix.hpp:98

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_neighbor_matrices
std::vector< NeighMatrixTypeS > _neighbor_matrices
Pre-multiplied neighbor Schur-complement matrices.
Definition: pmdcdsc_matrix.hpp:184

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_diagonal_a
const GlobalVectorTypeV & _diagonal_a
the diagonal matrix a (stored as a vector)
Definition: pmdcdsc_matrix.hpp:105

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::get_local_schur_matrix
const LocalMatrixTypeS & get_local_schur_matrix() const
Definition: pmdcdsc_matrix.hpp:267

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_symbolic
StopWatch watch_init_symbolic
total symbolic/numeric init time
Definition: pmdcdsc_matrix.hpp:187

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_num_gather_b
StopWatch watch_init_num_gather_b
reduced-B matrix data gather time
Definition: pmdcdsc_matrix.hpp:204

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_numeric
StopWatch watch_init_numeric
total numeric init time
Definition: pmdcdsc_matrix.hpp:200

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_sym_data_mirror
StopWatch watch_init_sym_data_mirror
reduced B-matrix data mirror assembly time
Definition: pmdcdsc_matrix.hpp:195

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::extract_diag
GlobalVectorTypeP extract_diag() const
Extracts the main diagonal.
Definition: pmdcdsc_matrix.hpp:299

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_num_premult_da
StopWatch watch_init_num_premult_da
pre-multiply D*A time
Definition: pmdcdsc_matrix.hpp:206

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_asm_neighbor_schur_matrix
static void _asm_neighbor_schur_matrix(NeighMatrixTypeS &s, const LocalMatrixTypeB &da, const MirrorTypeV &mirror_v, const Adjacency::Graph &graph_b, const BufferVectorType &buffer_b)
Assembles a neighbor Schur-Matrix S_k = (D*A*M_k^T*B_k)
Definition: pmdcdsc_matrix.hpp:1358

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::init
void init()
Performs both symbolic and numeric initialization of the matrix.
Definition: pmdcdsc_matrix.hpp:397

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::GateRowType
GlobalMatrixTypeD::GateRowType GateRowType
The row-gate type (used by SFINAE)
Definition: pmdcdsc_matrix.hpp:96

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_apply_neighbor_s
StopWatch watch_apply_neighbor_s
neighbor Matrix apply time
Definition: pmdcdsc_matrix.hpp:215

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::create_vector_r
VectorTypeR create_vector_r() const
Definition: pmdcdsc_matrix.hpp:261

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_sym_pres_mirror
StopWatch watch_init_sym_pres_mirror
pressure mirror assembly time
Definition: pmdcdsc_matrix.hpp:191

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_num_matrix_loc
StopWatch watch_init_num_matrix_loc
local Schur-Matrix numeric assembly time
Definition: pmdcdsc_matrix.hpp:202

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::apply
void apply(VectorTypeL &r, const VectorTypeR &x, const VectorTypeL &y, const DataType alpha=DataType(1)) const
Calculate .
Definition: pmdcdsc_matrix.hpp:687

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_sym_reduced_b
StopWatch watch_init_sym_reduced_b
reduced B-matrix symbolic assembly time
Definition: pmdcdsc_matrix.hpp:193

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_matrix_da
LocalMatrixTypeB _matrix_da
Pre-Multiplied transposed matrix-product (D*A)^T.
Definition: pmdcdsc_matrix.hpp:144

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_fmt_time
static String _fmt_time(double tsum_total, double tmax_total, String st)
auxiliary function: format a time line
Definition: pmdcdsc_matrix.hpp:979

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_pres_mirrors
std::vector< MirrorTypeP > _pres_mirrors
Pressure DOF mirrors.
Definition: pmdcdsc_matrix.hpp:158

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::reset_timings
void reset_timings()
Resets the internal stop watches used for collecting timing statistics.
Definition: pmdcdsc_matrix.hpp:307

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::extract_diag
void extract_diag(GlobalVectorTypeP &vec_diag) const
Extracts the main diagonal.
Definition: pmdcdsc_matrix.hpp:283

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::watch_init_sym_matrix_loc
StopWatch watch_init_sym_matrix_loc
local Schur-Matrix symbolic assembly time
Definition: pmdcdsc_matrix.hpp:189

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_apply
void _apply(LocalVectorTypeP &r, const LocalVectorTypeP &x, const LocalVectorTypeP &y, const DataType alpha, bool only_Ax) const
Applies this matrix onto a vector.
Definition: pmdcdsc_matrix.hpp:1448

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_gather_b
static BufferVectorType _gather_b(const MirrorTypeP &data_mirror, const LocalMatrixTypeB &matrix_b)
Auxiliary Function: Gathers the data array from B to B' using the data mirror.
Definition: pmdcdsc_matrix.hpp:1189

FEAT::Global::PMDCDSCMatrix< Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, dim_, 1 >, MirrorV_, MirrorP_ >, Global::Matrix< LAFEM::SparseMatrixBCSR< DT_, IT_, 1, dim_ >, MirrorP_, MirrorV_ > >::_ranks
std::vector< int > _ranks
neighbor process ranks
Definition: pmdcdsc_matrix.hpp:147

FEAT::Global::PMDCDSCMatrix
Pre-Multiplied Discontinuous Diagonal Schur-Complement Matrix.
Definition: pmdcdsc_matrix.hpp:30

FEAT::Global::Vector
Global vector wrapper class template.
Definition: vector.hpp:68

FEAT::Global::Vector::get_comm
const Dist::Comm * get_comm() const
Returns a const pointer to the internal communicator of the gate of the vector.
Definition: vector.hpp:158

FEAT::Global::Vector::local
LocalVector_ & local()
Returns a reference to the internal local LAFEM vector object.
Definition: vector.hpp:121

FEAT::LAFEM::Container::format
void format(DT_ value=DT_(0))
Reset all elements of the container to a given value or zero if missing.
Definition: container.hpp:851

FEAT::LAFEM::DenseVectorBlocked
Blocked Dense data vector class template.
Definition: dense_vector_blocked.hpp:81

FEAT::LAFEM::DenseVectorBlocked::elements
auto elements() const -> const typename Intern::DenseVectorBlockedPerspectiveHelper< DT_, BlockSize_, perspective_ >::Type *
Retrieve a pointer to the data array.
Definition: dense_vector_blocked.hpp:502

FEAT::LAFEM::DenseVectorBlocked::size
Index size() const
The number of elements.
Definition: dense_vector_blocked.hpp:528

FEAT::LAFEM::DenseVector
Dense data vector class template.
Definition: dense_vector.hpp:58

FEAT::LAFEM::DenseVector::elements
DT_ * elements()
Get a pointer to the data array.
Definition: dense_vector.hpp:737

FEAT::LAFEM::SparseMatrixBCSR
CSR based blocked sparse matrix.
Definition: sparse_matrix_bcsr.hpp:91

FEAT::LAFEM::SparseMatrixBCSR::rows
Index rows() const
Retrieve matrix row count.
Definition: sparse_matrix_bcsr.hpp:988

FEAT::LAFEM::SparseMatrixBCSR::col_ind
IT_ * col_ind()
Retrieve column indices array.
Definition: sparse_matrix_bcsr.hpp:1031

FEAT::LAFEM::SparseMatrixBCSR::row_ptr
IT_ * row_ptr()
Retrieve row start index array.
Definition: sparse_matrix_bcsr.hpp:1082

FEAT::LAFEM::SparseMatrixBCSR::columns
Index columns() const
Retrieve matrix column count.
Definition: sparse_matrix_bcsr.hpp:1003

FEAT::LAFEM::SparseMatrixBCSR::used_elements
Index used_elements() const
Retrieve non zero element count.
Definition: sparse_matrix_bcsr.hpp:1018

FEAT::LAFEM::SparseMatrixBCSR::val
auto val() const -> const typename Intern::BCSRPerspectiveHelper< DT_, BlockHeight_, BlockWidth_, perspective_ >::Type *
Retrieve non zero element array.
Definition: sparse_matrix_bcsr.hpp:1058

FEAT::LAFEM::SparseMatrixCSCR
CSCR based sparse matrix.
Definition: sparse_matrix_cscr.hpp:62

FEAT::LAFEM::SparseMatrixCSCR::col_ind
IT_ * col_ind()
Retrieve column indices array.
Definition: sparse_matrix_cscr.hpp:790

FEAT::LAFEM::SparseMatrixCSCR::row_numbers
IT_ * row_numbers()
Retrieve row numbers of non zero rows.
Definition: sparse_matrix_cscr.hpp:853

FEAT::LAFEM::SparseMatrixCSCR::columns
Index columns() const
Retrieve matrix column count.
Definition: sparse_matrix_cscr.hpp:758

FEAT::LAFEM::SparseMatrixCSCR::rows
Index rows() const
Retrieve matrix row count.
Definition: sparse_matrix_cscr.hpp:747

FEAT::LAFEM::SparseMatrixCSCR::val
DT_ * val()
Retrieve non zero element array.
Definition: sparse_matrix_cscr.hpp:811

FEAT::LAFEM::SparseMatrixCSCR::used_rows
Index used_rows() const
Retrieve used matrix non zero row count.
Definition: sparse_matrix_cscr.hpp:769

FEAT::LAFEM::SparseMatrixCSCR::row_ptr
IT_ * row_ptr()
Retrieve row start index array.
Definition: sparse_matrix_cscr.hpp:832

FEAT::LAFEM::SparseMatrixCSR
CSR based sparse matrix.
Definition: sparse_matrix_csr.hpp:60

FEAT::LAFEM::SparseMatrixCSR::copy
void copy(const SparseMatrixCSR &x, bool full=false)
Performs .
Definition: sparse_matrix_csr.hpp:1365

FEAT::LAFEM::SparseMatrixCSR::col_ind
IT_ * col_ind()
Retrieve column indices array.
Definition: sparse_matrix_csr.hpp:1202

FEAT::LAFEM::SparseMatrixCSR::val
DT_ * val()
Retrieve non zero element array.
Definition: sparse_matrix_csr.hpp:1223

FEAT::LAFEM::SparseMatrixCSR::rows
Index rows() const
Retrieve matrix row count.
Definition: sparse_matrix_csr.hpp:1169

FEAT::LAFEM::SparseMatrixCSR::clone
SparseMatrixCSR clone(CloneMode clone_mode=CloneMode::Weak) const
Clone operation.
Definition: sparse_matrix_csr.hpp:539

FEAT::LAFEM::SparseMatrixCSR::columns
Index columns() const
Retrieve matrix column count.
Definition: sparse_matrix_csr.hpp:1180

FEAT::LAFEM::SparseMatrixCSR::extract_diag
void extract_diag(VectorTypeL &diag, DenseVector< IT_, IT_ > &diag_indices) const
extract main diagonal vector from matrix
Definition: sparse_matrix_csr.hpp:2516

FEAT::LAFEM::SparseMatrixCSR::apply
void apply(DenseVector< DT_, IT_ > &r, const DenseVector< DT_, IT_ > &x) const
Calculate .
Definition: sparse_matrix_csr.hpp:1793

FEAT::LAFEM::SparseMatrixCSR::used_elements
Index used_elements() const
Retrieve non zero element count.
Definition: sparse_matrix_csr.hpp:1191

FEAT::LAFEM::SparseMatrixCSR::row_ptr
IT_ * row_ptr()
Retrieve row start index array.
Definition: sparse_matrix_csr.hpp:1244

FEAT::StopWatch
Stop-Watch class.
Definition: stop_watch.hpp:21

FEAT::StopWatch::elapsed
double elapsed() const
Returns the total elapsed time in seconds.
Definition: stop_watch.hpp:70

FEAT::StopWatch::start
void start()
Starts the stop-watch.
Definition: stop_watch.hpp:43

FEAT::StopWatch::reset
void reset()
Resets the elapsed time.
Definition: stop_watch.hpp:36

FEAT::StopWatch::stop
void stop()
Stops the stop-watch and increments elapsed time.
Definition: stop_watch.hpp:51

FEAT::String
String class implementation.
Definition: string.hpp:46

FEAT::String::pad_back
String pad_back(size_type len, char c=' ') const
Pads the back of the string up to a desired length.
Definition: string.hpp:415

FEAT::Tiny::Matrix
Tiny Matrix class template.
Definition: tiny_algebra.hpp:80

FEAT::Tiny::Vector
Tiny Vector class template.
Definition: tiny_algebra.hpp:52

FEAT::Adjacency::RenderType::injectify_sorted
@ injectify_sorted
Render-Injectified mode, sort image indices.

FEAT::Adjacency::RenderType::injectify_transpose
@ injectify_transpose
Render-Injectified-Transpose mode.

FEAT::Dist::op_max
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273

FEAT::Dist::op_sum
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
Definition: dist.hpp:271

FEAT::LAFEM::CloneMode::Layout
@ Layout

FEAT::Math::abs
T_ abs(T_ x)
Returns the absolute value.
Definition: math.hpp:275

FEAT
FEAT namespace.
Definition: adjactor.hpp:12

FEAT::stringify_fp_fix
String stringify_fp_fix(DataType_ value, int precision=0, int width=0, bool sign=false)
Prints a floating point value to a string in fixed-point notation.
Definition: string.hpp:1142

FEAT::Index
std::uint64_t Index
Index data type.
Definition: base_header.hpp:122