FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
synch_mat.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
9#include <kernel/util/dist.hpp>
10#include <kernel/util/time_stamp.hpp>
11#include <kernel/util/statistics.hpp>
12#include <kernel/lafem/matrix_mirror.hpp>
13#include <kernel/lafem/power_diag_matrix.hpp>
14
15#include <vector>
16#include <array>
17
18namespace FEAT
19{
20 namespace Global
21 {
35 template<typename MT_, typename VMT_>
37 {
38 public:
43
44 protected:
45 bool _initialized;
46#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
50 std::vector<int> _ranks;
52 std::vector<MatrixMirrorType> _mirrors;
56 std::vector<BufferMatrixType> _send_bufs, _recv_bufs;
57#endif // FEAT_HAVE_MPI || DOXYGEN
58
59 public:
75#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
76 SynchMatrix(const Dist::Comm& comm, const std::vector<int>& ranks,
77 const std::vector<VMT_>& mirrors_row, const std::vector<VMT_>& mirrors_col) :
78 _initialized(false),
79 _comm(comm),
80 _ranks(ranks),
81 _send_reqs(ranks.size()),
82 _recv_reqs(ranks.size()),
83 _send_bufs(ranks.size()),
84 _recv_bufs(ranks.size())
85 {
86 const std::size_t n = ranks.size();
87
88 XASSERTM(mirrors_row.size() == n, "invalid row vector mirror count");
89 XASSERTM(mirrors_col.size() == n, "invalid column vector mirror count");
90
91 _mirrors.reserve(n);
92
93 // create matrix mirrors and buffers
94 for(std::size_t i(0); i < n; ++i)
95 {
96 const VMT_& mir_r = mirrors_row.at(i);
97 const VMT_& mir_c = mirrors_col.at(i);
98
99 // create matrix mirror
100 _mirrors.push_back(MatrixMirrorType(mir_r, mir_c));
101 }
102 }
103#else // non-MPI version
104 SynchMatrix(const Dist::Comm&, const std::vector<int>& ranks, const std::vector<VMT_>&, const std::vector<VMT_>&) :
105 _initialized(false)
106 {
107 XASSERT(ranks.empty());
108 }
109#endif // FEAT_HAVE_MPI
110
112 SynchMatrix(const SynchMatrix &) = delete;
114 SynchMatrix & operator=(const SynchMatrix &) = delete;
115
123#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
124 void init(const MT_& matrix)
125 {
126 XASSERTM(!_initialized, "SynchMatrix object is already initialized");
127
128 const std::size_t n = _ranks.size();
129
130 // create matrix mirror buffers
131 for(std::size_t i(0); i < n; ++i)
132 {
133 _send_bufs.at(i) = _mirrors.at(i).create_buffer(matrix);
134 }
135
136 // receive buffer dimensions vector
137 std::vector<std::array<Index,4>> recv_dims(n), send_dims(n);
138
139 // post send-buffer dimension receives
140 for(std::size_t i(0); i < n; ++i)
141 {
142 _recv_reqs[i] = _comm.irecv(recv_dims.at(i).data(), std::size_t(4), _ranks.at(i));
143 }
144
145 // send send-buffer dimensions
146 for(std::size_t i(0); i < n; ++i)
147 {
148 const BufferMatrixType& sbuf = _send_bufs.at(i);
149 send_dims.at(i)[0] = sbuf.rows();
150 send_dims.at(i)[1] = sbuf.columns();
151 send_dims.at(i)[2] = sbuf.entries_per_nonzero();
152 send_dims.at(i)[3] = sbuf.used_elements();
153 _send_reqs[i] = _comm.isend(send_dims.at(i).data(), std::size_t(4), _ranks.at(i));
154 }
155
156 // wait for all receives to finish
157 _recv_reqs.wait_all();
158
159 // create receive buffers and post receives
160 for(std::size_t i(0); i < n; ++i)
161 {
162 // get the receive buffer dimensions
163 Index nrows = recv_dims.at(i)[0];
164 Index ncols = recv_dims.at(i)[1];
165 Index nepnz = recv_dims.at(i)[2];
166 Index nnze = recv_dims.at(i)[3];
167
168 // allocate receive buffer
169 _recv_bufs.at(i) = BufferMatrixType(nrows, ncols, nnze, nepnz);
170 }
171
172 // post buffer row-pointer array receives
173 for(std::size_t i(0); i < n; ++i)
174 {
175 _recv_reqs[i] = _comm.irecv(_recv_bufs.at(i).row_ptr(), _recv_bufs.at(i).rows()+std::size_t(1), _ranks.at(i));
176 }
177
178 // wait for all previous sends to finish
180
181 // post buffer row-pointer array sends
182 for(std::size_t i(0); i < n; ++i)
183 {
184 _send_reqs[i] = _comm.isend(_send_bufs.at(i).row_ptr(), _send_bufs.at(i).rows()+std::size_t(1), _ranks.at(i));
185 }
186
187 // wait for all previous receives to finish
188 _recv_reqs.wait_all();
189
190 // post buffer column-index array receives
191 for(std::size_t i(0); i < n; ++i)
192 {
193 _recv_reqs[i] = _comm.irecv(_recv_bufs.at(i).col_ind(), _recv_bufs.at(i).used_elements(), _ranks.at(i));
194 }
195
196 // wait for all previous sends to finish
198
199 // post buffer column-index array sends
200 for(std::size_t i(0); i < n; ++i)
201 {
202 _send_reqs[i] = _comm.isend(_send_bufs.at(i).col_ind(), _send_bufs.at(i).used_elements(), _ranks.at(i));
203 }
204
205 // wait for all receives and sends to finish
206 _recv_reqs.wait_all();
208
209 _initialized = true;
210 }
211#else // non-MPI version
212 void init(const MT_&)
213 {
214 XASSERTM(!_initialized, "SynchMatrix object is already initialized");
215 _initialized = true;
216 }
217#endif // FEAT_HAVE_MPI
218
225#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
226 void exec(MT_& matrix)
227 {
228 XASSERTM(_initialized, "SynchMatrix object has not been initialized");
229
230 const std::size_t n = _ranks.size();
231
232 // create receive buffers and post receives
233 for(std::size_t i(0); i < n; ++i)
234 {
235 BufferMatrixType& buf = _recv_bufs.at(i);
236
237 // post receive
238 _recv_reqs.get_request(i) = _comm.irecv(buf.val(), buf.val_size(), _ranks.at(i));
239 }
240
241 // post sends
242 for(std::size_t i(0); i < n; ++i)
243 {
244 BufferMatrixType& buf = _send_bufs.at(i);
245
246 // gather from mirror
247 _mirrors.at(i).gather(buf, matrix);
248
249 // post send
250 _send_reqs.get_request(i) = _comm.isend(buf.val(), buf.val_size(), _ranks.at(i));
251 }
252
253 // process all pending receives
254 for(std::size_t idx(0u); _recv_reqs.wait_any(idx); )
255 {
256 // scatter the receive buffer
257 _mirrors.at(idx).scatter_axpy(matrix, _recv_bufs.at(idx));
258 }
259
260 // wait for all sends to finish
262 }
263#else // non-MPI version
264 void exec(MT_&)
265 {
266 XASSERTM(_initialized, "SynchMatrix object has not been initialized");
267 }
268#endif // FEAT_HAVE_MPI
269 }; // class SynchMatrix
270
271 template <typename MT_, typename SVMT_, int blocks_>
272 class SynchMatrix<LAFEM::PowerDiagMatrix<MT_, blocks_>, SVMT_>
273 {
274 public:
275 using VMT_ = typename SVMT_::SubMirrorType;
277 bool _initialized;
278
279#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
280 std::vector<std::shared_ptr<SynchMatrix<MT_, VMT_>>> synch_matrix_list;
281 std::vector<std::vector<VMT_>> mirrors_row_split;
282 std::vector<std::vector<VMT_>> mirrors_col_split;
283#endif // FEAT_HAVE_MPI || DOXYGEN
284
285#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
286 SynchMatrix(const Dist::Comm& comm, const std::vector<int>& ranks,
287 const std::vector<SVMT_>& mirrors_row, const std::vector<SVMT_>& mirrors_col) :
288 _initialized(false),
289 synch_matrix_list(blocks_),
290 mirrors_row_split(blocks_),
291 mirrors_col_split(blocks_)
292 {
293 for (int block(0) ; block < blocks_ ; ++block)
294 {
295 for (Index i(0) ; i < mirrors_row.size() ; ++i)
296 {
297 mirrors_row_split.at((size_t)block).push_back(mirrors_row.at(i).get(block).clone(LAFEM::CloneMode::Shallow));
298 }
299 for (Index i(0) ; i < mirrors_col.size() ; ++i)
300 {
301 mirrors_col_split.at((size_t)block).push_back(mirrors_col.at(i).get(block).clone(LAFEM::CloneMode::Shallow));
302 }
303
304 synch_matrix_list.at((size_t)block) = std::make_shared<SynchMatrix<MT_, VMT_>>(comm, ranks, mirrors_row_split.at((size_t)block), mirrors_col_split.at((size_t)block));
305
306 }
307 }
308#else // non-MPI version
309 SynchMatrix(const Dist::Comm&, const std::vector<int>& ranks, const std::vector<SVMT_>&, const std::vector<SVMT_>&) :
310 _initialized(false)
311 {
312 XASSERT(ranks.empty());
313 }
314#endif // FEAT_HAVE_MPI
315
316#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
317 void init(const SMT_& matrix)
318 {
319 XASSERTM(!_initialized, "SynchMatrix object is already initialized");
320
321 for (int block(0) ; block < blocks_ ; ++block)
322 {
323 synch_matrix_list.at((size_t)block)->init(matrix.get(block, block));
324 }
325
326 _initialized = true;
327 }
328#else // non-MPI version
329 void init(const SMT_&)
330 {
331 XASSERTM(!_initialized, "SynchMatrix object is already initialized");
332 _initialized = true;
333 }
334#endif // FEAT_HAVE_MPI
335
336#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
337 void exec(SMT_& matrix)
338 {
339 XASSERTM(_initialized, "SynchMatrix object has not been initialized");
340
341 for (int block(0) ; block < blocks_ ; ++block)
342 {
343 synch_matrix_list.at((size_t)block)->exec(matrix.get(block, block));
344 }
345 }
346#else // non-MPI version
347 void exec(SMT_&)
348 {
349 XASSERTM(_initialized, "SynchMatrix object has not been initialized");
350 }
351#endif // FEAT_HAVE_MPI
352 };
353
374 template<typename MT_, typename VMT_>
375 void synch_matrix(MT_& target, const Dist::Comm& comm, const std::vector<int>& ranks,
376 const std::vector<VMT_>& mirrors_row, const std::vector<VMT_>& mirrors_col)
377 {
378 SynchMatrix<MT_, VMT_> synch(comm, ranks, mirrors_row, mirrors_col);
379 synch.init(target);
380 synch.exec(target);
381 }
382 } // namespace Global
383} // namespace FEAT
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
FEAT Kernel base header.
Communicator class.
Definition: dist.hpp:1349
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Definition: dist.cpp:716
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
Definition: dist.cpp:704
Communication Request vector class.
Definition: dist.hpp:640
Request & get_request(std::size_t idx)
Returns a (const) reference to a single request in the vector.
Definition: dist.hpp:755
bool wait_any(std::size_t &idx, Status &status)
Blocks until one of the active requests has been fulfilled.
Definition: dist.cpp:329
void wait_all()
Blocks until all active requests are fulfilled.
Definition: dist.cpp:324
Ticket class for asynchronous global matrix conversion.
Definition: synch_mat.hpp:37
std::vector< BufferMatrixType > _send_bufs
send and receive buffers
Definition: synch_mat.hpp:56
void exec(MT_ &matrix)
Converts a type-0 matrix to a type-1 matrix.
Definition: synch_mat.hpp:226
LAFEM::MatrixMirrorBuffer< typename MT_::DataType, typename MT_::IndexType > BufferMatrixType
the buffer matrix type
Definition: synch_mat.hpp:42
SynchMatrix(const SynchMatrix &)=delete
deleted copy constructor
SynchMatrix & operator=(const SynchMatrix &)=delete
deleted copy assignment operator
std::vector< int > _ranks
the neighbor ranks
Definition: synch_mat.hpp:50
Dist::RequestVector _send_reqs
send and receive request vectors
Definition: synch_mat.hpp:54
SynchMatrix(const Dist::Comm &comm, const std::vector< int > &ranks, const std::vector< VMT_ > &mirrors_row, const std::vector< VMT_ > &mirrors_col)
Constructor.
Definition: synch_mat.hpp:76
LAFEM::MatrixMirror< typename MT_::DataType, typename MT_::IndexType > MatrixMirrorType
the matrix mirror type
Definition: synch_mat.hpp:40
const Dist::Comm & _comm
our communicator
Definition: synch_mat.hpp:48
void init(const MT_ &matrix)
Initializes the internal buffers for synchronization.
Definition: synch_mat.hpp:124
std::vector< MatrixMirrorType > _mirrors
the matrix mirrors
Definition: synch_mat.hpp:52
Matrix Mirror Buffer class template.
Index val_size() const
Retrieve total length of value array.
DT_ * val()
Retrieve non zero element array.
Index columns() const
Retrieve matrix column count.
Index entries_per_nonzero() const
Retrieve entries per non zero element count.
Index used_elements() const
Retrieve non zero element count.
Index rows() const
Retrieve matrix row count.
Matrix-Mirror class template.
Power-Diag-Matrix meta class template.
SubMatrixType & get(int i, int j)
Returns a sub-matrix block.
void synch_matrix(MT_ &target, const Dist::Comm &comm, const std::vector< int > &ranks, const std::vector< VMT_ > &mirrors_row, const std::vector< VMT_ > &mirrors_col)
Synchronizes a type-0 matrix.
Definition: synch_mat.hpp:375
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.