FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
gate.hpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#pragma once
7
9#include <kernel/util/dist.hpp>
11#include <kernel/lafem/dense_vector.hpp>
12#include <kernel/global/synch_vec.hpp>
13#include <kernel/global/synch_scal.hpp>
14
15#include <vector>
16
17namespace FEAT
18{
22 namespace Global
23 {
49 template<typename LocalVector_, typename Mirror_>
50 class Gate
51 {
52 public:
54 typedef typename LocalVector_::DataType DataType;
56 typedef typename LocalVector_::IndexType IndexType;
58 typedef LocalVector_ LocalVectorType;
60 typedef Mirror_ MirrorType;
63
66
67 public:
71 std::vector<int> _ranks;
73 std::vector<Mirror_> _mirrors;
75 LocalVector_ _freqs;
76
78 template <typename LocalVector2_, typename Mirror2_>
80
82 template <typename DataType2_, typename IndexType2_>
84
85 public:
87 explicit Gate() :
88 _comm(nullptr)
89 {
90 }
91
98 explicit Gate(const Dist::Comm& comm) :
99 _comm(&comm)
100 {
101 }
102
104 Gate(Gate&& other) :
105 _comm(other._comm),
106 _ranks(std::forward<std::vector<int>>(other._ranks)),
107 _mirrors(std::forward<std::vector<Mirror_>>(other._mirrors)),
108 _freqs(std::forward<LocalVector_>(other._freqs))
109 {
110 }
111
114 {
115 if(this == &other)
116 {
117 return *this;
118 }
119
120 _comm = other._comm;
121 _ranks = std::forward<std::vector<int>>(other._ranks);
122 _mirrors = std::forward<std::vector<Mirror_>>(other._mirrors);
123 _freqs = std::forward<LocalVector_>(other._freqs);
124
125 return *this;
126 }
127
129 virtual ~Gate()
130 {
131 }
132
138 const Dist::Comm* get_comm() const
139 {
140 return this->_comm;
141 }
142
149 void set_comm(const Dist::Comm* comm_)
150 {
151 this->_comm = comm_;
152 }
153
159 const std::vector<int> get_ranks() const
160 {
161 return this->_ranks;
162 }
163
169 const std::vector<Mirror_>& get_mirrors() const
170 {
171 return this->_mirrors;
172 }
173
179 const LocalVector_& get_freqs() const
180 {
181 return this->_freqs;
182 }
183
190 template<typename LVT2_, typename MT2_>
191 void convert(const Gate<LVT2_, MT2_>& other)
192 {
193 if((void*)this == (void*)&other)
194 return;
195
196 this->_comm = other._comm;
197 this->_ranks = other._ranks;
198
199 this->_mirrors.resize(other._mirrors.size());
200 for(std::size_t i(0); i < other._mirrors.size(); ++i)
201 {
202 this->_mirrors.at(i).convert(other._mirrors.at(i));
203 }
204
205 this->_freqs.convert(other._freqs);
206 }
207
227 template<typename LVT2_>
228 void convert(const Gate<LVT2_, Mirror_>& other, LocalVector_&& vector, LAFEM::CloneMode mode = LAFEM::CloneMode::Shallow)
229 {
230 if((void*)this == (void*)&other)
231 return;
232
233 this->_ranks.clear();
234 this->_mirrors.clear();
235 this->_mirrors.resize(other._mirrors.size());
236 this->_freqs.clear(); // will be rebuild by compile function
237
238 this->_comm = other._comm;
239 this->_ranks = other._ranks;
240
241 // shallow-clone mirrors
242 for(std::size_t i(0); i < _mirrors.size(); ++i)
243 {
244 _mirrors.at(i).clone(other._mirrors.at(i), mode);
245 }
246
247 // compile this gate
248 compile(std::forward<LocalVector_>(vector));
249 }
250
252 std::size_t bytes() const
253 {
254 size_t temp(0);
255 for (auto& i : _mirrors)
256 {
257 temp += i.bytes();
258 }
259 temp += _freqs.bytes();
260 temp += _ranks.size() * sizeof(int);
261
262 return temp;
263 }
264
274 void push(int rank, Mirror_&& mirror)
275 {
276 XASSERT(this->_comm != nullptr);
277 XASSERT(rank < this->_comm->size());
278
279 // push rank and tags
280 _ranks.push_back(rank);
281
282 // push mirror
283 _mirrors.push_back(std::move(mirror));
284 }
285
296 void compile(LocalVector_&& vector)
297 {
298 // initialize frequency vector
299 _freqs = std::move(vector);
300 _freqs.format(DataType(1));
301
302 // loop over all mirrors
303 for(std::size_t i(0); i < _mirrors.size(); ++i)
304 {
305 // sum up number of ranks per frequency entry, listed in different mirrors
306 auto temp = _mirrors.at(i).create_buffer(_freqs);
307 temp.format(DataType(1));
308
309 // gather-axpy into frequency vector
310 _mirrors.at(i).scatter_axpy(_freqs, temp);
311 }
312
313 // invert frequencies
314 _freqs.component_invert(_freqs);
315 }
316
325 template<LAFEM::Perspective perspective_ = LAFEM::Perspective::native>
327 {
328 return _freqs.template size<perspective_>();
329 }
330
342 template<LAFEM::Perspective perspective_ = LAFEM::Perspective::native>
344 {
345 XASSERT(this->_comm != nullptr);
346 if(this->_comm->size() <= 1)
347 return this->template get_num_local_dofs<perspective_>();
348
349 // get my rank
350 const int my_rank = this->_comm->rank();
351
352 // get local number of DOFs
353 const Index loc_dofs = this->template get_num_local_dofs<perspective_>();
354
355 // create a local mask vector and format it to 1
356 std::vector<int> mask(std::size_t(loc_dofs), 1);
357
358 // set all DOFs, which are shared with a lower rank neighbor, to 0
359 for(std::size_t i(0); i < _mirrors.size(); ++i)
360 {
361 if(this->_ranks.at(i) < my_rank)
362 this->_mirrors.at(i).template mask_scatter<perspective_>(this->_freqs, mask, 0);
363 }
364
365 // count the number of DOFs that are still 1 and thus not owned by a lower rank neighbor
366 Index owned_dofs(0u);
367 for(const auto& k : mask)
368 owned_dofs += Index(k);
369
370 // now sum up the number of owned DOFs over all processes
371 Index global_dofs(0u);
372 this->_comm->allreduce(&owned_dofs, &global_dofs, std::size_t(1), Dist::op_sum);
373
374 // done!
375 return global_dofs;
376 }
377
387 void from_1_to_0(LocalVector_& vector) const
388 {
389 if(!_ranks.empty())
390 {
391 vector.component_product(vector, _freqs);
392 }
393 }
394
408 void sync_0(LocalVector_& vector) const
409 {
410 if(!_ranks.empty())
411 {
413 ticket.wait();
414 }
415 }
416
432 VectorTicketType sync_0_async(LocalVector_& vector) const
433 {
434 if(_ranks.empty())
435 return SynchVectorTicket<LocalVector_, Mirror_>(); // empty ticket
436
438 }
439
457 void sync_1(LocalVector_& vector) const
458 {
459 if(!_ranks.empty())
460 {
461 from_1_to_0(vector);
463 ticket.wait();
464 }
465 }
466
485 VectorTicketType sync_1_async(LocalVector_& vector) const
486 {
487 if(_ranks.empty())
488 return SynchVectorTicket<LocalVector_, Mirror_>(); // empty ticket
489
490 from_1_to_0(vector);
492 }
493
506 DataType dot(const LocalVector_& x, const LocalVector_& y) const
507 {
508 // This is if there is only one process
509 if(_comm == nullptr || _comm->size() == 1)
510 {
511 return x.dot(y);
512 }
513 // Even if there are no neighbors, we still need to sum up globally
514 else if(_ranks.empty())
515 {
516 return sum(x.dot(y));
517 }
518 // If there are neighbors, we have to use the frequencies and sum up globally
519 else
520 {
521 return sum(_freqs.triple_dot(x, y));
522 }
523 }
524
536 ScalarTicketType dot_async(const LocalVector_& x, const LocalVector_& y, bool sqrt = false) const
537 {
538 return sum_async(_freqs.triple_dot(x, y), sqrt);
539 }
540
554 {
555 return sum_async(x).wait();
556 }
557
569 ScalarTicketType sum_async(DataType x, bool sqrt = false) const
570 {
572 }
573
587 {
588 return min_async(x).wait();
589 }
590
603 {
605 }
606
620 {
621 return max_async(x).wait();
622 }
623
636 {
638 }
639
656 {
657 return norm2_async(x).wait();
658 }
659
675 {
677 }
678 }; // class Gate<...>
679 } // namespace Global
680} // namespace FEAT
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
FEAT Kernel base header.
Communicator class.
Definition: dist.hpp:1349
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
Definition: dist.cpp:655
int size() const
Returns the size of this communicator.
Definition: dist.hpp:1506
int rank() const
Returns the rank of this process in this communicator.
Definition: dist.hpp:1494
Global gate implementation.
Definition: gate.hpp:51
void compile(LocalVector_ &&vector)
Compiles the gate to finish its setup.
Definition: gate.hpp:296
virtual ~Gate()
virtual destructor
Definition: gate.hpp:129
void sync_0(LocalVector_ &vector) const
Synchronizes a type-0 vector, resulting in a type-1 vector.
Definition: gate.hpp:408
void convert(const Gate< LVT2_, MT2_ > &other)
Conversion function for same vector container type but with different MDI-Type.
Definition: gate.hpp:191
const Dist::Comm * get_comm() const
Returns a const pointer to the underlying communicator.
Definition: gate.hpp:138
const LocalVector_ & get_freqs() const
Returns a const reference to the frequencies vector.
Definition: gate.hpp:179
void push(int rank, Mirror_ &&mirror)
Adds a mirror for a neighbor process.
Definition: gate.hpp:274
void from_1_to_0(LocalVector_ &vector) const
Converts a type-1 vector into a type-0 vector.
Definition: gate.hpp:387
Index get_num_local_dofs() const
Returns the number of local DOFs.
Definition: gate.hpp:326
LocalVector_::IndexType IndexType
the index type
Definition: gate.hpp:56
LAFEM::DenseVector< DataType, IndexType > BufferVectorType
the internal buffer vector type
Definition: gate.hpp:62
ScalarTicketType sum_async(DataType x, bool sqrt=false) const
Computes a reduced sum over all processes.
Definition: gate.hpp:569
DataType max(DataType x) const
Computes the maximum of a scalar variable over all processes.
Definition: gate.hpp:619
Gate()
standard constructor
Definition: gate.hpp:87
ScalarTicketType min_async(DataType x) const
Computes the minimum of a scalar variable over all processes.
Definition: gate.hpp:602
std::vector< Mirror_ > _mirrors
vector mirrors
Definition: gate.hpp:73
Index get_num_global_dofs() const
Returns the number of global DOFs.
Definition: gate.hpp:343
DataType norm2(DataType x) const
Computes a reduced 2-norm over all processes.
Definition: gate.hpp:655
LocalVector_ LocalVectorType
the local vector type
Definition: gate.hpp:58
ScalarTicketType norm2_async(DataType x) const
Computes a reduced 2-norm over all processes.
Definition: gate.hpp:674
void set_comm(const Dist::Comm *comm_)
Sets the communicator for this gate.
Definition: gate.hpp:149
const Dist::Comm * _comm
our communicator
Definition: gate.hpp:69
Gate & operator=(Gate &&other)
move-assign operator
Definition: gate.hpp:113
const std::vector< Mirror_ > & get_mirrors() const
Returns a const reference to the neighbor mirrors vector.
Definition: gate.hpp:169
void convert(const Gate< LVT2_, Mirror_ > &other, LocalVector_ &&vector, LAFEM::CloneMode mode=LAFEM::CloneMode::Shallow)
Conversion function for different vector container type.
Definition: gate.hpp:228
ScalarTicketType dot_async(const LocalVector_ &x, const LocalVector_ &y, bool sqrt=false) const
Computes a synchronized dot-product of two type-1 vectors.
Definition: gate.hpp:536
ScalarTicketType max_async(DataType x) const
Computes the maximum of a scalar variable over all processes.
Definition: gate.hpp:635
Mirror_ MirrorType
the mirror type
Definition: gate.hpp:60
void sync_1(LocalVector_ &vector) const
Synchronizes a type-1 vector, resulting in a type-1 vector.
Definition: gate.hpp:457
VectorTicketType sync_1_async(LocalVector_ &vector) const
Synchronizes a type-1 vector, resulting in a type-1 vector.
Definition: gate.hpp:485
LocalVector_ _freqs
frequency vector
Definition: gate.hpp:75
const std::vector< int > get_ranks() const
Returns a const reference to the neighbor ranks vector.
Definition: gate.hpp:159
DataType sum(DataType x) const
Computes a reduced sum over all processes.
Definition: gate.hpp:553
DataType min(DataType x) const
Computes the minimum of a scalar variable over all processes.
Definition: gate.hpp:586
VectorTicketType sync_0_async(LocalVector_ &vector) const
Synchronizes a type-0 vector, resulting in a type-1 vector.
Definition: gate.hpp:432
Gate(Gate &&other)
move constructor
Definition: gate.hpp:104
std::vector< int > _ranks
communication ranks
Definition: gate.hpp:71
DataType dot(const LocalVector_ &x, const LocalVector_ &y) const
Computes a synchronized dot-product of two type-1 vectors.
Definition: gate.hpp:506
LocalVector_::DataType DataType
the data type
Definition: gate.hpp:54
std::size_t bytes() const
Returns the total amount of bytes allocated.
Definition: gate.hpp:252
Gate(const Dist::Comm &comm)
Constructor.
Definition: gate.hpp:98
Ticket class for asynchronous global operations on scalars.
Definition: synch_scal.hpp:31
Ticket class for asynchronous global operations on vectors.
Definition: synch_vec.hpp:27
Dense data vector class template.
const Operation op_min(MPI_MIN)
Operation wrapper for MPI_MIN.
Definition: dist.hpp:275
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
Definition: dist.hpp:271
FEAT namespace.
Definition: adjactor.hpp:12
std::uint64_t Index
Index data type.