FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
dist.cpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6// includes, FEAT
7#include <kernel/util/dist.hpp>
8#include <kernel/util/math.hpp> // for ilog10
9
10// includes, system
11#include <cstring> // for strcpy, memcpy
12#include <cstdint>
13
14namespace FEAT
15{
16 namespace Dist
17 {
18#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
19
20#ifdef FEAT_OVERRIDE_MPI_OPS
// Reduction functor for the sum operation: x <- x + y
struct OpSum
{
  // Accumulates y onto x in place.
  // \param[in,out] x  The accumulator that receives the sum.
  // \param[in]     y  The addend; it is only read, hence taken by const reference.
  template<typename T_>
  static inline void eval(T_& x, const T_& y)
  {
    x += y;
  }
};
30
// Reduction functor for the maximum operation: x <- max(x, y)
struct OpMax
{
  // Overwrites x by y if x compares less than y.
  // \param[in,out] x  The accumulator that receives the maximum.
  // \param[in]     y  The candidate value; it is only read, hence taken by const reference.
  template<typename T_>
  static inline void eval(T_& x, const T_& y)
  {
    if(x < y)
      x = y;
  }
};
41
// Reduction functor for the minimum operation: x <- min(x, y)
struct OpMin
{
  // Overwrites x by y if y compares less than x.
  // \param[in,out] x  The accumulator that receives the minimum.
  // \param[in]     y  The candidate value; it is only read, hence taken by const reference.
  template<typename T_>
  static inline void eval(T_& x, const T_& y)
  {
    if(y < x)
      x = y;
  }
};
52
// Helper function:
// Treats both untyped buffers as arrays of T_ holding *n entries each and
// calls Op_::eval(inout[k], in[k]) for every index k, so that the result
// ends up in the second buffer (iov).
template<typename Op_, typename T_>
inline void func_op_t(void* iv, void* iov, int* n)
{
  T_* in = reinterpret_cast<T_*>(iv);
  T_* inout = reinterpret_cast<T_*>(iov);
  int k = 0;
  while(k < *n)
  {
    Op_::eval(inout[k], in[k]);
    ++k;
  }
}
66
67 // Callback function for MPI operation overrides
68 template<typename Op_>
69 void op_callback(void* iv, void* iov, int* n, MPI_Datatype* dt)
70 {
71 if(*dt == MPI_SIGNED_CHAR) func_op_t<Op_, signed char> (iv, iov, n); else
72 if(*dt == MPI_SHORT) func_op_t<Op_, short> (iv, iov, n); else
73 if(*dt == MPI_INT) func_op_t<Op_, int> (iv, iov, n); else
74 if(*dt == MPI_LONG) func_op_t<Op_, long> (iv, iov, n); else
75 if(*dt == MPI_LONG_LONG) func_op_t<Op_, long long> (iv, iov, n); else
76 if(*dt == MPI_UNSIGNED_CHAR) func_op_t<Op_, unsigned char> (iv, iov, n); else
77 if(*dt == MPI_UNSIGNED_SHORT) func_op_t<Op_, unsigned short> (iv, iov, n); else
78 if(*dt == MPI_UNSIGNED) func_op_t<Op_, unsigned int> (iv, iov, n); else
79 if(*dt == MPI_UNSIGNED_LONG) func_op_t<Op_, unsigned long> (iv, iov, n); else
80 if(*dt == MPI_UNSIGNED_LONG_LONG) func_op_t<Op_, unsigned long long>(iv, iov, n); else
81 if(*dt == MPI_FLOAT) func_op_t<Op_, float> (iv, iov, n); else
82 if(*dt == MPI_DOUBLE) func_op_t<Op_, double> (iv, iov, n); else
83 if(*dt == MPI_LONG_DOUBLE) func_op_t<Op_, long double> (iv, iov, n); else
84 if(*dt == MPI_INT8_T) func_op_t<Op_, std::int8_t> (iv, iov, n); else
85 if(*dt == MPI_INT16_T) func_op_t<Op_, std::int16_t> (iv, iov, n); else
86 if(*dt == MPI_INT32_T) func_op_t<Op_, std::int32_t> (iv, iov, n); else
87 if(*dt == MPI_INT64_T) func_op_t<Op_, std::int64_t> (iv, iov, n); else
88 if(*dt == MPI_UINT8_T) func_op_t<Op_, std::uint8_t> (iv, iov, n); else
89 if(*dt == MPI_UINT16_T) func_op_t<Op_, std::uint16_t> (iv, iov, n); else
90 if(*dt == MPI_UINT32_T) func_op_t<Op_, std::uint32_t> (iv, iov, n); else
91 if(*dt == MPI_UINT64_T) func_op_t<Op_, std::uint64_t> (iv, iov, n); else
92#ifdef FEAT_HAVE_QUADMATH
93 if(*dt == dt__float128.dt) func_op_t<Op_, __float128> (iv, iov, n); else
94#endif
95#ifdef FEAT_HAVE_HALFMATH
96 if(*dt == dt__half.dt) func_op_t<Op_, __half> (iv, iov, n); else
97#endif
98 {
99 // unsupported datatype
100 MPI_Abort(MPI_COMM_WORLD, 1);
101 }
102 }
103#endif // FEAT_OVERRIDE_MPI_OPS
104
105 bool initialize(int& argc, char**& argv)
106 {
107 int already_initialized(0);
108 if (::MPI_Initialized(&already_initialized) != MPI_SUCCESS)
109 return false;
110
111#ifdef FEAT_MPI_THREAD_MULTIPLE
112 XASSERTM((bool)!already_initialized, "MPI was already initialized by another library, we cannot ensure MPI_THREAD_MULTIPLE support!");
113 int required = MPI_THREAD_MULTIPLE;
114#else
115 int required = MPI_THREAD_SERIALIZED;
116#endif //FEAT_MPI_THREAD_MULTIPLE
117 int provided = MPI_THREAD_SINGLE;
118 // if mpi is not already initialized, check if we are able to
119 if (!already_initialized && ::MPI_Init_thread(&argc, &argv, required, &provided) != MPI_SUCCESS)
120 return false;
121 // also, if we initialize ourselves, check if we at least get our required features
122 if (!already_initialized && (provided < required))
123 return false;
124
125#ifdef FEAT_HAVE_QUADMATH
126 // Create a custom MPI datatype for '__float128'
127 Datatype& dt_f128 = const_cast<Datatype&>(Dist::dt__float128);
128 MPI_Type_contiguous(int(sizeof(__float128)), MPI_BYTE, &dt_f128.dt);
129 MPI_Type_commit(&dt_f128.dt);
130#endif // FEAT_HAVE_QUADMATH
131#ifdef FEAT_HAVE_HALFMATH
132 // Create a custom MPI datatype for '__half'
133 Datatype& dt_half = const_cast<Datatype&>(Dist::dt__half);
134 MPI_Type_contiguous(int(sizeof(__half)), MPI_BYTE, &dt_half.dt);
135 MPI_Type_commit(&dt_half.dt);
136#endif // FEAT_HAVE_HALFMATH
137
138#ifdef FEAT_OVERRIDE_MPI_OPS
139 // override MPI operations
140 MPI_Op_create(op_callback<OpSum>, 1, &const_cast<Operation&>(Dist::op_sum).op);
141 MPI_Op_create(op_callback<OpMax>, 1, &const_cast<Operation&>(Dist::op_max).op);
142 MPI_Op_create(op_callback<OpMin>, 1, &const_cast<Operation&>(Dist::op_min).op);
143#endif // FEAT_OVERRIDE_MPI_OPS
144
145 return true;
146 }
147
148 void finalize()
149 {
150
151#ifdef FEAT_OVERRIDE_MPI_OPS
152 Operation& my_op_sum = const_cast<Operation&>(Dist::op_sum);
153 MPI_Op_free(&my_op_sum.op);
154#endif // FEAT_OVERRIDE_MPI_OPS
155
156#ifdef FEAT_HAVE_QUADMATH
157 Datatype& dt_f128 = const_cast<Datatype&>(Dist::dt__float128);
158 MPI_Type_free(&dt_f128.dt);
159#endif // FEAT_HAVE_QUADMATH
160
161#ifdef FEAT_HAVE_HALFMATH
162 Datatype& dt_half = const_cast<Datatype&>(Dist::dt__half);
163 MPI_Type_free(&dt_half.dt);
164#endif // FEAT_HAVE_HALFMATH
165
166 // finalize MPI
167 int already_finalized(0);
168 ::MPI_Finalized(&already_finalized);
169 if (!already_finalized)
170 MPI_Finalize();
171 }
172
173 // datatypes
174 const Datatype dt_byte (MPI_BYTE, sizeof(char));
175 const Datatype dt_char (MPI_CHAR, sizeof(char));
176 const Datatype dt_wchar (MPI_WCHAR, sizeof(wchar_t));
177 const Datatype dt_signed_char (MPI_SIGNED_CHAR, sizeof(signed char));
178 const Datatype dt_signed_short (MPI_SHORT, sizeof(short));
179 const Datatype dt_signed_int (MPI_INT, sizeof(int));
180 const Datatype dt_signed_long (MPI_LONG, sizeof(long));
181 const Datatype dt_signed_long_long (MPI_LONG_LONG, sizeof(long long));
182 const Datatype dt_unsigned_char (MPI_UNSIGNED_CHAR, sizeof(unsigned char));
183 const Datatype dt_unsigned_short (MPI_UNSIGNED_SHORT, sizeof(unsigned short));
184 const Datatype dt_unsigned_int (MPI_UNSIGNED, sizeof(unsigned int));
185 const Datatype dt_unsigned_long (MPI_UNSIGNED_LONG, sizeof(unsigned long));
186 const Datatype dt_unsigned_long_long (MPI_UNSIGNED_LONG_LONG, sizeof(unsigned long long));
187 const Datatype dt_float (MPI_FLOAT, sizeof(float));
188 const Datatype dt_double (MPI_DOUBLE, sizeof(double));
189 const Datatype dt_long_double (MPI_LONG_DOUBLE, sizeof(long double));
190 const Datatype dt_signed_int8 (MPI_INT8_T, sizeof(std::int8_t));
191 const Datatype dt_signed_int16 (MPI_INT16_T, sizeof(std::int16_t));
192 const Datatype dt_signed_int32 (MPI_INT32_T, sizeof(std::int32_t));
193 const Datatype dt_signed_int64 (MPI_INT64_T, sizeof(std::int64_t));
194 const Datatype dt_unsigned_int8 (MPI_UINT8_T, sizeof(std::uint8_t));
195 const Datatype dt_unsigned_int16 (MPI_UINT16_T, sizeof(std::uint16_t));
196 const Datatype dt_unsigned_int32 (MPI_UINT32_T, sizeof(std::uint32_t));
197 const Datatype dt_unsigned_int64 (MPI_UINT64_T, sizeof(std::uint64_t));
198#ifdef FEAT_HAVE_QUADMATH
199 // This needs to initialized by Dist::initialize() !
200 const Datatype dt__float128 (0, sizeof(__float128));
201#endif
202#ifdef FEAT_HAVE_HALFMATH
203 // This needs to initialized by Dist::initialize() !
204 const Datatype dt__half (0, sizeof(__half));
205#endif
206
207 // operations
208 const Operation op_sum(MPI_SUM);
209 const Operation op_max(MPI_MAX);
210 const Operation op_min(MPI_MIN);
211
212 /* ***************************************************************************************** */
213 /* ***************************************************************************************** */
214 /* MPI Request wrapper implementation */
215 /* ***************************************************************************************** */
216 /* ***************************************************************************************** */
217
219 request(MPI_REQUEST_NULL)
220 {
221 }
222
223 Request::Request(MPI_Request req_) :
224 request(req_)
225 {
226 }
227
229 request(other.request)
230 {
231 other.request = MPI_REQUEST_NULL;
232 }
233
235 {
236 if(this != &other)
237 {
238 XASSERT(is_null());
239 this->request = other.request;
240 other.request = MPI_REQUEST_NULL;
241 }
242 return *this;
243 }
244
245 bool Request::is_null() const
246 {
247 return request == MPI_REQUEST_NULL;
248 }
249
251 {
252 if(request != MPI_REQUEST_NULL)
253 MPI_Request_free(&request);
254 }
255
257 {
258 if(request != MPI_REQUEST_NULL)
259 MPI_Cancel(&request);
260 }
261
262 bool Request::test(Status& status)
263 {
264 int flag(0);
265 MPI_Test(&request, &flag, status.mpi_status());
266 return flag != 0;
267 }
268
269 bool Request::wait(Status& status)
270 {
271 if(request == MPI_REQUEST_NULL)
272 return false;
273 MPI_Wait(&request, status.mpi_status());
274 return true;
275 }
276
277 /* ***************************************************************************************** */
278 /* ***************************************************************************************** */
279 /* MPI RequestVector implementation */
280 /* ***************************************************************************************** */
281 /* ***************************************************************************************** */
282
284 {
285 int flag = 0;
286 MPI_Testall(_isize(), _reqs_array(), &flag, _stats_array());
287 return flag != 0;
288 }
289
290 bool RequestVector::test_any(std::size_t& idx, Status& status)
291 {
292 int i = 0;
293 int flag = 0;
294 MPI_Testany(_isize(), _reqs_array(), &i, &flag, status.mpi_status());
295 if(flag == 0)
296 return false;
297 idx = std::size_t(i);
298 return true;
299 }
300
301 /*std::size_t RequestVector::test_some(std::size_t* indices, Status* statuses)
302 {
303 static_assert(sizeof(int) <= sizeof(std::size_t), "size_t must be at least size of int");
304 XASSERT(indices != nullptr);
305 XASSERT(statuses != nullptr);
306
307 int outcount = -1;
308 int* idx = reinterpret_cast<int*>(indices);
309 MPI_Testsome(_isize(), _reqs_array(), &outcount, idx, (MPI_Status*)statuses);
310
311 // unpack int to size_t ?
312 if(sizeof(int) < sizeof(std::size_t))
313 {
314 for(std::size_t k = std::size_t(outcount); k > 0;)
315 {
316 --k;
317 indices[k] = std::size_t(idx[k]);
318 }
319 }
320
321 return std::size_t(outcount);
322 }*/
323
325 {
326 MPI_Waitall(_isize(), _reqs_array(), _stats_array());
327 }
328
329 bool RequestVector::wait_any(std::size_t& idx, Status& status)
330 {
331 int i = -1;
332 MPI_Waitany(_isize(), _reqs_array(), &i, status.mpi_status());
333 if(i == MPI_UNDEFINED)
334 return false;
335 idx = std::size_t(i);
336 return true;
337 }
338
339 /*std::size_t RequestVector::wait_some(std::size_t* indices, Status* statuses)
340 {
341 static_assert(sizeof(int) <= sizeof(std::size_t), "size_t must be at least size of int");
342 XASSERT(indices != nullptr);
343 XASSERT(statuses != nullptr);
344
345 int outcount = -1;
346 int* idx = reinterpret_cast<int*>(indices);
347 MPI_Waitsome(_isize(), _reqs_array(), &outcount, idx, (MPI_Status*)statuses);
348
349 // unpack int to size_t ?
350 if(sizeof(int) < sizeof(std::size_t))
351 {
352 for(std::size_t k = std::size_t(outcount); k > 0;)
353 {
354 --k;
355 indices[k] = std::size_t(idx[k]);
356 }
357 }
358
359 // store statuses
360 for(std::size_t k(0); k < std::size_t(outcount); ++k)
361 _stats[indices[k]] = statuses[k];
362
363 return std::size_t(outcount);
364 }*/
365
366 /* ***************************************************************************************** */
367 /* ***************************************************************************************** */
368 /* MPI Comm wrapper implementation */
369 /* ***************************************************************************************** */
370 /* ***************************************************************************************** */
371
373 comm(MPI_COMM_NULL),
374 _rank(0),
375 _size(0)
376 {
377 }
378
379 Comm::Comm(MPI_Comm comm_) :
380 comm(comm_),
381 _rank(0),
382 _size(0)
383 {
384 if(comm_ != MPI_COMM_NULL)
385 {
386 MPI_Comm_rank(comm, &_rank);
387 MPI_Comm_size(comm, &_size);
388 }
389 }
390
391 Comm::Comm(Comm&& other) :
392 comm(other.comm),
393 _rank(other._rank),
394 _size(other._size)
395 {
396 // do not free the world and self comms
397 if((other.comm != MPI_COMM_WORLD) && (other.comm != MPI_COMM_SELF))
398 {
399 other.comm = MPI_COMM_NULL;
400 other._rank = other._size = 0;
401 }
402 }
403
405 {
406 if(this != &other)
407 {
408 XASSERT(comm == MPI_COMM_NULL);
409 comm = other.comm;
410 _rank = other._rank;
411 _size = other._size;
412 // do not free the world and self comms
413 if((other.comm != MPI_COMM_WORLD) && (other.comm != MPI_COMM_SELF))
414 {
415 other.comm = MPI_COMM_NULL;
416 other._rank = other._size = 0;
417 }
418 }
419 return *this;
420 }
421
423 {
424 // do not free the world, self and null comms
425 if((comm != MPI_COMM_WORLD) && (comm != MPI_COMM_SELF) && (comm != MPI_COMM_NULL))
426 MPI_Comm_free(&comm);
427 }
428
430 {
431 return Comm(MPI_COMM_WORLD);
432 }
433
435 {
436 return Comm(MPI_COMM_SELF);
437 }
438
440 {
441 return Comm(MPI_COMM_NULL);
442 }
443
444 bool Comm::is_world() const
445 {
446 return (comm == MPI_COMM_WORLD);
447 }
448
449 bool Comm::is_self() const
450 {
451 return (comm == MPI_COMM_SELF);
452 }
453
454 bool Comm::is_null() const
455 {
456 return (comm == MPI_COMM_NULL);
457 }
458
460 {
461 // do not duplicate world, self and null comms
462 if((comm == MPI_COMM_WORLD) || (comm == MPI_COMM_SELF) || (comm == MPI_COMM_NULL))
463 return Comm(comm);
464
465 // create a real duplicate
466 MPI_Comm newcomm = MPI_COMM_NULL;
467 MPI_Comm_dup(comm, &newcomm);
468
469 return Comm(newcomm);
470 }
471
472 Comm Comm::comm_create_range_incl(int count, int first, int stride) const
473 {
474 XASSERT(count > 0);
475 XASSERT((first >= 0) && (first < _size));
476 XASSERT(stride > 0);
477
478 // get this comm's group
479 MPI_Group group = MPI_GROUP_NULL;
480 MPI_Comm_group(comm, &group);
481
482 // create sub-group
483 MPI_Group newgroup = MPI_GROUP_NULL;
484 int ranges[3] = {first, first + (count-1)*stride, stride};
485 MPI_Group_range_incl(group, 1, &ranges, &newgroup);
486
487 // create new comm from group
488 MPI_Comm newcomm = MPI_COMM_NULL;
489 MPI_Comm_create(comm, newgroup, &newcomm);
490
491 // free group handles
492 MPI_Group_free(&newgroup);
493 MPI_Group_free(&group);
494
495 return Comm(newcomm);
496 }
497
498 Comm Comm::comm_create_incl(int n, const int* ranks) const
499 {
500 XASSERT(n > 0);
501 XASSERT(ranks != nullptr);
502
503 // get this comm's group
504 MPI_Group group = MPI_GROUP_NULL;
505 MPI_Comm_group(comm, &group);
506
507 // create sub-group
508 MPI_Group newgroup = MPI_GROUP_NULL;
509 MPI_Group_incl(group, n, ranks, &newgroup);
510
511 // create new comm from group
512 MPI_Comm newcomm = MPI_COMM_NULL;
513 MPI_Comm_create(comm, newgroup, &newcomm);
514
515 // free group handles
516 MPI_Group_free(&newgroup);
517 MPI_Group_free(&group);
518
519 return Comm(newcomm);
520 }
521
522 Comm Comm::comm_split(int color, int key) const
523 {
524 MPI_Comm newcomm = MPI_COMM_NULL;
525 MPI_Comm_split(comm, color, key, &newcomm);
526 return Comm(newcomm);
527 }
528
529 void Comm::barrier() const
530 {
531 MPI_Barrier(comm);
532 }
533
535 {
536 MPI_Request req(MPI_REQUEST_NULL);
537 MPI_Ibarrier(comm, &req);
538 return Request(req);
539 }
540
541 void Comm::bcast(void* buffer, std::size_t count, const Datatype& datatype, int root) const
542 {
543 MPI_Bcast(buffer, int(count), datatype.dt, root, comm);
544 }
545
546 Request Comm::ibcast(void* buffer, std::size_t count, const Datatype& datatype, int root) const
547 {
548 MPI_Request req(MPI_REQUEST_NULL);
549 MPI_Ibcast(buffer, int(count), datatype.dt, root, comm, &req);
550 return Request(req);
551 }
552
553 void Comm::gather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
554 {
555 if((sendbuf == recvbuf) && (root == _rank))
556 MPI_Gather(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm);
557 else
558 MPI_Gather(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm);
559 }
560
561 Request Comm::igather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
562 {
563 MPI_Request req(MPI_REQUEST_NULL);
564 if((sendbuf == recvbuf) && (root == _rank))
565 MPI_Igather(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm, &req);
566 else
567 MPI_Igather(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm, &req);
568 return Request(req);
569 }
570
571 void Comm::scatter(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
572 {
573 if((sendbuf == recvbuf) && (root == _rank))
574 MPI_Scatter(sendbuf, int(sendcount), sendtype.dt, MPI_IN_PLACE, 0, sendtype.dt, root, comm);
575 else
576 MPI_Scatter(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm);
577 }
578
579 Request Comm::iscatter(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
580 {
581 MPI_Request req(MPI_REQUEST_NULL);
582 if((sendbuf == recvbuf) && (root == _rank))
583 MPI_Iscatter(sendbuf, int(sendcount), sendtype.dt, MPI_IN_PLACE, 0, sendtype.dt, root, comm, &req);
584 else
585 MPI_Iscatter(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, root, comm, &req);
586 return Request(req);
587 }
588
589 void Comm::allgather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
590 {
591 if(sendbuf == recvbuf)
592 MPI_Allgather(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, comm);
593 else
594 MPI_Allgather(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, comm);
595 }
596
597 Request Comm::iallgather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
598 {
599 MPI_Request req(MPI_REQUEST_NULL);
600 if(sendbuf == recvbuf)
601 MPI_Iallgather(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, comm, &req);
602 else
603 MPI_Iallgather(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, comm, &req);
604 return Request(req);
605 }
606
607 void Comm::allgatherv(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, const int* recvcounts, const int* displs, const Datatype& recvtype) const
608 {
609 if(sendbuf == recvbuf)
610 MPI_Allgatherv(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, recvcounts, displs, recvtype.dt, comm);
611 else
612 MPI_Allgatherv(sendbuf, int(sendcount), sendtype.dt, recvbuf, recvcounts, displs, recvtype.dt, comm);
613 }
614
615 void Comm::alltoall(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
616 {
617 if(sendbuf == recvbuf)
618 MPI_Alltoall(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, comm);
619 else
620 MPI_Alltoall(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, comm);
621 }
622
623 Request Comm::ialltoall(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
624 {
625 MPI_Request req(MPI_REQUEST_NULL);
626 if(sendbuf == recvbuf)
627 MPI_Ialltoall(MPI_IN_PLACE, 0, recvtype.dt, recvbuf, int(recvcount), recvtype.dt, comm, &req);
628 else
629 MPI_Ialltoall(sendbuf, int(sendcount), sendtype.dt, recvbuf, int(recvcount), recvtype.dt, comm, &req);
630 return Request(req);
631 }
632
633 void Comm::alltoallv(const void* sendbuf, const int* sendcounts, const int* sdispls, const Datatype& sendtype, void* recvbuf, const int* recvcounts, const int* rdispls, const Datatype& recvtype) const
634 {
635 if(sendbuf == recvbuf)
636 MPI_Alltoallv(MPI_IN_PLACE, 0, 0, recvtype.dt, recvbuf, recvcounts, rdispls, recvtype.dt, comm);
637 else
638 MPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype.dt, recvbuf, recvcounts, rdispls, recvtype.dt, comm);
639 }
640
641 void Comm::reduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op, int root) const
642 {
643 // MPI_IN_PLACE is only allowed on root process
644 MPI_Reduce((sendbuf == recvbuf) && (_rank == root) ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, root, comm);
645 }
646
647 Request Comm::ireduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op, int root) const
648 {
649 MPI_Request req(MPI_REQUEST_NULL);
650 // MPI_IN_PLACE is only allowed on root process
651 MPI_Ireduce((sendbuf == recvbuf) && (_rank == root) ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, root, comm, &req);
652 return Request(req);
653 }
654
655 void Comm::allreduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
656 {
657 MPI_Allreduce(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm);
658 }
659
660 Request Comm::iallreduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
661 {
662 MPI_Request req(MPI_REQUEST_NULL);
663 MPI_Iallreduce(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm, &req);
664 return Request(req);
665 }
666
667 void Comm::scan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
668 {
669 MPI_Scan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm);
670 }
671
672 /*Request Comm::iscan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
673 {
674 MPI_Request req(MPI_REQUEST_NULL);
675#ifdef MSMPI_VER
676 MPI_Scan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm);
677#else
678 MPI_Iscan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm, &req);
679#endif
680 return Request(req);
681 }*/
682
683 void Comm::exscan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
684 {
685 MPI_Exscan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm);
686 }
687
688 /*Request Comm::iexscan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
689 {
690 MPI_Request req(MPI_REQUEST_NULL);
691#ifdef MSMPI_VER
692 MPI_Exscan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm);
693#else
694 MPI_Iexscan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf, int(count), datatype.dt, op.op, comm, &req);
695#endif
696 return Request(req);
697 }*/
698
699 void Comm::send(const void* buffer, std::size_t count, const Datatype& datatype, int dest, int tag) const
700 {
701 MPI_Send(buffer, int(count), datatype.dt, dest, tag, comm);
702 }
703
704 Request Comm::isend(const void* buffer, std::size_t count, const Datatype& datatype, int dest, int tag) const
705 {
706 MPI_Request req(MPI_REQUEST_NULL);
707 MPI_Isend(buffer, int(count), datatype.dt, dest, tag, comm, &req);
708 return Request(req);
709 }
710
711 void Comm::recv(void* buffer, std::size_t count, const Datatype& datatype, int source, int tag, Status& status) const
712 {
713 MPI_Recv(buffer, int(count), datatype.dt, source, tag, comm, status.mpi_status());
714 }
715
716 Request Comm::irecv(void* buffer, std::size_t count, const Datatype& datatype, int source, int tag) const
717 {
718 MPI_Request req(MPI_REQUEST_NULL);
719 MPI_Irecv(buffer, int(count), datatype.dt, source, tag, comm, &req);
720 return Request(req);
721 }
722
723 void Comm::bcast_stringstream(std::stringstream& stream, int root) const
724 {
725 std::string str;
726 std::size_t len(0);
727
728 // Get size to broadcast
729 if(rank() == root)
730 {
731 str = stream.str();
732 len = str.length();
733 }
734
735 // broadcast string length
736 bcast(&len, std::size_t(1), root);
737
738 // empty string?
739 if(len == std::size_t(0))
740 return;
741
742 // allocate buffer
743 std::vector<char> buf(len + std::size_t(1));
744
745 // fill buffer on root
746 if(rank() == root)
747 std::strcpy(buf.data(), str.c_str());
748
749 // broadcast buffer
750 bcast(buf.data(), buf.size(), root);
751
752 // convert
753 if(rank() != root)
754 stream << std::string(buf.data(), len);
755 }
756
757 void Comm::bcast_binarystream(BinaryStream& stream, int root) const
758 {
759 // get the stream's internal data container
760 std::vector<char>& data = stream.container();
761 std::size_t len(0);
762
763 // Get size to broadcast
764 if(rank() == root)
765 len = data.size();
766
767 // broadcast vector length
768 bcast(&len, std::size_t(1), root);
769
770 // empty vector?
771 if(len == std::size_t(0))
772 return;
773
774 // reallocate vector
775 if(rank() != root)
776 data.resize(len);
777
778 // broadcast data
779 bcast(data.data(), data.size(), root);
780 }
781
782 void Comm::print(std::ostream& os, const String& msg, int root) const
783 {
784 XASSERTM((0 <= root) && (root < _size), "invalid root rank argument");
785 if(root == _rank)
786 os << msg << "\n";
787 }
788
789 void Comm::allprint(std::ostream& os, const String& msg, int root) const
790 {
791 XASSERTM((0 <= root) && (root < _size), "invalid root rank argument");
792
793 if(_rank == root)
794 {
795 // determine the rank padding size first:
796 const std::size_t n = std::size_t(_size);
797 const std::size_t ndig = Math::ilog10(n);
798
799 // gather message lengths
800 std::size_t mylen = msg.size();
801 std::vector<std::size_t> lengths(n);
802 this->gather(&mylen, std::size_t(1), lengths.data(), std::size_t(1), root);
803
804 // receive and print messages
805 for(int i(0); i < _size; ++i)
806 {
807 // allocate message buffer
808 std::size_t length = lengths.at(std::size_t(i));
809 if(length == std::size_t(0))
810 continue;
811
812 // get the message to be printed
813 String message;
814 if(i == root)
815 message = msg;
816 else
817 {
818 // receive message
819 std::vector<char> msgbuf(length);
820 this->recv(msgbuf.data(), length, i);
821 message.assign(msgbuf.data(), length);
822 }
823
824 // split message into single lines
825 std::deque<String> lines = message.split_by_charset("\n");
826
827 // set line prefix
828 String prefix = stringify(i).pad_front(ndig);
829
830 // print all lines prefixed
831 for(auto it = lines.begin(); it != lines.end(); ++it)
832 os << '[' << prefix << "] " << (*it) << "\n";
833 }
834 }
835 else // rank != root
836 {
837 // send the message length via gather
838 std::size_t dummy(0), mylen = msg.size();
839 this->gather(&mylen, std::size_t(1), &dummy, std::size_t(1), root);
840
841 // send the message itself
842 if(mylen > std::size_t(0))
843 this->send(msg.data(), mylen, root);
844 }
845 }
846
847 void Comm::print_flush(std::ostream& os, int root) const
848 {
849 XASSERTM((0 <= root) && (root < _size), "invalid root rank argument");
850 if(root == _rank)
851 os << std::flush;
852 }
853
854 /* ######################################################################################### */
855 /* ######################################################################################### */
856 /* ######################################################################################### */
857#else // non-MPI build
858 /* ######################################################################################### */
859 /* ######################################################################################### */
860 /* ######################################################################################### */
861
// Non-MPI build: there is nothing to start up, so both arguments are
// ignored and the call always reports success.
bool initialize(int&, char**&)
{
  return true;
}
867
// Non-MPI build: there is nothing to shut down.
void finalize()
{
}
872
873 // datatypes
874 // Note: The numbers for the datatypes are arbitrary; it is just important that each datatype
875 // has its own unique number.
876 const Datatype dt_byte ( 1, sizeof(char));
877 const Datatype dt_char ( 2, sizeof(char));
878 const Datatype dt_wchar ( 3, sizeof(wchar_t));
879 const Datatype dt_signed_char (11, sizeof(signed char));
880 const Datatype dt_signed_short (12, sizeof(short));
881 const Datatype dt_signed_int (13, sizeof(int));
882 const Datatype dt_signed_long (14, sizeof(long));
883 const Datatype dt_signed_long_long (15, sizeof(long long));
884 const Datatype dt_unsigned_char (21, sizeof(unsigned char));
885 const Datatype dt_unsigned_short (22, sizeof(unsigned short));
886 const Datatype dt_unsigned_int (23, sizeof(unsigned int));
887 const Datatype dt_unsigned_long (24, sizeof(unsigned long));
888 const Datatype dt_unsigned_long_long (25, sizeof(unsigned long long));
889 const Datatype dt_float (31, sizeof(float));
890 const Datatype dt_double (32, sizeof(double));
891 const Datatype dt_long_double (33, sizeof(long double));
892#ifdef FEAT_HAVE_QUADMATH
893 const Datatype dt__float128 (34, sizeof(__float128));
894#endif
895#ifdef FEAT_HAVE_HALFMATH
896 const Datatype dt__half (35, sizeof(__half));
897#endif
898 const Datatype dt_signed_int8 (41, sizeof(std::int8_t));
899 const Datatype dt_signed_int16 (42, sizeof(std::int16_t));
900 const Datatype dt_signed_int32 (43, sizeof(std::int32_t));
901 const Datatype dt_signed_int64 (44, sizeof(std::int64_t));
902 const Datatype dt_unsigned_int8 (51, sizeof(std::uint8_t));
903 const Datatype dt_unsigned_int16 (52, sizeof(std::uint16_t));
904 const Datatype dt_unsigned_int32 (53, sizeof(std::uint32_t));
905 const Datatype dt_unsigned_int64 (54, sizeof(std::uint64_t));
906
907 // operations
908 const Operation op_sum(1);
909 const Operation op_max(2);
910 const Operation op_min(3);
911
912 /* ***************************************************************************************** */
913 /* ***************************************************************************************** */
914 /* Dummy Request wrapper implementation */
915 /* ***************************************************************************************** */
916 /* ***************************************************************************************** */
917
919 request(1)
920 {
921 }
922
923
924 Request::Request(Request&& other) :
925 request(other.request)
926 {
927 other.request = 0;
928 }
929
930 Request& Request::operator=(Request&& other)
931 {
932 if(this != &other)
933 {
934 this->request = other.request;
935 other.request = 0;
936 }
937 return *this;
938 }
939
940 bool Request::is_null() const
941 {
942 return request == 0;
943 }
944
945 void Request::free()
946 {
947 request = 0;
948 }
949
950 void Request::cancel()
951 {
952 }
953
954 bool Request::test(Status&)
955 {
956 request = 0;
957 return true;
958 }
959
960 bool Request::wait(Status&)
961 {
962 bool ret = (request != 0);
963 request = 0;
964 return ret;
965 }
966
967 /* ***************************************************************************************** */
968 /* ***************************************************************************************** */
969 /* Dummy RequestVector implementation */
970 /* ***************************************************************************************** */
971 /* ***************************************************************************************** */
972
974 {
975 free();
976 return true;
977 }
978
979 bool RequestVector::test_any(std::size_t& idx, Status&)
980 {
981 // try to find one non-null request
982 for(std::size_t i(0); i < _reqs.size(); ++i)
983 {
984 if(!_reqs.at(i).is_null())
985 {
986 idx = i;
987 _reqs.at(i).free();
988 return true;
989 }
990 }
991 // no active requests in vector
992 return false;
993 }
994
995 /*std::size_t RequestVector::test_some(std::size_t* indices, Status* statuses)
996 {
997 XASSERT(indices != nullptr);
998 XASSERT(statuses != nullptr);
999
1000 // return all non-null requests
1001 std::size_t k = 0;
1002 for(std::size_t i(0); i < _reqs.size(); ++i)
1003 {
1004 if(!_reqs.at(i).is_null())
1005 {
1006 indices[k++] = i;
1007 _reqs.at(i).free();
1008 }
1009 }
1010 return k;
1011 }*/
1012
1014 {
1015 free();
1016 }
1017
1018 bool RequestVector::wait_any(std::size_t& idx, Status& status)
1019 {
1020 return test_any(idx, status);
1021 }
1022
1023 /*std::size_t RequestVector::wait_some(std::size_t* indices, Status* statuses)
1024 {
1025 return test_some(indices, statuses);
1026 }*/
1027
1028 /* ***************************************************************************************** */
1029 /* ***************************************************************************************** */
1030 /* Dummy Comm wrapper implementation */
1031 /* ***************************************************************************************** */
1032 /* ***************************************************************************************** */
1033
1034 Comm::Comm() :
1035 _rank(0),
1036 _size(0)
1037 {
1038 }
1039
1040 Comm::Comm(int) :
1041 _rank(0),
1042 _size(1)
1043 {
1044 }
1045
1046 Comm::Comm(Comm&& other) :
1047 _rank(other._rank),
1048 _size(other._size)
1049 {
1050 }
1051
1052 Comm& Comm::operator=(Comm&& other)
1053 {
1054 if(this != &other)
1055 {
1056 _rank = other._rank;
1057 _size = other._size;
1058 }
1059 return *this;
1060 }
1061
1062 Comm::~Comm()
1063 {
1064 }
1065
1066 Comm Comm::world()
1067 {
1068 return Comm(1);
1069 }
1070
1071 Comm Comm::self()
1072 {
1073 return Comm(1);
1074 }
1075
1076 Comm Comm::null()
1077 {
1078 return Comm();
1079 }
1080
1081 bool Comm::is_null() const
1082 {
1083 return _size == 0;
1084 }
1085
1086 bool Comm::is_world() const
1087 {
1088 return _size == 1;
1089 }
1090
1091 bool Comm::is_self() const
1092 {
1093 return _size == 1;
1094 }
1095
1096 Comm Comm::comm_dup() const
1097 {
1098 return (_size == 0) ? Comm() : Comm(0);
1099 }
1100
1101 Comm Comm::comm_create_range_incl(int, int, int) const
1102 {
1103 return Comm(1);
1104 }
1105
1106 Comm Comm::comm_create_incl(int, const int*) const
1107 {
1108 return Comm(1);
1109 }
1110
1111 Comm Comm::comm_split(int, int) const
1112 {
1113 return Comm(1);
1114 }
1115
1116 void Comm::barrier() const
1117 {
1118 // nothing to do
1119 }
1120
1121 Request Comm::ibarrier() const
1122 {
1123 // nothing to do
1124 return Request();
1125 }
1126
1127 void Comm::bcast(void*, std::size_t, const Datatype&, int) const
1128 {
1129 // nothing to do
1130 }
1131
1132 Request Comm::ibcast(void*, std::size_t, const Datatype&, int) const
1133 {
1134 // nothing to do
1135 return Request();
1136 }
1137
1138 void Comm::gather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
1139 {
1140 XASSERT(root == 0);
1141 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1142 }
1143
1144 Request Comm::igather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
1145 {
1146 XASSERT(root == 0);
1147 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1148 return Request();
1149 }
1150
1151 void Comm::scatter(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
1152 {
1153 XASSERT(root == 0);
1154 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1155 }
1156
1157 Request Comm::iscatter(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype, int root) const
1158 {
1159 XASSERT(root == 0);
1160 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1161 return Request();
1162 }
1163
1164 void Comm::allgather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
1165 {
1166 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1167 }
1168
1169 Request Comm::iallgather(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
1170 {
1171 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1172 return Request();
1173 }
1174
1175 void Comm::allgatherv(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, const int* recvcounts, const int* displs, const Datatype& recvtype) const
1176 {
1177 XASSERT(displs[0] == 0);
1178 alltoall(sendbuf, sendcount, sendtype, recvbuf, std::size_t(recvcounts[0]), recvtype);
1179 }
1180
1181 void Comm::alltoall(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
1182 {
1183 XASSERT(sendcount == recvcount);
1184 XASSERT(sendtype == recvtype);
1185 // gcc 9.2.0 checks for non-nullptr receive buffer at compile-time,
1186 // which may issue false-positive warnings for code that is never executed at run-time
1187 if((recvbuf != sendbuf) && (recvbuf != nullptr))
1188 memcpy(recvbuf, sendbuf, sendcount * sendtype.size());
1189 }
1190
1191 Request Comm::ialltoall(const void* sendbuf, std::size_t sendcount, const Datatype& sendtype, void* recvbuf, std::size_t recvcount, const Datatype& recvtype) const
1192 {
1193 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1194 return Request();
1195 }
1196
1197 void Comm::alltoallv(const void* sendbuf, const int* sendcounts, const int* sdispls, const Datatype& sendtype, void* recvbuf, const int* recvcounts, const int* rdispls, const Datatype& recvtype) const
1198 {
1199 XASSERT(sdispls[0] == 0);
1200 XASSERT(rdispls[0] == 0);
1201 alltoall(sendbuf, std::size_t(sendcounts[0]), sendtype, recvbuf, std::size_t(recvcounts[0]), recvtype);
1202 }
1203
1204 void Comm::reduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op, int root) const
1205 {
1206 XASSERT(root == 0);
1207 allreduce(sendbuf, recvbuf, count, datatype, op);
1208 }
1209
1210 Request Comm::ireduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op, int root) const
1211 {
1212 XASSERT(root == 0);
1213 allreduce(sendbuf, recvbuf, count, datatype, op);
1214 return Request();
1215 }
1216
1217 void Comm::allreduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation&) const
1218 {
1219 // gcc 9.2.0 checks for non-nullptr receive buffer at compile-time,
1220 // which may issue false-positive warnings for code that is never executed at run-time
1221 if((recvbuf != sendbuf) && (recvbuf != nullptr))
1222 memcpy(recvbuf, sendbuf, count * datatype.size());
1223 }
1224
1225 Request Comm::iallreduce(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
1226 {
1227 allreduce(sendbuf, recvbuf, count, datatype, op);
1228 return Request();
1229 }
1230
1231 void Comm::scan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
1232 {
1233 allreduce(sendbuf, recvbuf, count, datatype, op);
1234 }
1235
1236 /*Request Comm::iscan(const void* sendbuf, void* recvbuf, std::size_t count, const Datatype& datatype, const Operation& op) const
1237 {
1238 allreduce(sendbuf, recvbuf, count, datatype, op);
1239 return Request();
1240 }*/
1241
1242 void Comm::exscan(const void*, void*, std::size_t, const Datatype&, const Operation&) const
1243 {
1244 // nothing to do
1245 }
1246
1247 /*Request Comm::iexscan(const void*, void*, std::size_t, const Datatype&, const Operation&) const
1248 {
1249 // nothing to do
1250 return Request();
1251 }*/
1252
1253 void Comm::send(const void*, std::size_t, const Datatype&, int, int) const
1254 {
1255 // nothing to do
1256 }
1257
1258 Request Comm::isend(const void*, std::size_t, const Datatype&, int, int) const
1259 {
1260 // nothing to do
1261 return Request();
1262 }
1263
1264 void Comm::recv(void*, std::size_t, const Datatype&, int, int, Status&) const
1265 {
1266 // nothing to do
1267 }
1268
1269 Request Comm::irecv(void*, std::size_t, const Datatype&, int, int) const
1270 {
1271 // nothing to do
1272 return Request();
1273 }
1274
1275 void Comm::bcast_stringstream(std::stringstream&, int) const
1276 {
1277 // nothing to do
1278 }
1279
1280 void Comm::bcast_binarystream(BinaryStream&, int) const
1281 {
1282 // nothing to do
1283 }
1284
1285 void Comm::print(std::ostream& os, const String& msg, int) const
1286 {
1287 os << msg << "\n";
1288 }
1289
1290 void Comm::allprint(std::ostream& os, const String& msg, int) const
1291 {
1292 os << msg << "\n";
1293 }
1294
1295 void Comm::print_flush(std::ostream& os, int) const
1296 {
1297 os << std::flush;
1298 }
1299#endif // FEAT_HAVE_MPI
1300 } // namespace Dist
1301} // namespace FEAT
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
Binary Stream class.
std::vector< char > & container()
Returns a reference to the internal vector container.
Communicator class.
Definition: dist.hpp:1349
void scatter(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Blocking scatter.
Definition: dist.cpp:571
Comm & operator=(const Comm &)=delete
communicators are non-copyable
void bcast(void *buffer, std::size_t count, const Datatype &datatype, int root) const
Blocking broadcast.
Definition: dist.cpp:541
void reduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op, int root) const
Blocking Reduce.
Definition: dist.cpp:641
Request iallgather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Nonblocking gather-to-all.
Definition: dist.cpp:597
Comm comm_create_range_incl(int count, int first=0, int stride=1) const
Creates a new sub-communicator from a strided range of ranks.
Definition: dist.cpp:472
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
Definition: dist.cpp:655
void barrier() const
Blocking barrier.
Definition: dist.cpp:529
Request ibcast(void *buffer, std::size_t count, const Datatype &datatype, int root) const
Nonblocking broadcast.
Definition: dist.cpp:546
void exscan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Exclusive Scan.
Definition: dist.cpp:683
Comm comm_create_incl(int n, const int *ranks) const
Creates a new sub-communicator for a given set of ranks.
Definition: dist.cpp:498
Request igather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Nonblocking gather.
Definition: dist.cpp:561
static Comm null()
Returns a null communicator.
Definition: dist.cpp:439
Request ibarrier() const
Nonblocking barrier.
Definition: dist.cpp:534
int _rank
our rank
Definition: dist.hpp:1358
virtual ~Comm()
virtual destructor
Definition: dist.cpp:422
Request iallreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Nonblocking All-Reduce.
Definition: dist.cpp:660
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Definition: dist.cpp:716
Comm()
Standard constructor.
Definition: dist.cpp:372
Comm comm_split(int color, int key) const
Creates a new sub-communicator by splitting this communicator.
Definition: dist.cpp:522
bool is_world() const
Checks whether this communicator is the world communicator.
Definition: dist.cpp:444
void bcast_binarystream(BinaryStream &stream, int root=0) const
Blocking broadcast of a BinaryStream.
Definition: dist.cpp:757
void recv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag, Status &status) const
Blocking Receive.
Definition: dist.cpp:711
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
Definition: dist.cpp:704
int _size
the communicator size
Definition: dist.hpp:1360
MPI_Comm comm
our MPI communicator handle
Definition: dist.hpp:1353
Request iscatter(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Nonblocking scatter.
Definition: dist.cpp:579
void gather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Blocking gather.
Definition: dist.cpp:553
static Comm self()
Returns a copy of the self communicator.
Definition: dist.cpp:434
bool is_self() const
Checks whether this communicator is the self communicator.
Definition: dist.cpp:449
void alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, const Datatype &sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, const Datatype &recvtype) const
Blocking All-to-All Scatter/Gather.
Definition: dist.cpp:633
void alltoall(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Blocking All-to-All Scatter/Gather.
Definition: dist.cpp:615
void send(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Blocking Send.
Definition: dist.cpp:699
void print_flush(std::ostream &os, int root=0) const
Explicitly flushes the output stream.
Definition: dist.cpp:847
Comm comm_dup() const
Creates a copy of this communicator.
Definition: dist.cpp:459
void allprint(std::ostream &os, const String &msg, int root=0) const
Prints the ordered messages of all processes to an output stream.
Definition: dist.cpp:789
Request ireduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op, int root) const
Nonblocking Reduce.
Definition: dist.cpp:647
void allgatherv(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, const int *recvcounts, const int *displs, const Datatype &recvtype) const
Blocking gather-to-all.
Definition: dist.cpp:607
Request ialltoall(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Nonblocking All-to-All Scatter/Gather.
Definition: dist.cpp:623
int rank() const
Returns the rank of this process in this communicator.
Definition: dist.hpp:1494
void allgather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Blocking gather-to-all.
Definition: dist.cpp:589
bool is_null() const
Checks whether this communicator is a null communicator.
Definition: dist.cpp:454
static Comm world()
Returns a copy of the world communicator.
Definition: dist.cpp:429
void print(std::ostream &os, const String &msg, int root=0) const
Prints a message line to an output stream.
Definition: dist.cpp:782
void scan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Inclusive Scan.
Definition: dist.cpp:667
void bcast_stringstream(std::stringstream &stream, int root=0) const
Blocking broadcast of a std::stringstream.
Definition: dist.cpp:723
Communication Request class.
Definition: dist.hpp:423
MPI_Request request
our internal MPI request handle
Definition: dist.hpp:427
Request()
Standard constructor.
Definition: dist.cpp:218
bool wait()
Blocks until the request is fulfilled (or null).
Definition: dist.hpp:588
bool test()
Tests whether the request is fulfilled (or null).
Definition: dist.hpp:556
void free()
Frees the request.
Definition: dist.cpp:250
void cancel()
Cancels the request.
Definition: dist.cpp:256
Request & operator=(const Request &)=delete
Request objects are non-copyable.
bool is_null() const
Checks whether this request is null.
Definition: dist.cpp:245
bool test_all()
Tests whether all active requests are fulfilled (or null).
Definition: dist.cpp:283
bool test_any(std::size_t &idx, Status &status)
Tests whether one of the active requests has been fulfilled.
Definition: dist.cpp:290
std::vector< Request > _reqs
internal vector of Request objects
Definition: dist.hpp:643
void free()
Frees all remaining active requests.
Definition: dist.hpp:974
bool wait_any(std::size_t &idx, Status &status)
Blocks until one of the active requests has been fulfilled.
Definition: dist.cpp:329
void wait_all()
Blocks until all active requests are fulfilled.
Definition: dist.cpp:324
Communication Status class.
Definition: dist.hpp:295
MPI_Status * mpi_status()
Definition: dist.hpp:314
String class implementation.
Definition: string.hpp:46
std::deque< String > split_by_charset(const String &charset) const
Splits the string by a delimiter charset.
Definition: string.hpp:467
String pad_front(size_type len, char c=' ') const
Pads the front of the string up to a desired length.
Definition: string.hpp:392
bool initialize(int &argc, char **&argv)
Initializes the distributed communication system.
Definition: dist.cpp:105
const Datatype dt_byte(MPI_BYTE, sizeof(char))
Datatype wrapper for MPI_BYTE.
Definition: dist.hpp:124
const Datatype dt_wchar(MPI_WCHAR, sizeof(wchar_t))
Datatype wrapper for MPI_WCHAR.
Definition: dist.hpp:128
const Datatype dt_signed_long(MPI_LONG, sizeof(long))
Datatype wrapper for MPI_LONG.
Definition: dist.hpp:136
const Datatype dt_float(MPI_FLOAT, sizeof(float))
Datatype wrapper for MPI_FLOAT.
Definition: dist.hpp:150
const Datatype dt_signed_char(MPI_SIGNED_CHAR, sizeof(signed char))
Datatype wrapper for MPI_SIGNED_CHAR.
Definition: dist.hpp:130
const Datatype dt_signed_short(MPI_SHORT, sizeof(short))
Datatype wrapper for MPI_SHORT.
Definition: dist.hpp:132
const Datatype dt_unsigned_long_long(MPI_UNSIGNED_LONG_LONG, sizeof(unsigned long long))
Datatype wrapper for MPI_UNSIGNED_LONG_LONG.
Definition: dist.hpp:148
const Datatype dt_signed_int32(MPI_INT32_T, sizeof(std::int32_t))
Datatype wrapper for MPI_INT32_T.
Definition: dist.hpp:160
const Datatype dt_unsigned_int32(MPI_UINT32_T, sizeof(std::uint32_t))
Datatype wrapper for MPI_UINT32_T.
Definition: dist.hpp:168
void finalize()
Finalizes the distributed communication system.
Definition: dist.cpp:148
const Datatype dt_signed_int8(MPI_INT8_T, sizeof(std::int8_t))
Datatype wrapper for MPI_INT8_T.
Definition: dist.hpp:156
const Datatype dt_char(MPI_CHAR, sizeof(char))
Datatype wrapper for MPI_CHAR.
Definition: dist.hpp:126
const Operation op_min(MPI_MIN)
Operation wrapper for MPI_MIN.
Definition: dist.hpp:275
const Datatype dt_unsigned_int(MPI_UNSIGNED, sizeof(unsigned int))
Datatype wrapper for MPI_UNSIGNED.
Definition: dist.hpp:144
const Datatype dt_unsigned_int16(MPI_UINT16_T, sizeof(std::uint16_t))
Datatype wrapper for MPI_UINT16_T.
Definition: dist.hpp:166
const Datatype dt_unsigned_int8(MPI_UINT8_T, sizeof(std::uint8_t))
Datatype wrapper for MPI_UINT8_T.
Definition: dist.hpp:164
const Datatype dt__half
custom Datatype for __half
const Datatype dt_signed_int(MPI_INT, sizeof(int))
Datatype wrapper for MPI_INT.
Definition: dist.hpp:134
const Datatype dt_signed_long_long(MPI_LONG_LONG, sizeof(long long))
Datatype wrapper for MPI_LONG_LONG.
Definition: dist.hpp:138
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273
const Datatype dt_double(MPI_DOUBLE, sizeof(double))
Datatype wrapper for MPI_DOUBLE.
Definition: dist.hpp:152
const Datatype dt_unsigned_char(MPI_UNSIGNED_CHAR, sizeof(unsigned char))
Datatype wrapper for MPI_UNSIGNED_CHAR.
Definition: dist.hpp:140
const Datatype dt_signed_int16(MPI_INT16_T, sizeof(std::int16_t))
Datatype wrapper for MPI_INT16_T.
Definition: dist.hpp:158
const Datatype dt_unsigned_short(MPI_UNSIGNED_SHORT, sizeof(unsigned short))
Datatype wrapper for MPI_UNSIGNED_SHORT.
Definition: dist.hpp:142
const Datatype dt_unsigned_int64(MPI_UINT64_T, sizeof(std::uint64_t))
Datatype wrapper for MPI_UINT64_T.
Definition: dist.hpp:170
const Datatype dt_long_double(MPI_LONG_DOUBLE, sizeof(long double))
Datatype wrapper for MPI_LONG_DOUBLE.
Definition: dist.hpp:154
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
Definition: dist.hpp:271
const Datatype dt__float128
custom Datatype for __float128
const Datatype dt_unsigned_long(MPI_UNSIGNED_LONG, sizeof(unsigned long))
Datatype wrapper for MPI_UNSIGNED_LONG.
Definition: dist.hpp:146
const Datatype dt_signed_int64(MPI_INT64_T, sizeof(std::int64_t))
Datatype wrapper for MPI_INT64_T.
Definition: dist.hpp:162
@ other
generic/other permutation strategy
T_ ilog10(T_ x)
Computes the integral base-10 logarithm of an integer, i.e. its number of non-zero decimal digits.
Definition: math.hpp:231
FEAT namespace.
Definition: adjactor.hpp:12
String stringify(const T_ &item)
Converts an item into a String.
Definition: string.hpp:944
Communication Datatype class.
Definition: dist.hpp:75
MPI_Datatype dt
the MPI datatype handle
Definition: dist.hpp:78
Communication Operation class.
Definition: dist.hpp:237
MPI_Op op
the MPI operation handle
Definition: dist.hpp:240