7#include <kernel/util/dist.hpp>
8#include <kernel/util/math.hpp>
18#if defined(FEAT_HAVE_MPI) || defined(DOXYGEN)
20#ifdef FEAT_OVERRIDE_MPI_OPS
25 static inline void eval(T_& x, T_& y)
35 static inline void eval(T_& x, T_& y)
46 static inline void eval(T_& x, T_& y)
/**
 * \brief Applies a binary operation element-wise to two untyped buffers.
 *
 * Both buffers are interpreted as arrays of \p T_ and, for each index
 * <c>i</c> in <c>[0, *n)</c>, <c>Op_::eval(inout[i], in[i])</c> is called.
 *
 * \tparam Op_
 * A type providing a static function <c>eval(T_&, T_&)</c>.
 *
 * \tparam T_
 * The element type that both buffers are interpreted as.
 *
 * \param[in] iv
 * The buffer whose entries are passed as the second argument of <c>Op_::eval</c>.
 *
 * \param[in,out] iov
 * The buffer whose entries are passed as the first argument of <c>Op_::eval</c>.
 *
 * \param[in] n
 * A pointer to the number of entries to be processed; nothing is done if <c>*n <= 0</c>.
 */
template<typename Op_, typename T_>
inline void func_op_t(void* iv, void* iov, int* n)
{
  T_* in = static_cast<T_*>(iv);
  T_* inout = static_cast<T_*>(iov);
  for(int i = 0; i < *n; ++i)
  {
    Op_::eval(inout[i], in[i]);
  }
}
68 template<
typename Op_>
69 void op_callback(
void* iv,
void* iov,
int* n, MPI_Datatype* dt)
71 if(*dt == MPI_SIGNED_CHAR) func_op_t<Op_, signed char> (iv, iov, n);
else
72 if(*dt == MPI_SHORT) func_op_t<Op_, short> (iv, iov, n);
else
73 if(*dt == MPI_INT) func_op_t<Op_, int> (iv, iov, n);
else
74 if(*dt == MPI_LONG) func_op_t<Op_, long> (iv, iov, n);
else
75 if(*dt == MPI_LONG_LONG) func_op_t<Op_, long long> (iv, iov, n);
else
76 if(*dt == MPI_UNSIGNED_CHAR) func_op_t<Op_, unsigned char> (iv, iov, n);
else
77 if(*dt == MPI_UNSIGNED_SHORT) func_op_t<Op_, unsigned short> (iv, iov, n);
else
78 if(*dt == MPI_UNSIGNED) func_op_t<Op_, unsigned int> (iv, iov, n);
else
79 if(*dt == MPI_UNSIGNED_LONG) func_op_t<Op_, unsigned long> (iv, iov, n);
else
80 if(*dt == MPI_UNSIGNED_LONG_LONG) func_op_t<Op_, unsigned long long>(iv, iov, n);
else
81 if(*dt == MPI_FLOAT) func_op_t<Op_, float> (iv, iov, n);
else
82 if(*dt == MPI_DOUBLE) func_op_t<Op_, double> (iv, iov, n);
else
83 if(*dt == MPI_LONG_DOUBLE) func_op_t<Op_, long double> (iv, iov, n);
else
84 if(*dt == MPI_INT8_T) func_op_t<Op_, std::int8_t> (iv, iov, n);
else
85 if(*dt == MPI_INT16_T) func_op_t<Op_, std::int16_t> (iv, iov, n);
else
86 if(*dt == MPI_INT32_T) func_op_t<Op_, std::int32_t> (iv, iov, n);
else
87 if(*dt == MPI_INT64_T) func_op_t<Op_, std::int64_t> (iv, iov, n);
else
88 if(*dt == MPI_UINT8_T) func_op_t<Op_, std::uint8_t> (iv, iov, n);
else
89 if(*dt == MPI_UINT16_T) func_op_t<Op_, std::uint16_t> (iv, iov, n);
else
90 if(*dt == MPI_UINT32_T) func_op_t<Op_, std::uint32_t> (iv, iov, n);
else
91 if(*dt == MPI_UINT64_T) func_op_t<Op_, std::uint64_t> (iv, iov, n);
else
92#ifdef FEAT_HAVE_QUADMATH
93 if(*dt ==
dt__float128.
dt) func_op_t<Op_, __float128> (iv, iov, n);
else
95#ifdef FEAT_HAVE_HALFMATH
96 if(*dt ==
dt__half.
dt) func_op_t<Op_, __half> (iv, iov, n);
else
100 MPI_Abort(MPI_COMM_WORLD, 1);
107 int already_initialized(0);
108 if (::MPI_Initialized(&already_initialized) != MPI_SUCCESS)
111#ifdef FEAT_MPI_THREAD_MULTIPLE
112 XASSERTM((
bool)!already_initialized,
"MPI was already initialized by another library, we cannot ensure MPI_THREAD_MULTIPLE support!");
113 int required = MPI_THREAD_MULTIPLE;
115 int required = MPI_THREAD_SERIALIZED;
117 int provided = MPI_THREAD_SINGLE;
119 if (!already_initialized && ::MPI_Init_thread(&argc, &argv, required, &provided) != MPI_SUCCESS)
122 if (!already_initialized && (provided < required))
125#ifdef FEAT_HAVE_QUADMATH
128 MPI_Type_contiguous(
int(
sizeof(__float128)), MPI_BYTE, &dt_f128.
dt);
129 MPI_Type_commit(&dt_f128.
dt);
131#ifdef FEAT_HAVE_HALFMATH
134 MPI_Type_contiguous(
int(
sizeof(__half)), MPI_BYTE, &dt_half.
dt);
135 MPI_Type_commit(&dt_half.
dt);
138#ifdef FEAT_OVERRIDE_MPI_OPS
151#ifdef FEAT_OVERRIDE_MPI_OPS
153 MPI_Op_free(&my_op_sum.
op);
156#ifdef FEAT_HAVE_QUADMATH
158 MPI_Type_free(&dt_f128.
dt);
161#ifdef FEAT_HAVE_HALFMATH
163 MPI_Type_free(&dt_half.
dt);
167 int already_finalized(0);
168 ::MPI_Finalized(&already_finalized);
169 if (!already_finalized)
174 const Datatype
dt_byte (MPI_BYTE,
sizeof(
char));
175 const Datatype
dt_char (MPI_CHAR,
sizeof(
char));
176 const Datatype
dt_wchar (MPI_WCHAR,
sizeof(
wchar_t));
177 const Datatype
dt_signed_char (MPI_SIGNED_CHAR,
sizeof(
signed char));
187 const Datatype
dt_float (MPI_FLOAT,
sizeof(
float));
188 const Datatype
dt_double (MPI_DOUBLE,
sizeof(
double));
189 const Datatype
dt_long_double (MPI_LONG_DOUBLE,
sizeof(
long double));
198#ifdef FEAT_HAVE_QUADMATH
202#ifdef FEAT_HAVE_HALFMATH
204 const Datatype
dt__half (0,
sizeof(__half));
208 const Operation
op_sum(MPI_SUM);
209 const Operation
op_max(MPI_MAX);
210 const Operation
op_min(MPI_MIN);
219 request(MPI_REQUEST_NULL)
229 request(other.request)
231 other.request = MPI_REQUEST_NULL;
240 other.request = MPI_REQUEST_NULL;
247 return request == MPI_REQUEST_NULL;
252 if(
request != MPI_REQUEST_NULL)
258 if(
request != MPI_REQUEST_NULL)
271 if(
request == MPI_REQUEST_NULL)
286 MPI_Testall(_isize(), _reqs_array(), &flag, _stats_array());
294 MPI_Testany(_isize(), _reqs_array(), &i, &flag, status.
mpi_status());
297 idx = std::size_t(i);
326 MPI_Waitall(_isize(), _reqs_array(), _stats_array());
332 MPI_Waitany(_isize(), _reqs_array(), &i, status.
mpi_status());
333 if(i == MPI_UNDEFINED)
335 idx = std::size_t(i);
384 if(comm_ != MPI_COMM_NULL)
397 if((other.comm != MPI_COMM_WORLD) && (other.comm != MPI_COMM_SELF))
399 other.comm = MPI_COMM_NULL;
400 other._rank = other._size = 0;
413 if((other.comm != MPI_COMM_WORLD) && (other.comm != MPI_COMM_SELF))
415 other.comm = MPI_COMM_NULL;
416 other._rank = other._size = 0;
425 if((
comm != MPI_COMM_WORLD) && (
comm != MPI_COMM_SELF) && (
comm != MPI_COMM_NULL))
426 MPI_Comm_free(&
comm);
431 return Comm(MPI_COMM_WORLD);
436 return Comm(MPI_COMM_SELF);
441 return Comm(MPI_COMM_NULL);
446 return (
comm == MPI_COMM_WORLD);
451 return (
comm == MPI_COMM_SELF);
456 return (
comm == MPI_COMM_NULL);
462 if((
comm == MPI_COMM_WORLD) || (
comm == MPI_COMM_SELF) || (
comm == MPI_COMM_NULL))
466 MPI_Comm newcomm = MPI_COMM_NULL;
467 MPI_Comm_dup(
comm, &newcomm);
469 return Comm(newcomm);
479 MPI_Group group = MPI_GROUP_NULL;
480 MPI_Comm_group(
comm, &group);
483 MPI_Group newgroup = MPI_GROUP_NULL;
484 int ranges[3] = {first, first + (count-1)*stride, stride};
485 MPI_Group_range_incl(group, 1, &ranges, &newgroup);
488 MPI_Comm newcomm = MPI_COMM_NULL;
489 MPI_Comm_create(
comm, newgroup, &newcomm);
492 MPI_Group_free(&newgroup);
493 MPI_Group_free(&group);
495 return Comm(newcomm);
504 MPI_Group group = MPI_GROUP_NULL;
505 MPI_Comm_group(
comm, &group);
508 MPI_Group newgroup = MPI_GROUP_NULL;
509 MPI_Group_incl(group, n, ranks, &newgroup);
512 MPI_Comm newcomm = MPI_COMM_NULL;
513 MPI_Comm_create(
comm, newgroup, &newcomm);
516 MPI_Group_free(&newgroup);
517 MPI_Group_free(&group);
519 return Comm(newcomm);
524 MPI_Comm newcomm = MPI_COMM_NULL;
525 MPI_Comm_split(
comm, color, key, &newcomm);
526 return Comm(newcomm);
536 MPI_Request req(MPI_REQUEST_NULL);
537 MPI_Ibarrier(
comm, &req);
543 MPI_Bcast(buffer,
int(count), datatype.
dt, root,
comm);
548 MPI_Request req(MPI_REQUEST_NULL);
549 MPI_Ibcast(buffer,
int(count), datatype.
dt, root,
comm, &req);
553 void Comm::gather(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
555 if((sendbuf == recvbuf) && (root ==
_rank))
556 MPI_Gather(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm);
558 MPI_Gather(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm);
563 MPI_Request req(MPI_REQUEST_NULL);
564 if((sendbuf == recvbuf) && (root ==
_rank))
565 MPI_Igather(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm, &req);
567 MPI_Igather(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm, &req);
571 void Comm::scatter(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
573 if((sendbuf == recvbuf) && (root ==
_rank))
574 MPI_Scatter(sendbuf,
int(sendcount), sendtype.
dt, MPI_IN_PLACE, 0, sendtype.
dt, root,
comm);
576 MPI_Scatter(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm);
581 MPI_Request req(MPI_REQUEST_NULL);
582 if((sendbuf == recvbuf) && (root ==
_rank))
583 MPI_Iscatter(sendbuf,
int(sendcount), sendtype.
dt, MPI_IN_PLACE, 0, sendtype.
dt, root,
comm, &req);
585 MPI_Iscatter(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt, root,
comm, &req);
591 if(sendbuf == recvbuf)
592 MPI_Allgather(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm);
594 MPI_Allgather(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm);
599 MPI_Request req(MPI_REQUEST_NULL);
600 if(sendbuf == recvbuf)
601 MPI_Iallgather(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm, &req);
603 MPI_Iallgather(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm, &req);
607 void Comm::allgatherv(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf,
const int* recvcounts,
const int* displs,
const Datatype& recvtype)
const
609 if(sendbuf == recvbuf)
610 MPI_Allgatherv(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf, recvcounts, displs, recvtype.
dt,
comm);
612 MPI_Allgatherv(sendbuf,
int(sendcount), sendtype.
dt, recvbuf, recvcounts, displs, recvtype.
dt,
comm);
617 if(sendbuf == recvbuf)
618 MPI_Alltoall(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm);
620 MPI_Alltoall(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm);
625 MPI_Request req(MPI_REQUEST_NULL);
626 if(sendbuf == recvbuf)
627 MPI_Ialltoall(MPI_IN_PLACE, 0, recvtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm, &req);
629 MPI_Ialltoall(sendbuf,
int(sendcount), sendtype.
dt, recvbuf,
int(recvcount), recvtype.
dt,
comm, &req);
633 void Comm::alltoallv(
const void* sendbuf,
const int* sendcounts,
const int* sdispls,
const Datatype& sendtype,
void* recvbuf,
const int* recvcounts,
const int* rdispls,
const Datatype& recvtype)
const
635 if(sendbuf == recvbuf)
636 MPI_Alltoallv(MPI_IN_PLACE, 0, 0, recvtype.
dt, recvbuf, recvcounts, rdispls, recvtype.
dt,
comm);
638 MPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype.
dt, recvbuf, recvcounts, rdispls, recvtype.
dt,
comm);
644 MPI_Reduce((sendbuf == recvbuf) && (
_rank == root) ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op, root,
comm);
649 MPI_Request req(MPI_REQUEST_NULL);
651 MPI_Ireduce((sendbuf == recvbuf) && (
_rank == root) ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op, root,
comm, &req);
657 MPI_Allreduce(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op,
comm);
662 MPI_Request req(MPI_REQUEST_NULL);
663 MPI_Iallreduce(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op,
comm, &req);
669 MPI_Scan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op,
comm);
685 MPI_Exscan(sendbuf == recvbuf ? MPI_IN_PLACE : sendbuf, recvbuf,
int(count), datatype.
dt, op.
op,
comm);
699 void Comm::send(
const void* buffer, std::size_t count,
const Datatype& datatype,
int dest,
int tag)
const
701 MPI_Send(buffer,
int(count), datatype.
dt, dest, tag,
comm);
706 MPI_Request req(MPI_REQUEST_NULL);
707 MPI_Isend(buffer,
int(count), datatype.
dt, dest, tag,
comm, &req);
713 MPI_Recv(buffer,
int(count), datatype.
dt, source, tag,
comm, status.
mpi_status());
718 MPI_Request req(MPI_REQUEST_NULL);
719 MPI_Irecv(buffer,
int(count), datatype.
dt, source, tag,
comm, &req);
736 bcast(&len, std::size_t(1), root);
739 if(len == std::size_t(0))
743 std::vector<char> buf(len + std::size_t(1));
747 std::strcpy(buf.data(), str.c_str());
750 bcast(buf.data(), buf.size(), root);
754 stream << std::string(buf.data(), len);
760 std::vector<char>& data = stream.
container();
768 bcast(&len, std::size_t(1), root);
771 if(len == std::size_t(0))
779 bcast(data.data(), data.size(), root);
784 XASSERTM((0 <= root) && (root <
_size),
"invalid root rank argument");
791 XASSERTM((0 <= root) && (root <
_size),
"invalid root rank argument");
796 const std::size_t n = std::size_t(
_size);
800 std::size_t mylen = msg.size();
801 std::vector<std::size_t> lengths(n);
802 this->
gather(&mylen, std::size_t(1), lengths.data(), std::size_t(1), root);
805 for(
int i(0); i <
_size; ++i)
808 std::size_t length = lengths.at(std::size_t(i));
809 if(length == std::size_t(0))
819 std::vector<char> msgbuf(length);
820 this->
recv(msgbuf.data(), length, i);
821 message.assign(msgbuf.data(), length);
831 for(
auto it = lines.begin(); it != lines.end(); ++it)
832 os <<
'[' << prefix <<
"] " << (*it) <<
"\n";
838 std::size_t dummy(0), mylen = msg.size();
839 this->
gather(&mylen, std::size_t(1), &dummy, std::size_t(1), root);
842 if(mylen > std::size_t(0))
843 this->
send(msg.data(), mylen, root);
849 XASSERTM((0 <= root) && (root <
_size),
"invalid root rank argument");
876 const Datatype
dt_byte ( 1,
sizeof(
char));
877 const Datatype
dt_char ( 2,
sizeof(
char));
878 const Datatype
dt_wchar ( 3,
sizeof(
wchar_t));
889 const Datatype
dt_float (31,
sizeof(
float));
890 const Datatype
dt_double (32,
sizeof(
double));
892#ifdef FEAT_HAVE_QUADMATH
895#ifdef FEAT_HAVE_HALFMATH
896 const Datatype
dt__half (35,
sizeof(__half));
908 const Operation
op_sum(1);
909 const Operation
op_max(2);
910 const Operation
op_min(3);
925 request(
other.request)
982 for(std::size_t i(0); i <
_reqs.size(); ++i)
984 if(!
_reqs.at(i).is_null())
1127 void Comm::bcast(
void*, std::size_t,
const Datatype&,
int)
const
1132 Request
Comm::ibcast(
void*, std::size_t,
const Datatype&,
int)
const
1138 void Comm::gather(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
1141 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1144 Request
Comm::igather(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
1147 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1151 void Comm::scatter(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
1154 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1157 Request
Comm::iscatter(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype,
int root)
const
1160 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1164 void Comm::allgather(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype)
const
1166 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1169 Request
Comm::iallgather(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype)
const
1171 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1175 void Comm::allgatherv(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf,
const int* recvcounts,
const int* displs,
const Datatype& recvtype)
const
1178 alltoall(sendbuf, sendcount, sendtype, recvbuf, std::size_t(recvcounts[0]), recvtype);
1181 void Comm::alltoall(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype)
const
1183 XASSERT(sendcount == recvcount);
1184 XASSERT(sendtype == recvtype);
1187 if((recvbuf != sendbuf) && (recvbuf !=
nullptr))
1188 memcpy(recvbuf, sendbuf, sendcount * sendtype.size());
1191 Request
Comm::ialltoall(
const void* sendbuf, std::size_t sendcount,
const Datatype& sendtype,
void* recvbuf, std::size_t recvcount,
const Datatype& recvtype)
const
1193 alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype);
1197 void Comm::alltoallv(
const void* sendbuf,
const int* sendcounts,
const int* sdispls,
const Datatype& sendtype,
void* recvbuf,
const int* recvcounts,
const int* rdispls,
const Datatype& recvtype)
const
1201 alltoall(sendbuf, std::size_t(sendcounts[0]), sendtype, recvbuf, std::size_t(recvcounts[0]), recvtype);
1204 void Comm::reduce(
const void* sendbuf,
void* recvbuf, std::size_t count,
const Datatype& datatype,
const Operation& op,
int root)
const
1207 allreduce(sendbuf, recvbuf, count, datatype, op);
1210 Request
Comm::ireduce(
const void* sendbuf,
void* recvbuf, std::size_t count,
const Datatype& datatype,
const Operation& op,
int root)
const
1213 allreduce(sendbuf, recvbuf, count, datatype, op);
1217 void Comm::allreduce(
const void* sendbuf,
void* recvbuf, std::size_t count,
const Datatype& datatype,
const Operation&)
const
1221 if((recvbuf != sendbuf) && (recvbuf !=
nullptr))
1222 memcpy(recvbuf, sendbuf, count * datatype.size());
1225 Request
Comm::iallreduce(
const void* sendbuf,
void* recvbuf, std::size_t count,
const Datatype& datatype,
const Operation& op)
const
1227 allreduce(sendbuf, recvbuf, count, datatype, op);
1231 void Comm::scan(
const void* sendbuf,
void* recvbuf, std::size_t count,
const Datatype& datatype,
const Operation& op)
const
1233 allreduce(sendbuf, recvbuf, count, datatype, op);
1242 void Comm::exscan(
const void*,
void*, std::size_t,
const Datatype&,
const Operation&)
const
1253 void Comm::send(
const void*, std::size_t,
const Datatype&,
int,
int)
const
1258 Request
Comm::isend(
const void*, std::size_t,
const Datatype&,
int,
int)
const
1264 void Comm::recv(
void*, std::size_t,
const Datatype&,
int,
int, Status&)
const
1269 Request
Comm::irecv(
void*, std::size_t,
const Datatype&,
int,
int)
const
1285 void Comm::print(std::ostream& os,
const String& msg,
int)
const
1290 void Comm::allprint(std::ostream& os,
const String& msg,
int)
const
#define XASSERT(expr)
Assertion macro definition.
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
std::vector< char > & container()
Returns a reference to the internal vector container.
void scatter(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Blocking scatter.
Comm & operator=(const Comm &)=delete
communicators are non-copyable
void bcast(void *buffer, std::size_t count, const Datatype &datatype, int root) const
Blocking broadcast.
void reduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op, int root) const
Blocking Reduce.
Request iallgather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Nonblocking gather-to-all.
Comm comm_create_range_incl(int count, int first=0, int stride=1) const
Creates a new sub-communicator from a strided range of ranks.
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
void barrier() const
Blocking barrier.
Request ibcast(void *buffer, std::size_t count, const Datatype &datatype, int root) const
Nonblocking broadcast.
void exscan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Exclusive Scan.
Comm comm_create_incl(int n, const int *ranks) const
Creates a new sub-communicator for a given set of ranks.
Request igather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Nonblocking gather.
static Comm null()
Returns a null communicator.
Request ibarrier() const
Nonblocking barrier.
virtual ~Comm()
virtual destructor
Request iallreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Nonblocking All-Reduce.
Request irecv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag=0) const
Nonblocking Receive.
Comm()
Standard constructor.
Comm comm_split(int color, int key) const
Creates a new sub-communicator by splitting this communicator.
bool is_world() const
Checks whether this communicator is the world communicator.
void bcast_binarystream(BinaryStream &stream, int root=0) const
Blocking broadcast of a BinaryStream.
void recv(void *buffer, std::size_t count, const Datatype &datatype, int source, int tag, Status &status) const
Blocking Receive.
Request isend(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Nonblocking Send.
int _size
the communicator size
MPI_Comm comm
our MPI communicator handle
Request iscatter(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Nonblocking scatter.
void gather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype, int root) const
Blocking gather.
static Comm self()
Returns a copy of the self communicator.
bool is_self() const
Checks whether this communicator is the self communicator.
void alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, const Datatype &sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, const Datatype &recvtype) const
Blocking All-to-All Scatter/Gather.
void alltoall(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Blocking All-to-All Scatter/Gather.
void send(const void *buffer, std::size_t count, const Datatype &datatype, int dest, int tag=0) const
Blocking Send.
void print_flush(std::ostream &os, int root=0) const
Explicitly flushes the output stream.
Comm comm_dup() const
Creates a copy of this communicator.
void allprint(std::ostream &os, const String &msg, int root=0) const
Prints the ordered messages of all processes to an output stream.
Request ireduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op, int root) const
Nonblocking Reduce.
void allgatherv(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, const int *recvcounts, const int *displs, const Datatype &recvtype) const
Blocking gather-to-all.
Request ialltoall(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Nonblocking All-to-All Scatter/Gather.
int rank() const
Returns the rank of this process in this communicator.
void allgather(const void *sendbuf, std::size_t sendcount, const Datatype &sendtype, void *recvbuf, std::size_t recvcount, const Datatype &recvtype) const
Blocking gather-to-all.
bool is_null() const
Checks whether this communicator is a null communicator.
static Comm world()
Returns a copy of the world communicator.
void print(std::ostream &os, const String &msg, int root=0) const
Prints a message line to an output stream.
void scan(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking Inclusive Scan.
void bcast_stringstream(std::stringstream &stream, int root=0) const
Blocking broadcast of a std::stringstream.
Communication Request class.
MPI_Request request
our internal MPI request handle
Request()
Standard constructor.
bool wait()
Blocks until the request is fulfilled (or null).
bool test()
Tests whether the request is fulfilled (or null).
void free()
Frees the request.
void cancel()
Cancels the request.
Request & operator=(const Request &)=delete
Request objects are non-copyable.
bool is_null() const
Checks whether this request is null.
bool test_all()
Tests whether all active requests are fulfilled (or null).
bool test_any(std::size_t &idx, Status &status)
Tests whether one of the active requests has been fulfilled.
std::vector< Request > _reqs
internal vector of Request objects
void free()
Frees all remaining active requests.
bool wait_any(std::size_t &idx, Status &status)
Blocks until one of the active requests has been fulfilled.
void wait_all()
Blocks until all active requests are fulfilled.
Communication Status class.
MPI_Status * mpi_status()
String class implementation.
std::deque< String > split_by_charset(const String &charset) const
Splits the string by a delimiter charset.
String pad_front(size_type len, char c=' ') const
Pads the front of the string up to a desired length.
bool initialize(int &argc, char **&argv)
Initializes the distributed communication system.
const Datatype dt_byte(MPI_BYTE, sizeof(char))
Datatype wrapper for MPI_BYTE.
const Datatype dt_wchar(MPI_WCHAR, sizeof(wchar_t))
Datatype wrapper for MPI_WCHAR.
const Datatype dt_signed_long(MPI_LONG, sizeof(long))
Datatype wrapper for MPI_LONG.
const Datatype dt_float(MPI_FLOAT, sizeof(float))
Datatype wrapper for MPI_FLOAT.
const Datatype dt_signed_char(MPI_SIGNED_CHAR, sizeof(signed char))
Datatype wrapper for MPI_SIGNED_CHAR.
const Datatype dt_signed_short(MPI_SHORT, sizeof(short))
Datatype wrapper for MPI_SHORT.
const Datatype dt_unsigned_long_long(MPI_UNSIGNED_LONG_LONG, sizeof(unsigned long long))
Datatype wrapper for MPI_UNSIGNED_LONG_LONG.
const Datatype dt_signed_int32(MPI_INT32_T, sizeof(std::int32_t))
Datatype wrapper for MPI_INT32_T.
const Datatype dt_unsigned_int32(MPI_UINT32_T, sizeof(std::uint32_t))
Datatype wrapper for MPI_UINT32_T.
void finalize()
Finalizes the distributed communication system.
const Datatype dt_signed_int8(MPI_INT8_T, sizeof(std::int8_t))
Datatype wrapper for MPI_INT8_T.
const Datatype dt_char(MPI_CHAR, sizeof(char))
Datatype wrapper for MPI_CHAR.
const Operation op_min(MPI_MIN)
Operation wrapper for MPI_MIN.
const Datatype dt_unsigned_int(MPI_UNSIGNED, sizeof(unsigned int))
Datatype wrapper for MPI_UNSIGNED.
const Datatype dt_unsigned_int16(MPI_UINT16_T, sizeof(std::uint16_t))
Datatype wrapper for MPI_UINT16_T.
const Datatype dt_unsigned_int8(MPI_UINT8_T, sizeof(std::uint8_t))
Datatype wrapper for MPI_UINT8_T.
const Datatype dt__half
custom Datatype for __half
const Datatype dt_signed_int(MPI_INT, sizeof(int))
Datatype wrapper for MPI_INT.
const Datatype dt_signed_long_long(MPI_LONG_LONG, sizeof(long long))
Datatype wrapper for MPI_LONG_LONG.
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
const Datatype dt_double(MPI_DOUBLE, sizeof(double))
Datatype wrapper for MPI_DOUBLE.
const Datatype dt_unsigned_char(MPI_UNSIGNED_CHAR, sizeof(unsigned char))
Datatype wrapper for MPI_UNSIGNED_CHAR.
const Datatype dt_signed_int16(MPI_INT16_T, sizeof(std::int16_t))
Datatype wrapper for MPI_INT16_T.
const Datatype dt_unsigned_short(MPI_UNSIGNED_SHORT, sizeof(unsigned short))
Datatype wrapper for MPI_UNSIGNED_SHORT.
const Datatype dt_unsigned_int64(MPI_UINT64_T, sizeof(std::uint64_t))
Datatype wrapper for MPI_UINT64_T.
const Datatype dt_long_double(MPI_LONG_DOUBLE, sizeof(long double))
Datatype wrapper for MPI_LONG_DOUBLE.
const Operation op_sum(MPI_SUM)
Operation wrapper for MPI_SUM.
const Datatype dt__float128
custom Datatype for __float128
const Datatype dt_unsigned_long(MPI_UNSIGNED_LONG, sizeof(unsigned long))
Datatype wrapper for MPI_UNSIGNED_LONG.
const Datatype dt_signed_int64(MPI_INT64_T, sizeof(std::int64_t))
Datatype wrapper for MPI_INT64_T.
@ other
generic/other permutation strategy
T_ ilog10(T_ x)
Computes the integral base-10 logarithm of an integer, i.e. its number of non-zero decimal digits.
String stringify(const T_ &item)
Converts an item into a String.
Communication Datatype class.
MPI_Datatype dt
the MPI datatype handle
Communication Operation class.
MPI_Op op
the MPI operation handle