FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
parti_parmetis.cpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#include <kernel/geometry/parti_parmetis.hpp>
7
8#ifdef FEAT_HAVE_PARMETIS
9#include <parmetis.h>
10
11using namespace FEAT;
12using namespace Geometry;
13
14namespace FEAT
15{
16 namespace Geometry
17 {
18 PartiParMETIS::PartiParMETIS(const Dist::Comm& comm, Index min_elems, int max_procs) :
19 _comm(comm),
20 _sub_comm(Dist::Comm::null()),
21 _max_procs(max_procs),
22 _min_elems(min_elems),
23 _num_elems(0u),
24 _num_parts(0u),
25 _first_elem(0u),
26 _num_local_elems(0u),
27 _subgraph(),
28 _midpoints(),
29 _parts(),
30 _coloring()
31 {
32 XASSERT(max_procs >= 0);
33 XASSERT(min_elems > 0u);
34 }
35
36 PartiParMETIS::~PartiParMETIS()
37 {
38 }
39
40 void PartiParMETIS::_create_subgraph(const Adjacency::Graph& faces_at_elem, const Index num_parts)
41 {
42 // set number of desired partitions and total number of elements
43 this->_num_parts = num_parts;
44 this->_num_elems = faces_at_elem.get_num_nodes_domain();
45
46 // first of all, determine how many processes we want to use for Zoltan
47 int num_procs = Math::max(1, int(faces_at_elem.get_num_nodes_domain() / _min_elems));
48 if(_max_procs > 0)
49 Math::mini(num_procs, _max_procs);
50 Math::mini(num_procs, int(num_parts));
51 Math::mini(num_procs, _comm.size());
52
53 // do we need to create a sub-communicator for ParMETIS?
54 if(num_procs < _comm.size())
55 _sub_comm = _comm.comm_create_range_incl(num_procs);
56 else
57 _sub_comm = _comm.comm_dup();
58
59 // get my rank and size
60 const int z_rank = _sub_comm.rank();
61 const int z_size = _sub_comm.size();
62
63 // make sure that the number of desired parts is not smaller than the size of the communicator
64 XASSERTM(Index(z_size) <= num_parts, "thou shall not create less partitions than thou hast ranks");
65
66 // does this process not participate in the sub-communicator?
67 if(z_size == 0)
68 {
69 this->_first_elem = this->_num_local_elems = Index(0);
70 return;
71 }
72
73 // get number of elements and faces
74 const Index num_elems = faces_at_elem.get_num_nodes_domain();
75 const Index num_faces = faces_at_elem.get_num_nodes_image();
76
77 // determine the first and the last element of this rank
78 Index first_elem = (Index(z_rank) * num_elems) / Index(z_size);
79 Index end_elem = (Index(z_rank+1) * num_elems) / Index(z_size);
80 Index my_num_elems = end_elem - first_elem;
81
82 const Index* dom_ptr = faces_at_elem.get_domain_ptr();
83 const Index* img_idx = faces_at_elem.get_image_idx();
84
85 // create a sub-graph for this rank
86 const Index first_ptr = dom_ptr[first_elem];
87 const Index my_num_idx = dom_ptr[end_elem] - first_ptr;
88 Adjacency::Graph faces_at_my_elem(my_num_elems, num_faces, my_num_idx);
89
90 Index* my_dom_ptr = faces_at_my_elem.get_domain_ptr();
91 Index* my_img_idx = faces_at_my_elem.get_image_idx();
92 for(Index i(0); i <= my_num_elems; ++i)
93 my_dom_ptr[i] = dom_ptr[first_elem + i] - first_ptr;
94 for(Index i(0); i < my_num_idx; ++i)
95 my_img_idx[i] = img_idx[first_ptr + i];
96
97 // transpose graph for this rank
98 Adjacency::Graph elems_at_faces(Adjacency::RenderType::transpose, faces_at_elem);
99
100 // combine graph into our sub-hypergraph
101 this->_first_elem = first_elem;
102 this->_num_local_elems = my_num_elems;
103 this->_subgraph = Adjacency::Graph(Adjacency::RenderType::injectify_sorted, faces_at_my_elem, elems_at_faces);
104 this->_parts.resize(my_num_elems);
105 }
106
107 bool PartiParMETIS::_execute()
108 {
109 // if the size of the sub-communicator is zero, then this process does not participate
110 // in the actual partitioning process and it only needs to participate in the broadcast
111 bool is_ok = true;
112 if(this->_sub_comm.size() > 0)
113 {
114 // apply Zoltan partitioner
115 is_ok = this->_apply_parmetis();
116 }
117
118 // broadcast coloring from root rank and return
119 return this->_broadcast_coloring(is_ok);
120 }
121
122 bool PartiParMETIS::_apply_parmetis()
123 {
124 // convert all inputs to ParMETIS formats
125 idx_t wgtflag(0), numflag(0), ncon(1), edgecut(0);
126 idx_t nparts = idx_t(_num_parts);
127 idx_t ndims = idx_t(_midpoints.size() / _num_local_elems);
128 std::vector<idx_t> options(4, 0);
129 std::vector<real_t> tpwgts(_num_parts, real_t(1) / real_t(_num_parts));
130 std::vector<real_t> ubvec(1, real_t(1.05));
131 std::vector<idx_t> vwgt;
132 std::vector<idx_t> part(_num_local_elems, 0);
133
134 // convert midpoints to xyz
135 std::vector<real_t> xyz;
136 if(!_midpoints.empty())
137 {
138 xyz.resize(_midpoints.size());
139 for(std::size_t i(0); i < _midpoints.size(); ++i)
140 xyz[i] = real_t(_midpoints[i]);
141 }
142
143 // set up vertex distribution array
144 const int z_size = this->_sub_comm.size();
145 std::vector<idx_t> vtxdist;
146 for(int i(0); i <= z_size; ++i)
147 vtxdist.push_back(idx_t(_num_elems*i) / idx_t(z_size));
148
149 // create adjacency arrays from our sub-graph without the self-adjacency
150 std::vector<idx_t> xadj, adjncy;
151 const Index nv = _subgraph.get_num_nodes_domain();
152 const Index ne = _subgraph.get_num_indices();
153 xadj.reserve(nv + 1u);
154 adjncy.reserve(ne - nv);
155 const Index* dom_ptr = _subgraph.get_domain_ptr();
156 const Index* img_idx = _subgraph.get_image_idx();
157 for(Index i(0); i < _subgraph.get_num_nodes_domain(); ++i)
158 {
159 xadj.push_back(idx_t(adjncy.size()));
160 for(Index k(dom_ptr[i]); k < dom_ptr[i+1]; ++k)
161 {
162 if(img_idx[k] != _first_elem + i)
163 adjncy.push_back(idx_t(img_idx[k]));
164 }
165 }
166 xadj.push_back(idx_t(adjncy.size()));
167
168 // set up weights if desired
169 if(!this->_weights.empty())
170 {
171 std::size_t n = this->_weights.size();
172 wgtflag = 2; // vertex weights only
173 vwgt.resize(n);
174
175 // ParMETIS expects integer weights, but we have float weights, so we first have to compute
176 // the total sum of all weights and then choose a scaling factor so that the sum of the
177 // scaled weights is a large integer not greater than 10^9; then we can simply multiply
178 // the weights by that scaling factor and convert to integers.
179
180 // compute the total sum of all weights
181 Real wsum(0.0);
182 for(std::size_t i(0); i < n; ++i)
183 wsum += this->_weights[i];
184 XASSERTM(wsum > 1e-3, "weight sum appears to be zero");
185
186 // compute integer weights for METIS
187 Real scale = 1E+9 / wsum;
188 for(std::size_t i(0); i < n; ++i)
189 vwgt[i] = idx_t(scale * this->_weights[i]);
190 }
191
192 // call ParMETIS
193 int rtn = ParMETIS_V3_PartGeomKway(
194 vtxdist.data(), // in: first dof for each processor, on all processes, length P+1
195 xadj.data(), // in: dom_ptr of adjacency Graph (w/o self-adjacency)
196 adjncy.data(), // in: img_idx of adjacency Graph (w/o self-adjacency)
197 vwgt.data(), // in: vertex weights (may be set to nullptr)
198 nullptr, // in: edge weights (may be set to nullptr)
199 &wgtflag, // in: 0 for no weights
200 &numflag, // in: 0 for C numbering
201 &ndims, // in: dimension of mesh
202 xyz.data(), // in: vertex coords (element midpoints), process-local only
203 &ncon, // in: =1
204 &nparts, // in: number of desired partitions
205 tpwgts.data(), // in: array of 1/nparts
206 ubvec.data(), // in: array of 1.05
207 options.data(), // parameter array
208 &edgecut, // out: number of cut edges
209 part.data(), // out: partition for each local node
210 &_sub_comm.mpi_comm() // in: self explanatory
211 );
212
213 // convert parts
214 _parts.resize(_num_local_elems);
215 for(Index i(0); i < _num_local_elems; ++i)
216 _parts[i] = Index(part[i]);
217
218 // gather the coloring
219 return _gather_coloring() && (rtn == METIS_OK);
220 }
221
222 bool PartiParMETIS::_gather_coloring()
223 {
224 // get my rank and size
225 const int z_rank = _sub_comm.rank();
226 const Index z_size = Index(_sub_comm.size());
227
228 // determine maximum export size
229 Index max_local_elems(0);
230 _sub_comm.allreduce(&this->_num_local_elems, &max_local_elems, 1u, Dist::op_max);
231 max_local_elems += 1; // for actual size
232
233 // allocate send buffer
234 std::vector<Index> send_buf(max_local_elems);
235 send_buf[0] = _num_local_elems;
236 for(Index i(0); i < _num_local_elems; ++i)
237 send_buf[i+1] = this->_parts[i];
238
239 // allocate receive buffer and gather coloring at rank 0
240 std::vector<Index> recv_buf(z_rank == 0 ? max_local_elems * z_size : std::size_t(0));
241 _sub_comm.gather(send_buf.data(), max_local_elems, recv_buf.data(), max_local_elems, 0);
242
243 // all ranks > 0 are done here
244 if(z_rank > 0)
245 return true;
246
247 // compute total element count
248 Index num_elems(0);
249 for(Index i(0); i < z_size; ++i)
250 num_elems += recv_buf[i*max_local_elems];
251
252 // allocate ranks vector
253 this->_coloring = Adjacency::Coloring(this->_num_elems, this->_num_parts);
254 Index* col = this->_coloring.get_coloring();
255
256 // build ranks vector
257 for(Index i(0), k(0); i < z_size; ++i)
258 {
259 Index off = i*max_local_elems;
260 Index n = recv_buf[off];
261 for(Index j(1); j <= n; ++j, ++k)
262 col[k] = recv_buf[off+j];
263 }
264
265 // build color counts
266 std::vector<int> color_counts(this->_num_parts, 0);
267 for(Index i(0); i < this->_num_elems; ++i)
268 ++color_counts[col[i]];
269
270 // make sure we have at least 1 element per color/partition
271 for(Index i(0); i < this->_num_parts; ++i)
272 {
273 if(color_counts[i] == 0)
274 return false;
275 }
276
277 // okay
278 return true;
279 }
280
281 bool PartiParMETIS::_broadcast_coloring(bool metis_ok)
282 {
283 // first of all, let's compare our ParMETIS return codes;
284 // all processes which did not participate in the partitioning have a 'true' status
285 int was_ok(metis_ok ? 0 : 1), was_all_ok(0);
286 this->_comm.allreduce(&was_ok, &was_all_ok, std::size_t(1), Dist::op_sum);
287 if(was_all_ok > 0)
288 return false; // at least 1 process failed
289
290 // allocate coloring if not on root
291 if(this->_comm.rank() > 0)
292 this->_coloring = Adjacency::Coloring(this->_num_elems, this->_num_parts);
293
294 // broadcast coloring
295 this->_comm.bcast(this->_coloring.get_coloring(), this->_num_elems, 0);
296
297 // okay
298 return true;
299 }
300
301 } // namespace Geometry
302} // namespace FEAT
303
304#else // no FEAT_HAVE_PARMETIS
305
306// dummy function to suppress linker warnings
307void parmetis_not_linked() {}
308
309#endif // FEAT_HAVE_PARMETIS
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
#define XASSERTM(expr, msg)
Assertion macro definition with custom message.
Definition: assertion.hpp:263
Coloring object implementation.
Definition: coloring.hpp:37
Adjacency Graph implementation.
Definition: graph.hpp:34
Index * get_domain_ptr()
Returns the domain pointer array.
Definition: graph.hpp:359
Index * get_image_idx()
Returns the image node index array.
Definition: graph.hpp:374
Communicator class.
Definition: dist.hpp:1349
PartiParMETIS(const Dist::Comm &comm, Index min_elems=1000u, int max_procs=1000)
Constructor.
FEAT namespace.
Definition: adjactor.hpp:12
double Real
Real data type.
std::uint64_t Index
Index data type.