FEAT 3
Finite Element Analysis Toolbox
Loading...
Searching...
No Matches
statistics.cpp
1// FEAT3: Finite Element Analysis Toolbox, Version 3
2// Copyright (C) 2010 by Stefan Turek & the FEAT group
3// FEAT3 is released under the GNU General Public License version 3,
4// see the file 'copyright.txt' in the top level directory for details.
5
6#include <kernel/util/statistics.hpp>
7#include <kernel/util/dist.hpp>
8#include <kernel/solver/base.hpp>
9
10#include <queue>
11#include <numeric>
12
13using namespace FEAT;
14
15// static member initialization
// NOTE(review): the embedded numbering of this listing jumps from 15 to 30, so further
// static member definitions are presumably missing here -- confirm against upstream.

// Per-target storage: every map below is keyed by a String; elsewhere in this file the
// key is called 'target' (e.g. _solver_expressions[target], _overall_toe[target]).

// list of recorded solver expressions per target
30std::map<String, std::list<std::shared_ptr<Solver::ExpressionBase>>> Statistics::_solver_expressions;
// per-target string produced by _generate_formatted_solver_tree
31std::map<String, String> Statistics::_formatted_solver_trees;
// "overall" values per target: one list entry is appended per processed solver run
// (presumably toe = time of execution -- TODO confirm)
32std::map<String, std::list<double>> Statistics::_overall_toe;
33std::map<String, std::list<Index>> Statistics::_overall_iters;
34std::map<String, std::list<double>> Statistics::_overall_mpi_execute_reduction;
35std::map<String, std::list<double>> Statistics::_overall_mpi_execute_blas2;
36std::map<String, std::list<double>> Statistics::_overall_mpi_execute_blas3;
37std::map<String, std::list<double>> Statistics::_overall_mpi_execute_collective;
38std::map<String, std::list<double>> Statistics::_overall_mpi_wait_reduction;
39std::map<String, std::list<double>> Statistics::_overall_mpi_wait_blas2;
40std::map<String, std::list<double>> Statistics::_overall_mpi_wait_blas3;
41std::map<String, std::list<double>> Statistics::_overall_mpi_wait_collective;
// "outer multigrid" values per target: each list entry is a vector
// (presumably one value per multigrid level -- TODO confirm)
42std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_toe;
43std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_reduction;
44std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_blas2;
45std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_blas3;
46std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_collective;
47std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_reduction;
48std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_blas2;
49std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_blas3;
50std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_collective;
// "outer Schwarz" values per target
51std::map<String, std::list<double>> Statistics::_outer_schwarz_toe;
52std::map<String, std::list<Index>> Statistics::_outer_schwarz_iters;
58
59String Statistics::_generate_formatted_solver_tree(String target)
60{
61 std::list<String> names;
62 std::list<int> found; //number of found preconds / smoothers / coarse solvers. 0 = nothing found, 1 = smoother / s found, 2 = coarse solver / a found, 3 = all found
63
64 if(_solver_expressions.count(target) == 0)
65 {
66 XABORTM("target "+target+" not present in _solver_expressions");
67 }
68
69 if(_solver_expressions[target].empty())
70 {
71 // not an error -- maybe collection of statistics is disabled
72 return String();
73 }
74
75 if(_solver_expressions[target].front()->get_type() != Solver::ExpressionType::start_solve)
76 {
77 XABORTM("Should never happen - _solver_expressions list did not start with start solve expression!");
78 }
79
80 // process the very first entry, e.g. the outer most solver
81 auto it = _solver_expressions[target].begin();
82 String tree((*it)->solver_name);
83 names.push_back((*it)->solver_name);
84 found.push_back(0);
85
86 // process current last element in the list, until no element needs to be processed
87 while (names.size() > 0 && it != _solver_expressions[target].end())
88 {
89 ++it;
90
91 // the current solver is of multigrid type, search for smoother and coarse solver (smoother always comes first).
92 // if smoother and coarse solver have been found, skip everything until solver end statement has been found
93 if (names.back().starts_with("MultiGrid") || names.back().starts_with("VCycle") || names.back().starts_with("ScaRCMultiGrid"))
94 {
95 while (it != _solver_expressions[target].end())
96 {
97 auto expression = *it;
98
99 if (expression->get_type() == Solver::ExpressionType::call_smoother && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 2))
100 {
101 found.back() += 1;
102 auto j = it;
103 ++j;
104 // smoother call found, that is a solver on its own. break processing of current solver and dive on step deeper into smoother solver processing
105 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
106 {
107 ++it; //shift over to solver start expression
108 tree += " ( S: " + (*it)->solver_name;
109 names.push_back((*it)->solver_name);
110 found.push_back(0);
111 break;
112 }
113 // smoother is no solver on its own, we can continue with the current solver's end statement search
114 else
115 {
116 auto t = dynamic_cast<Solver::ExpressionCallSmoother*>(expression.get());
117 tree += " ( S: " + t->smoother_name;
118 }
119 }
120
121 if (expression->get_type() == Solver::ExpressionType::call_coarse_solver && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 1))
122 {
123 found.back() += 2;
124 auto j = it;
125 ++j;
126 // coarse solver call found, that is a solver on its own. break processing of current solver and dive on step deeper into coarse solver processing
127 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
128 {
129 ++it; //shift over to solver start expression
130 tree += " / C: " + (*it)->solver_name;
131 names.push_back((*it)->solver_name);
132 found.push_back(0);
133 break;
134 }
135 // coarse solver is no solver on its own, we can continue with the current solver's end statement search
136 else
137 {
138 auto t = dynamic_cast<Solver::ExpressionCallCoarseSolver*>(expression.get());
139 tree += " / C: " + t->coarse_solver_name;
140 }
141 }
142
143 if (expression->get_type() == Solver::ExpressionType::end_solve && expression->solver_name == names.back())
144 {
145 tree += " )";
146 names.pop_back();
147 found.pop_back();
148 break;
149 }
150 ++it;
151 }
152 }
153
154 // the current solver is of uzawa complement type, search for a and s solver
155 // if both have been found, skip everything until solver end statement has been found
156 else if (names.back().starts_with("Uzawa"))
157 {
158 while (it != _solver_expressions[target].end())
159 {
160 auto expression = *it;
161
162 if (expression->get_type() == Solver::ExpressionType::call_uzawa_s && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 2))
163 {
164 found.back() += 1;
165 auto j = it;
166 ++j;
167 // s solver call found, that is a solver on its own. break processing of current solver and dive on step deeper into s solver processing
168 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
169 {
170 ++it; //shift over to solver start expression
171 if (found.back() == 1)
172 tree += " ( S: " + (*it)->solver_name;
173 else
174 tree += " / S: " + (*it)->solver_name;
175 names.push_back((*it)->solver_name);
176 found.push_back(0);
177 break;
178 }
179 // s solver is no solver on its own, we can continue with the current solver's end statement search
180 else
181 {
182 auto t = dynamic_cast<Solver::ExpressionCallUzawaS*>(expression.get());
183 if (found.back() == 1)
184 tree += " ( S: " + t->solver_s_name;
185 else
186 tree += " / S: " + t->solver_s_name;
187 }
188 }
189
190 if (expression->get_type() == Solver::ExpressionType::call_uzawa_a && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 1))
191 {
192 found.back() += 2;
193 auto j = it;
194 ++j;
195 // a solver call found, that is a solver on its own. break processing of current solver and dive on step deeper into a solver processing
196 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
197 {
198 ++it; //shift over to solver start expression
199 if (found.back() == 2)
200 tree += " ( A: " + (*it)->solver_name;
201 else
202 tree += " / A: " + (*it)->solver_name;
203 names.push_back((*it)->solver_name);
204 found.push_back(0);
205 break;
206 }
207 // a solver is no solver on its own, we can continue with the current solver's end statement search
208 else
209 {
210 auto t = dynamic_cast<Solver::ExpressionCallUzawaA*>(expression.get());
211 if (found.back() == 2)
212 tree += " ( A: " + t->solver_a_name;
213 else
214 tree += " / A: " + t->solver_a_name;
215 }
216 }
217
218 if (expression->get_type() == Solver::ExpressionType::end_solve && expression->solver_name == names.back())
219 {
220 tree += " )";
221 names.pop_back();
222 found.pop_back();
223 break;
224 }
225 ++it;
226 }
227 }
228
229 // the current solver uses l and r preconditioners, search for both of them
230 // if both have been found, skip everything until solver end statement has been found
231 else if (names.back().starts_with("PCGNR"))
232 {
233 while (it != _solver_expressions[target].end())
234 {
235 auto expression = *it;
236
237 if (expression->get_type() == Solver::ExpressionType::call_precond_l && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 2))
238 {
239 found.back() += 1;
240 auto j = it;
241 ++j;
242 // l solver call found, that is a solver on its own. break processing of current solver and dive on step deeper into l solver processing
243 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
244 {
245 ++it; //shift over to solver start expression
246 if (found.back() == 1)
247 tree += " ( L: " + (*it)->solver_name;
248 else
249 tree += " / L: " + (*it)->solver_name;
250 names.push_back((*it)->solver_name);
251 found.push_back(0);
252 break;
253 }
254 // l solver is no solver on its own, we can continue with the current solver's end statement search
255 else
256 {
257 auto t = dynamic_cast<Solver::ExpressionCallPrecondL*>(expression.get());
258 if (found.back() == 1)
259 tree += " ( L: " + t->precond_name;
260 else
261 tree += " / L: " + t->precond_name;
262 }
263 }
264
265 if (expression->get_type() == Solver::ExpressionType::call_precond_r && expression->solver_name == names.back() && (found.back() == 0 || found.back() == 1))
266 {
267 found.back() += 2;
268 auto j = it;
269 ++j;
270 // r solver call found, that is a solver on its own. break processing of current solver and dive on step deeper into r solver processing
271 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
272 {
273 ++it; //shift over to solver start expression
274 if (found.back() == 2)
275 tree += " ( R: " + (*it)->solver_name;
276 else
277 tree += " / R: " + (*it)->solver_name;
278 names.push_back((*it)->solver_name);
279 found.push_back(0);
280 break;
281 }
282 // r solver is no solver on its own, we can continue with the current solver's end statement search
283 else
284 {
285 auto t = dynamic_cast<Solver::ExpressionCallPrecondR*>(expression.get());
286 if (found.back() == 2)
287 tree += " ( R: " + t->precond_name;
288 else
289 tree += " / R: " + t->precond_name;
290 }
291 }
292
293 if (expression->get_type() == Solver::ExpressionType::end_solve && expression->solver_name == names.back())
294 {
295 tree += " )";
296 names.pop_back();
297 found.pop_back();
298 break;
299 }
300 ++it;
301 }
302 }
303
304 else
305 {
306 // the current solver is not of multigrid or uzawa type, i.e. is uses at most one preconditioner, search for its call or the solvers end.
307 while (it != _solver_expressions[target].end())
308 {
309 auto expression = *it;
310
311 if (expression->get_type() == Solver::ExpressionType::call_precond && found.back() == 0)
312 {
313 found.back() += 1;
314 auto j = it;
315 ++j;
316 // preconditioner call found, that is a solver on its own. break processing of current solver and dive on step deeper into preconditioner solver processing
317 if ((*j)->get_type() == Solver::ExpressionType::start_solve)
318 {
319 ++it; //shift over to solver start expression
320 tree += " ( " + (*it)->solver_name;
321 names.push_back((*it)->solver_name);
322 found.push_back(0);
323 break;
324 }
325 // preconditioner is no solver on its own, we can continue with the current solver's end statement search
326 else
327 {
328 auto t = dynamic_cast<Solver::ExpressionCallPrecond*>(expression.get());
329 tree += " ( " + t->precond_name;
330 }
331 }
332
333 if (expression->get_type() == Solver::ExpressionType::end_solve && expression->solver_name == names.back())
334 {
335 if (found.back() == 0)
336 tree += " ( none";
337 tree += " )";
338 names.pop_back();
339 found.pop_back();
340 break;
341 }
342 ++it;
343 }
344 }
345 }
346
347 if (names.size() > 0)
348 {
349 XABORTM("Should never happen - not all solver calls were parsed to the end!");
350 }
351
352 return tree;
353}
354
// Writes the expression list _solver_expressions[expression_target] to std::cout, one line
// per expression, followed by one empty line.
//
// NOTE(review): this listing is incomplete. The function signature (embedded source line 355)
// is missing, so 'expression_target' is presumably a parameter or member -- confirm. All
// 'case' labels of the switch below (embedded lines 363, 370, 378, ...) are missing as well;
// each braced block presumably belongs to one Solver::ExpressionType case, most likely the
// one matching the class it casts to -- confirm against upstream before compiling.
356{
// number of blanks written in front of each line
357 size_t padding(0);
358
359 for (auto expression : _solver_expressions[expression_target])
360 {
361 switch (expression->get_type())
362 {
// prints "<type>[<solver name>]", then indents all following lines by two more blanks
364 {
365 String s = stringify(expression->get_type()) + "[" + expression->solver_name + "]";
366 std::cout<<String(padding, ' ') << s << "\n";
367 padding += 2;
368 break;
369 }
// ExpressionEndSolve: prints status and iteration count, then removes two blanks of indentation
// NOTE(review): padding is unsigned; the subtraction wraps around unless an increment preceded it
371 {
372 auto t = dynamic_cast<Solver::ExpressionEndSolve*>(expression.get());
373 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + stringify(t->status) + " / " + stringify(t->iters) + ")";
374 std::cout<<String(padding, ' ') << s << "\n";
375 padding -= 2;
376 break;
377 }
// ExpressionDefect: prints the members 'def' and 'iter'
379 {
380 auto t = dynamic_cast<Solver::ExpressionDefect*>(expression.get());
381 String s = stringify(t->get_type()) + "[" + stringify(t->solver_name) + "] (" + stringify(t->def) + " / " + stringify(t->iter) + ")";
382 std::cout<<String(padding, ' ') << s << "\n";
383 break;
384 }
// ExpressionCallPrecond: prints the preconditioner name
386 {
387 auto t = dynamic_cast<Solver::ExpressionCallPrecond*>(expression.get());
388 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
389 std::cout<<String(padding, ' ') << s << "\n";
390 break;
391 }
// ExpressionCallPrecondL: prints the preconditioner name
393 {
394 auto t = dynamic_cast<Solver::ExpressionCallPrecondL*>(expression.get());
395 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
396 std::cout<<String(padding, ' ') << s << "\n";
397 break;
398 }
// ExpressionCallPrecondR: prints the preconditioner name
400 {
401 auto t = dynamic_cast<Solver::ExpressionCallPrecondR*>(expression.get());
402 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
403 std::cout<<String(padding, ' ') << s << "\n";
404 break;
405 }
// ExpressionCallSmoother: prints the smoother name
407 {
408 auto t = dynamic_cast<Solver::ExpressionCallSmoother*>(expression.get());
409 String s = stringify(t->get_type()) + "[" + stringify(t->solver_name) + "] (" + t->smoother_name + ")";
410 std::cout<<String(padding, ' ') << s << "\n";
411 break;
412 }
// ExpressionCallCoarseSolver: prints the coarse solver name
414 {
415 auto t = dynamic_cast<Solver::ExpressionCallCoarseSolver*>(expression.get());
416 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->coarse_solver_name + ")";
417 std::cout<<String(padding, ' ') << s << "\n";
418 break;
419 }
// ExpressionProlongation: prints the member 'level'
421 {
422 auto t = dynamic_cast<Solver::ExpressionProlongation*>(expression.get());
423 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + stringify(t->level) + ")";
424 std::cout<<String(padding, ' ') << s << "\n";
425 break;
426 }
// ExpressionRestriction: prints the member 'level'
428 {
429 auto t = dynamic_cast<Solver::ExpressionRestriction*>(expression.get());
430 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + stringify(t->level) + ")";
431 std::cout<<String(padding, ' ') << s << "\n";
432 break;
433 }
// ExpressionTimings: prints the member 'solver_toe'
435 {
436 auto t = dynamic_cast<Solver::ExpressionTimings*>(expression.get());
437 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + stringify(t->solver_toe) + ")";
438 std::cout<<String(padding, ' ') << s << "\n";
439 break;
440 }
// ExpressionLevelTimings: prints the members 'level' and 'level_toe'
442 {
443 auto t = dynamic_cast<Solver::ExpressionLevelTimings*>(expression.get());
444 String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + stringify(t->level) + " / " + stringify(t->level_toe) + ")";
445 std::cout<<String(padding, ' ') << s << "\n";
446 break;
447 }
// ExpressionCallUzawaS: prints the name of the S solver
449 {
450 auto t = dynamic_cast<Solver::ExpressionCallUzawaS*>(expression.get());
451 String s = stringify(t->get_type()) + "[" + stringify(t->solver_name) + "] (" + t->solver_s_name + ")";
452 std::cout<<String(padding, ' ') << s << "\n";
453 break;
454 }
// ExpressionCallUzawaA: prints the name of the A solver
456 {
457 auto t = dynamic_cast<Solver::ExpressionCallUzawaA*>(expression.get());
458 String s = stringify(t->get_type()) + "[" + stringify(t->solver_name) + "] (" + t->solver_a_name + ")";
459 std::cout<<String(padding, ' ') << s << "\n";
460 break;
461 }
// every remaining expression type: prints type and solver name only
462 default:
463 {
464 String s = stringify(expression->get_type()) + "[" + expression->solver_name + "]";
465 std::cout<<String(padding, ' ') << s << "\n";
466 break;
467 }
468 }
469 }
// terminate the listing with an empty line
470 std::cout<<"\n";
471}
472
474{
475 String result = "Total time: " + stringify(total_time) + "s";
476 if (total_time == 0.)
477 return result;
478
479 KahanAccumulator<double> measured_time;
480 measured_time += get_time_reduction();
481 measured_time += get_time_blas2();
482 measured_time += get_time_blas3();
483 measured_time += get_time_axpy();
484 measured_time += get_time_precon();
485 measured_time += get_time_mpi_execute_reduction();
486 measured_time += get_time_mpi_execute_blas2();
487 measured_time += get_time_mpi_execute_blas3();
488 measured_time += get_time_mpi_execute_collective();
489 measured_time += get_time_mpi_wait_reduction();
490 measured_time += get_time_mpi_wait_blas2();
491 measured_time += get_time_mpi_wait_blas3();
492 measured_time += get_time_mpi_wait_collective();
493
494 result += "\n";
495 result += "Accumulated op time: " + stringify(measured_time.value) + "\n";
496
497 result += "\n";
498
500
501 double t_max[14];
502 double t_min[14];
503 double t_local[14];
504 t_local[0] = total_time - measured_time;
505 t_local[1] = get_time_reduction();
506 t_local[2] = get_time_axpy();
507 t_local[3] = get_time_blas2();
508 t_local[4] = get_time_blas3();
509 t_local[5] = get_time_precon();
510 t_local[6] = get_time_mpi_execute_reduction();
511 t_local[7] = get_time_mpi_execute_blas2();
512 t_local[8] = get_time_mpi_execute_blas3();
513 t_local[9] = get_time_mpi_execute_collective();
514 t_local[10] = get_time_mpi_wait_reduction();
515 t_local[11] = get_time_mpi_wait_blas2();
516 t_local[12] = get_time_mpi_wait_blas3();
517 t_local[13] = get_time_mpi_wait_collective();
518
519 comm.allreduce(t_local, t_max, std::size_t(14), Dist::op_max);
520 comm.allreduce(t_local, t_min, std::size_t(14), Dist::op_min);
521
522 result += String("Reductions:").pad_back(22) + "max: " + stringify(t_max[1]) + ", min: " + stringify(t_min[1]) + ", local: " + stringify(t_local[1]) + "\n";
523
524 result += String("Blas-1:").pad_back(22) + "max: " + stringify(t_max[2]) + ", min: " + stringify(t_min[2]) + ", local: " + stringify(t_local[2]) + "\n";
525
526 result += String("Blas-2:").pad_back(22) + "max: " + stringify(t_max[3]) + ", min: " + stringify(t_min[3]) + ", local: " + stringify(t_local[3]) + "\n";
527
528 result += String("Blas-3:").pad_back(22) + "max: " + stringify(t_max[4]) + ", min: " + stringify(t_min[4]) + ", local: " + stringify(t_local[4]) + "\n";
529
530 result += String("Precon Kernels:").pad_back(22) + "max: " + stringify(t_max[5]) + ", min: " + stringify(t_min[5]) + ", local: " + stringify(t_local[5]) + "\n";
531
532 result += String("MPI Exec Reduction:").pad_back(22) + "max: " + stringify(t_max[6]) + ", min: " + stringify(t_min[6]) + ", local: " + stringify(t_local[6]) + "\n";
533
534 result += String("MPI Exec Blas-2:").pad_back(22) + "max: " + stringify(t_max[7]) + ", min: " + stringify(t_min[7]) + ", local: " + stringify(t_local[7]) + "\n";
535
536 result += String("MPI Exec Blas-3:").pad_back(22) + "max: " + stringify(t_max[8]) + ", min: " + stringify(t_min[8]) + ", local: " + stringify(t_local[8]) + "\n";
537
538 result += String("MPI Exec Collective:").pad_back(22) + "max: " + stringify(t_max[9]) + ", min: " + stringify(t_min[9]) + ", local: " + stringify(t_local[9]) + "\n";
539
540 result += String("MPI Wait Reduction:").pad_back(22) + "max: " + stringify(t_max[10]) + ", min: " + stringify(t_min[10]) + ", local: " + stringify(t_local[10]) + "\n";
541
542 result += String("MPI Wait Blas-2:").pad_back(22) + "max: " + stringify(t_max[11]) + ", min: " + stringify(t_min[11]) + ", local: " + stringify(t_local[11]) + "\n";
543
544 result += String("MPI Wait Blas-3:").pad_back(22) + "max: " + stringify(t_max[12]) + ", min: " + stringify(t_min[12]) + ", local: " + stringify(t_local[12]) + "\n";
545
546 result += String("MPI Wait Collective:").pad_back(22) + "max: " + stringify(t_max[13]) + ", min: " + stringify(t_min[13]) + ", local: " + stringify(t_local[13]) + "\n";
547
548 result += String("Not covered:").pad_back(22) + "max: " + stringify(t_max[0]) + ", min: " + stringify(t_min[0]) + ", local: " + stringify(t_local[0]) + "\n";
549
550 if (t_min[0] < 0.0)
551 {
552 // total_time < measured_time.sum for at least one process
553 result += "WARNING: Accumulated op time is greater than the provided total execution time !\n";
554 }
555 return result;
556}
557
559{
561
562 if (_formatted_solver_trees.count(target) == 0)
564 if(_formatted_solver_trees.at(target).empty())
565 return String();
566
567 auto solver_time_mg = FEAT::Statistics::get_time_mg(target);
568 auto solver_time_mg_mpi_execute_reduction = FEAT::Statistics::get_time_mg_mpi_execute_reduction(target);
569 auto solver_time_mg_mpi_execute_blas2 = FEAT::Statistics::get_time_mg_mpi_execute_blas2(target);
570 auto solver_time_mg_mpi_execute_blas3 = FEAT::Statistics::get_time_mg_mpi_execute_blas3(target);
571 auto solver_time_mg_mpi_execute_collective = FEAT::Statistics::get_time_mg_mpi_execute_collective(target);
572 auto solver_time_mg_mpi_wait_reduction = FEAT::Statistics::get_time_mg_mpi_wait_reduction(target);
573 auto solver_time_mg_mpi_wait_blas2 = FEAT::Statistics::get_time_mg_mpi_wait_blas2(target);
574 auto solver_time_mg_mpi_wait_blas3 = FEAT::Statistics::get_time_mg_mpi_wait_blas3(target);
575 auto solver_time_mg_mpi_wait_collective = FEAT::Statistics::get_time_mg_mpi_wait_collective(target);
576
577 Index item_count(Index(12) + Index(solver_time_mg.front().size() + solver_time_mg_mpi_execute_reduction.front().size() + solver_time_mg_mpi_execute_blas2.front().size() +
578 solver_time_mg_mpi_execute_blas3.front().size() + solver_time_mg_mpi_execute_collective.front().size() +
579 solver_time_mg_mpi_wait_reduction.front().size() + solver_time_mg_mpi_wait_blas2.front().size() + solver_time_mg_mpi_wait_blas3.front().size() +
580 solver_time_mg_mpi_wait_collective.front().size()));
581
582 double * t_local = new double[item_count];
583 double * t_max = new double[item_count];
584 double * t_min = new double[item_count];
585
586 /*
587 * array to value mapping:
588 * 0 solver_toe
589 * 1 solver_iters
590 * 2 solver_mpi_execute_reduction
591 * 3 solver_mpi_execute_blas2
592 * 4 solver_mpi_execute_blas3
593 * 5 solver_mpi_execute_collective
594 * 6 solver_mpi_wait_reduction
595 * 7 solver_mpi_wait_blas2
596 * 8 solver_mpi_wait_blas3
597 * 9 solver_mpi_wait_collective
598 * 10 solver_schwarz_toe
599 * 11 solver_schwarz_iters
600 * n solver_mg_toe
601 * n solver_mg_mpi_execute_reduction
602 * n solver_mg_mpi_execute_blas2
603 * n solver_mg_mpi_execute_blas3
604 * n solver_mg_mpi_execute_collective
605 * n solver_mg_mpi_wait_reduction
606 * n solver_mg_mpi_wait_blas2
607 * n solver_mg_mpi_wait_blas3
608 * n solver_mg_mpi_wait_collective
609 */
610
611 t_local[0] = std::accumulate(FEAT::Statistics::get_time_toe(target).begin(),
612 FEAT::Statistics::get_time_toe(target).end(), 0.);
613 t_local[1] = double(std::accumulate(FEAT::Statistics::get_iters(target).begin(),
614 FEAT::Statistics::get_iters(target).end(), Index(0)));
615 t_local[2] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_reduction(target).begin(),
616 FEAT::Statistics::get_time_mpi_execute_reduction(target).end(), 0.);
617 t_local[3] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_blas2(target).begin(),
618 FEAT::Statistics::get_time_mpi_execute_blas2(target).end(), 0.);
619 t_local[4] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_blas3(target).begin(),
620 FEAT::Statistics::get_time_mpi_execute_blas3(target).end(), 0.);
621 t_local[5] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_collective(target).begin(),
622 FEAT::Statistics::get_time_mpi_execute_collective(target).end(), 0.);
623 t_local[6] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_reduction(target).begin(),
624 FEAT::Statistics::get_time_mpi_wait_reduction(target).end(), 0.);
625 t_local[7] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_blas2(target).begin(),
626 FEAT::Statistics::get_time_mpi_wait_blas2(target).end(), 0.);
627 t_local[8] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_blas3(target).begin(),
628 FEAT::Statistics::get_time_mpi_wait_blas3(target).end(), 0.);
629 t_local[9] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_collective(target).begin(),
630 FEAT::Statistics::get_time_mpi_wait_collective(target).end(), 0.);
631 t_local[10] = std::accumulate(FEAT::Statistics::get_time_schwarz(target).begin(),
632 FEAT::Statistics::get_time_schwarz(target).end(), 0.);
633 t_local[11] = double(std::accumulate(FEAT::Statistics::get_iters_schwarz(target).begin(),
634 FEAT::Statistics::get_iters_schwarz(target).end(), Index(0)));
635 t_local[12] /= double(FEAT::Statistics::get_iters_schwarz(target).size());
636
637 Index offset(12);
638 Index levels(Index(solver_time_mg.front().size()));
639
640 for (auto& step : solver_time_mg)
641 {
642 XASSERT(step.size() == levels);
643 for (Index i(0) ; i < step.size() ; ++i)
644 {
645 t_local[offset + i] = double(0);
646 }
647 for (Index i(0) ; i < step.size() ; ++i)
648 {
649 t_local[offset + i] += step.at(i);
650 }
651 }
652
653 offset += levels;
654
655 for (auto& step : solver_time_mg_mpi_execute_reduction)
656 {
657 XASSERT(step.size() == levels);
658 for (Index i(0) ; i < step.size() ; ++i)
659 {
660 t_local[offset + i] = double(0);
661 }
662 for (Index i(0) ; i < step.size() ; ++i)
663 {
664 t_local[offset + i] += step.at(i);
665 }
666 }
667
668 offset += levels;
669
670 for (auto& step : solver_time_mg_mpi_execute_blas2)
671 {
672 XASSERT(step.size() == levels);
673 for (Index i(0) ; i < step.size() ; ++i)
674 {
675 t_local[offset + i] = double(0);
676 }
677 for (Index i(0) ; i < step.size() ; ++i)
678 {
679 t_local[offset + i] += step.at(i);
680 }
681 }
682
683 offset += levels;
684
685 for (auto& step : solver_time_mg_mpi_execute_blas3)
686 {
687 XASSERT(step.size() == levels);
688 for (Index i(0) ; i < step.size() ; ++i)
689 {
690 t_local[offset + i] = double(0);
691 }
692 for (Index i(0) ; i < step.size() ; ++i)
693 {
694 t_local[offset + i] += step.at(i);
695 }
696 }
697
698 offset += levels;
699
700 for (auto& step : solver_time_mg_mpi_execute_collective)
701 {
702 XASSERT(step.size() == levels);
703 for (Index i(0) ; i < step.size() ; ++i)
704 {
705 t_local[offset + i] = double(0);
706 }
707 for (Index i(0) ; i < step.size() ; ++i)
708 {
709 t_local[offset + i] += step.at(i);
710 }
711 }
712
713 offset += levels;
714
715 for (auto& step : solver_time_mg_mpi_wait_reduction)
716 {
717 XASSERT(step.size() == levels);
718 for (Index i(0) ; i < step.size() ; ++i)
719 {
720 t_local[offset + i] = double(0);
721 }
722 for (Index i(0) ; i < step.size() ; ++i)
723 {
724 t_local[offset + i] += step.at(i);
725 }
726 }
727
728 offset += levels;
729
730 for (auto& step : solver_time_mg_mpi_wait_blas2)
731 {
732 XASSERT(step.size() == levels);
733 for (Index i(0) ; i < step.size() ; ++i)
734 {
735 t_local[offset + i] = double(0);
736 }
737 for (Index i(0) ; i < step.size() ; ++i)
738 {
739 t_local[offset + i] += step.at(i);
740 }
741 }
742
743 offset += levels;
744
745 for (auto& step : solver_time_mg_mpi_wait_blas3)
746 {
747 XASSERT(step.size() == levels);
748 for (Index i(0) ; i < step.size() ; ++i)
749 {
750 t_local[offset + i] = double(0);
751 }
752 for (Index i(0) ; i < step.size() ; ++i)
753 {
754 t_local[offset + i] += step.at(i);
755 }
756 }
757
758 offset += levels;
759
760 for (auto& step : solver_time_mg_mpi_wait_collective)
761 {
762 XASSERT(step.size() == levels);
763 for (Index i(0) ; i < step.size() ; ++i)
764 {
765 t_local[offset + i] = double(0);
766 }
767 for (Index i(0) ; i < step.size() ; ++i)
768 {
769 t_local[offset + i] += step.at(i);
770 }
771 }
772
773 comm.allreduce(t_local, t_max, std::size_t(item_count), Dist::op_max);
774 comm.allreduce(t_local, t_min, std::size_t(item_count), Dist::op_min);
775
776 String result = target + "\n";
777 result += String("toe:").pad_back(27) + "max: " + stringify(t_max[0]) + ", min: " + stringify(t_min[0]) + ", local: " +
778 stringify(t_local[0]) + "\n";
779 result += String("iters:").pad_back(27) + "max: " + stringify(Index(t_max[1])) + ", min: " + stringify(Index(t_min[1])) + ", local: " +
780 stringify(Index(t_local[1])) + "\n";
781 result += String("mpi exe reduction:").pad_back(27) + "max: " + stringify(t_max[2]) + ", min: " + stringify(t_min[2]) + ", local: " +
782 stringify(t_local[2]) + "\n";
783 result += String("mpi exec blas2:").pad_back(27) + "max: " + stringify(t_max[3]) + ", min: " + stringify(t_min[3]) + ", local: " +
784 stringify(t_local[3]) + "\n";
785 result += String("mpi exec blas3:").pad_back(27) + "max: " + stringify(t_max[4]) + ", min: " + stringify(t_min[4]) + ", local: " +
786 stringify(t_local[4]) + "\n";
787 result += String("mpi exec collective:").pad_back(27) + "max: " + stringify(t_max[5]) + ", min: " + stringify(t_min[5]) + ", local: " +
788 stringify(t_local[5]) + "\n";
789 result += String("mpi wait reduction:").pad_back(27) + "max: " + stringify(t_max[6]) + ", min: " + stringify(t_min[6]) + ", local: " +
790 stringify(t_local[6]) + "\n";
791 result += String("mpi wait blas2:").pad_back(27) + "max: " + stringify(t_max[7]) + ", min: " + stringify(t_min[7]) + ", local: " +
792 stringify(t_local[7]) + "\n";
793 result += String("mpi wait blas3:").pad_back(27) + "max: " + stringify(t_max[8]) + ", min: " + stringify(t_min[8]) + ", local: " +
794 stringify(t_local[8]) + "\n";
795 result += String("mpi wait collective:").pad_back(27) + "max: " + stringify(t_max[9]) + ", min: " + stringify(t_min[9]) + ", local: " +
796 stringify(t_local[9]) + "\n";
797 result += String("schwarz toe:").pad_back(27) + "max: " + stringify(t_max[10]) + ", min: " + stringify(t_min[10]) + ", local: " +
798 stringify(t_local[10]) + "\n";
799 result += String("schwarz iters:").pad_back(27) + "max: " + stringify(Index(t_max[11])) + ", min: " + stringify(Index(t_min[11])) + ", local: " +
800 stringify(Index(t_local[11])) + "\n";
801
802 offset = 12;
803 for (Index i(0) ; i < levels ; ++i)
804 {
805 result += String("toe lvl ") + stringify(i).pad_back(19) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) + ", local: " +
806 stringify(t_local[offset + i]) + "\n";
807 }
808 offset += levels;
809 for (Index i(0) ; i < levels ; ++i)
810 {
811 result += String("mpi exec reduction lvl ") + stringify(i).pad_back(4) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
812 ", local: " + stringify(t_local[offset + i]) + "\n";
813 }
814 offset += levels;
815 for (Index i(0) ; i < levels ; ++i)
816 {
817 result += String("mpi exec blas2 lvl ") + stringify(i).pad_back(9) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
818 ", local: " + stringify(t_local[offset + i]) + "\n";
819 }
820 offset += levels;
821 for (Index i(0) ; i < levels ; ++i)
822 {
823 result += String("mpi exec blas3 lvl ") + stringify(i).pad_back(9) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
824 ", local: " + stringify(t_local[offset + i]) + "\n";
825 }
826 offset += levels;
827 for (Index i(0) ; i < levels ; ++i)
828 {
829 result += String("mpi exec collective lvl ") + stringify(i).pad_back(3) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
830 ", local: " + stringify(t_local[offset + i]) + "\n";
831 }
832 offset += levels;
833 for (Index i(0) ; i < levels ; ++i)
834 {
835 result += String("mpi wait red lvl ") + stringify(i).pad_back(10) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
836 ", local: " + stringify(t_local[offset + i]) + "\n";
837 }
838 offset += levels;
839 for (Index i(0) ; i < levels ; ++i)
840 {
841 result += String("mpi wait blas2 lvl ") + stringify(i).pad_back(9) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
842 ", local: " + stringify(t_local[offset + i]) + "\n";
843 }
844 offset += levels;
845 for (Index i(0) ; i < levels ; ++i)
846 {
847 result += String("mpi wait blas3 lvl ") + stringify(i).pad_back(9) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
848 ", local: " + stringify(t_local[offset + i]) + "\n";
849 }
850 offset += levels;
851 for (Index i(0) ; i < levels ; ++i)
852 {
853 result += String("mpi wait collective lvl ") + stringify(i).pad_back(3) + "max: " + stringify(t_max[offset + i]) + ", min: " + stringify(t_min[offset + i]) +
854 ", local: " + stringify(t_local[offset + i]);
855 if (i != levels - 1)
856 result += "\n";
857 }
858
859 delete[] t_local;
860 delete[] t_max;
861 delete[] t_min;
862
863 return result;
864}
865
867{
868 for (auto itarget = _solver_expressions.begin() ; itarget != _solver_expressions.end() ; ++itarget)
869 {
870 auto target = itarget->first;
871
872 // generate solver tree string for target, if it was not created earlier
873 if (_formatted_solver_trees.count(target) == 0)
874 _formatted_solver_trees[target] = _generate_formatted_solver_tree(target);
875
876 // explore solver tree, gather top level timings
877 std::vector<String> names;
878
879 if(_solver_expressions.count(target) == 0)
880 {
881 XABORTM("target "+target+" not present in _solver_expressions");
882 }
883
884 // solver expression may be empty if collection is disabled
885 if(_solver_expressions[target].empty())
886 continue;
887
888 if(_solver_expressions[target].front()->get_type() != Solver::ExpressionType::start_solve)
889 {
890 XABORTM("Should never happen - _solver_expressions list did not start with start solve expression!");
891 }
892
893 _overall_toe[target].push_back(0.);
894 _overall_iters[target].push_back(Index(0));
895 _overall_mpi_execute_reduction[target].push_back(0.);
896 _overall_mpi_execute_blas2[target].push_back(0.);
897 _overall_mpi_execute_blas3[target].push_back(0.);
898 _overall_mpi_execute_collective[target].push_back(0.);
899 _overall_mpi_wait_reduction[target].push_back(0.);
900 _overall_mpi_wait_blas2[target].push_back(0.);
901 _overall_mpi_wait_blas3[target].push_back(0.);
902 _overall_mpi_wait_collective[target].push_back(0.);
903 _outer_mg_toe[target].emplace_back();
904 _outer_mg_mpi_execute_reduction[target].emplace_back();
905 _outer_mg_mpi_execute_blas2[target].emplace_back();
906 _outer_mg_mpi_execute_blas3[target].emplace_back();
907 _outer_mg_mpi_execute_collective[target].emplace_back();
908 _outer_mg_mpi_wait_reduction[target].emplace_back();
909 _outer_mg_mpi_wait_blas2[target].emplace_back();
910 _outer_mg_mpi_wait_blas3[target].emplace_back();
911 _outer_mg_mpi_wait_collective[target].emplace_back();
912 _outer_schwarz_toe[target].push_back(0.);
913 _outer_schwarz_iters[target].push_back(Index(0));
914
915 Index outer_mg_depth(0);
916 Index outer_schwarz_depth(0);
917
918 for (auto it = _solver_expressions.at(target).begin() ; it != _solver_expressions.at(target).end() ; ++it)
919 {
920 auto expression = *it;
921
922 if (expression->get_type() == Solver::ExpressionType::start_solve)
923 {
924 names.push_back((*it)->solver_name);
925
926 // set outest mg depth to first mg found in solver tree while descending
927 if (names.size() > 0 && (names.back().starts_with("MultiGrid") || names.back().starts_with("VCycle") || names.back().starts_with("ScaRCMultiGrid")) && outer_mg_depth == 0)
928 outer_mg_depth = (Index)names.size();
929
930 // set depth of schwarz preconditioner in solver tree while descending
931 if (names.size() > 0 && names.back().starts_with("Schwarz") && outer_schwarz_depth == 0)
932 outer_schwarz_depth = (Index)names.size();
933 }
934
935 //fetch iters from top-lvl (lying insided of schwarz or global)
936 if (expression->get_type() == Solver::ExpressionType::end_solve && expression->solver_name == names.back())
937 {
938 if (names.size() > 1 && names.size() == outer_schwarz_depth + 1 && names.at(names.size() - 2) == "Schwarz")
939 {
940 auto t = dynamic_cast<Solver::ExpressionEndSolve*>(expression.get());
941 _outer_schwarz_iters[target].back() += t->iters;
942 }
943 if (names.size() < 2)
944 {
945 auto t = dynamic_cast<Solver::ExpressionEndSolve*>(expression.get());
946
947 _overall_iters[target].back() += t->iters;
948 }
949
950 names.pop_back();
951 continue;
952 }
953
954 if ((names.size() < 2) && expression->get_type() == FEAT::Solver::ExpressionType::timings)
955 {
956 auto t = dynamic_cast<Solver::ExpressionTimings*>(expression.get());
957
958 _overall_toe[target].back() += t->solver_toe;
959 _overall_mpi_execute_reduction[target].back() += t->mpi_execute_reduction;
960 _overall_mpi_execute_blas2[target].back() += t->mpi_execute_blas2;
961 _overall_mpi_execute_blas3[target].back() += t->mpi_execute_blas3;
962 _overall_mpi_execute_collective[target].back() += t->mpi_execute_collective;
963 _overall_mpi_wait_reduction[target].back() += t->mpi_wait_reduction;
964 _overall_mpi_wait_blas2[target].back() += t->mpi_wait_blas2;
965 _overall_mpi_wait_blas3[target].back() += t->mpi_wait_blas3;
966 _overall_mpi_wait_collective[target].back() += t->mpi_wait_collective;
967 }
968
969 if (names.size() == outer_mg_depth && expression->get_type() == FEAT::Solver::ExpressionType::level_timings)
970 {
971 auto t = dynamic_cast<Solver::ExpressionLevelTimings*>(expression.get());
972 // add new vector entry for current level if vector does not already contain an entry for this level
973 if (_outer_mg_toe[target].back().size() <= t->level)
974 {
975 _outer_mg_toe[target].back().push_back(t->level_toe);
976 _outer_mg_mpi_execute_reduction[target].back().push_back(t->mpi_execute_reduction);
977 _outer_mg_mpi_execute_blas2[target].back().push_back(t->mpi_execute_blas2);
978 _outer_mg_mpi_execute_blas3[target].back().push_back(t->mpi_execute_blas3);
979 _outer_mg_mpi_execute_collective[target].back().push_back(t->mpi_execute_collective);
980 _outer_mg_mpi_wait_reduction[target].back().push_back(t->mpi_wait_reduction);
981 _outer_mg_mpi_wait_blas2[target].back().push_back(t->mpi_wait_blas2);
982 _outer_mg_mpi_wait_blas3[target].back().push_back(t->mpi_wait_blas3);
983 _outer_mg_mpi_wait_collective[target].back().push_back(t->mpi_wait_collective);
984 }
985 else
986 {
987 _outer_mg_toe[target].back().at(t->level) += t->level_toe;
988 _outer_mg_mpi_execute_reduction[target].back().at(t->level) += t->mpi_execute_reduction;
989 _outer_mg_mpi_execute_blas2[target].back().at(t->level) += t->mpi_execute_blas2;
990 _outer_mg_mpi_execute_blas3[target].back().at(t->level) += t->mpi_execute_blas3;
991 _outer_mg_mpi_execute_collective[target].back().at(t->level) += t->mpi_execute_collective;
992 _outer_mg_mpi_wait_reduction[target].back().at(t->level) += t->mpi_wait_reduction;
993 _outer_mg_mpi_wait_blas2[target].back().at(t->level) += t->mpi_wait_blas2;
994 _outer_mg_mpi_wait_blas3[target].back().at(t->level) += t->mpi_wait_blas3;
995 _outer_mg_mpi_wait_collective[target].back().at(t->level) += t->mpi_wait_collective;
996 }
997 }
998
999 //grep the solver which lies in the schwarz solver to get its toe
1000 if (names.size() > 1 && names.size() == outer_schwarz_depth + 1 && names.at(names.size() - 2) == "Schwarz" && expression->get_type() == FEAT::Solver::ExpressionType::timings)
1001 {
1002 auto t = dynamic_cast<Solver::ExpressionTimings*>(expression.get());
1003 _outer_schwarz_toe[target].back() += t->solver_toe;
1004 }
1005 }
1006 }
1007
1008 // clear raw solver expressions
1009 _solver_expressions.clear();
1010}
#define XABORTM(msg)
Abortion macro definition with custom message.
Definition: assertion.hpp:192
#define XASSERT(expr)
Assertion macro definition.
Definition: assertion.hpp:262
Communicator class.
Definition: dist.hpp:1349
void allreduce(const void *sendbuf, void *recvbuf, std::size_t count, const Datatype &datatype, const Operation &op) const
Blocking All-Reduce.
Definition: dist.cpp:655
static Comm world()
Returns a copy of the world communicator.
Definition: dist.cpp:429
Kahan Summation accumulator class template.
DT_ value
the current sum value
static std::list< Index > & get_iters_schwarz(String target)
retrieve list of all outer schwarz solver call iteration count entries
Definition: statistics.hpp:498
static KahanAccumulator< double > _time_mpi_execute_blas3
global time of execution for mpi related idle/wait tasks of blas-3 operations
Definition: statistics.hpp:58
static KahanAccumulator< double > _time_precon
global time of execution for special preconditioner kernel type operations
Definition: statistics.hpp:49
static KahanAccumulator< double > _time_reduction
global time of execution for reduction type operations
Definition: statistics.hpp:37
static KahanAccumulator< double > _time_mpi_execute_reduction
global time of execution for mpi related idle/wait tasks of (scalar) reduction operations
Definition: statistics.hpp:52
static std::map< String, String > _formatted_solver_trees
mapping of solver target name to formatted solver tree string
Definition: statistics.hpp:79
static std::map< String, std::list< std::vector< double > > > _outer_mg_toe
mapping of solver name to list of outer multigrid level timings. each std::vector holds a complete le...
Definition: statistics.hpp:93
static Index _flops
global flop counter
Definition: statistics.hpp:34
static std::list< std::vector< double > > & get_time_mg_mpi_execute_blas3(String target)
retrieve list of all overall solver mpi execute blas3 toe entries per level
Definition: statistics.hpp:456
static std::map< String, std::list< double > > _outer_schwarz_toe
overall time of outer schwarz preconditioners internal solver
Definition: statistics.hpp:103
static void compress_solver_expressions()
compress solver statistics (toe / defect norm / mpi timings) from previous calls
Definition: statistics.cpp:866
static KahanAccumulator< double > _time_mpi_execute_blas2
global time of execution for mpi related idle/wait tasks of blas-2 operations
Definition: statistics.hpp:55
static String get_formatted_solver_internals(String target="default")
Retrieve formatted timings and iteration counts of internal solver structures for the provided solver...
Definition: statistics.cpp:558
static std::map< String, std::list< double > > _overall_toe
overall time per reset call per solver name string.
Definition: statistics.hpp:82
static KahanAccumulator< double > _time_axpy
global time of execution for blas-1 type operations
Definition: statistics.hpp:46
static std::map< String, std::list< std::shared_ptr< Solver::ExpressionBase > > > _solver_expressions
a consecutive list of all solver actions
Definition: statistics.hpp:76
static KahanAccumulator< double > _time_mpi_wait_reduction
global time of wait execution for mpi related idle/wait tasks of (scalar) reduction operations
Definition: statistics.hpp:64
static std::list< double > & get_time_schwarz(String target)
retrieve list of all outer schwarz solver call toe entries
Definition: statistics.hpp:492
static KahanAccumulator< double > _time_blas2
global time of execution for blas-2 type operations
Definition: statistics.hpp:40
static KahanAccumulator< double > _time_mpi_execute_collective
global time of execution for mpi related idle/wait tasks of collective operations (without scalar red...
Definition: statistics.hpp:61
static std::list< Index > & get_iters(String target)
retrieve list of all overall solver iteration entries
Definition: statistics.hpp:384
static bool enable_solver_expressions
specifies whether collection of solver expressions is to be enabled
Definition: statistics.hpp:165
static std::list< std::vector< double > > & get_time_mg(String target)
retrieve list of all overall solver toe entries per mg level
Definition: statistics.hpp:438
static std::list< std::vector< double > > & get_time_mg_mpi_execute_collective(String target)
retrieve list of all overall solver mpi execute collective toe entries per level
Definition: statistics.hpp:462
static KahanAccumulator< double > _time_mpi_wait_collective
global time of wait execution for mpi related idle/wait tasks of collective operations (without scala...
Definition: statistics.hpp:73
static std::map< String, std::list< Index > > _outer_schwarz_iters
overall iterations of outer schwarz preconditioners internal solver
Definition: statistics.hpp:105
static std::list< std::vector< double > > & get_time_mg_mpi_wait_collective(String target)
retrieve list of all overall solver mpi collective wait toe entries per level
Definition: statistics.hpp:486
static KahanAccumulator< double > _time_mpi_wait_blas3
global time of wait execution for mpi related idle/wait tasks of blas3 operations
Definition: statistics.hpp:70
static std::list< std::vector< double > > & get_time_mg_mpi_execute_blas2(String target)
retrieve list of all overall solver mpi execute blas2 toe entries per level
Definition: statistics.hpp:450
static KahanAccumulator< double > _time_blas3
global time of execution for blas-3 type operations
Definition: statistics.hpp:43
static std::list< std::vector< double > > & get_time_mg_mpi_wait_reduction(String target)
retrieve list of all overall solver mpi reduction wait toe entries per level
Definition: statistics.hpp:468
static String get_formatted_times(double total_time)
Retrieve formatted time consumption overview in percent relative to some provided total time.
Definition: statistics.cpp:473
static double toe_solve
time of solution in seconds, needs initialization
Definition: statistics.hpp:171
static std::list< double > & get_time_toe(String target)
retrieve list of all overall solver toe entries
Definition: statistics.hpp:378
static std::list< std::vector< double > > & get_time_mg_mpi_execute_reduction(String target)
retrieve list of all overall solver mpi execute reduction toe entries per level
Definition: statistics.hpp:444
static std::list< std::vector< double > > & get_time_mg_mpi_wait_blas3(String target)
retrieve list of all overall solver mpi blas3 wait toe entries per level
Definition: statistics.hpp:480
static std::list< std::vector< double > > & get_time_mg_mpi_wait_blas2(String target)
retrieve list of all overall solver mpi blas2 wait toe entries per level
Definition: statistics.hpp:474
static KahanAccumulator< double > _time_mpi_wait_blas2
global time of wait execution for mpi related idle/wait tasks of blas2 operations
Definition: statistics.hpp:67
static void print_solver_expressions()
print out the complete solver expression list
Definition: statistics.cpp:355
static String expression_target
the current solver's descriptive string
Definition: statistics.hpp:173
static double toe_partition
time of partitioning in seconds, needs initialization
Definition: statistics.hpp:167
static double toe_assembly
time of assembly in seconds, needs initialization
Definition: statistics.hpp:169
String class implementation.
Definition: string.hpp:46
String pad_back(size_type len, char c=' ') const
Pads the back of the string up to a desired length.
Definition: string.hpp:415
void pop_back()
Removes the last character from the string.
Definition: string.hpp:245
const Operation op_min(MPI_MIN)
Operation wrapper for MPI_MIN.
Definition: dist.hpp:275
const Operation op_max(MPI_MAX)
Operation wrapper for MPI_MAX.
Definition: dist.hpp:273
@ timings
annotate iterations timings
@ call_uzawa_a
call A matrix solver (uzawa complement)
@ prol
prolongation (multigrid)
@ end_solve
end last solve process
@ call_precond_l
call L preconditioner
@ defect
annotate iterations defect
@ rest
restriction (multigrid)
@ call_uzawa_s
call S matrix solver (uzawa complement)
@ call_coarse_solver
call coarse grid solver
@ call_precond_r
call R preconditioner
@ level_timings
annotate level timings
@ call_precond
call preconditioner
@ start_solve
start new solve process
FEAT namespace.
Definition: adjactor.hpp:12
String stringify(const T_ &item)
Converts an item into a String.
Definition: string.hpp:944
std::uint64_t Index
Index data type.