#include <kernel/util/statistics.hpp>
#include <kernel/util/dist.hpp>
#include <kernel/solver/base.hpp>
std::map<String, std::list<Index>> Statistics::_overall_iters;
std::map<String, std::list<double>> Statistics::_overall_mpi_execute_reduction;
std::map<String, std::list<double>> Statistics::_overall_mpi_execute_blas2;
std::map<String, std::list<double>> Statistics::_overall_mpi_execute_blas3;
std::map<String, std::list<double>> Statistics::_overall_mpi_execute_collective;
std::map<String, std::list<double>> Statistics::_overall_mpi_wait_reduction;
std::map<String, std::list<double>> Statistics::_overall_mpi_wait_blas2;
std::map<String, std::list<double>> Statistics::_overall_mpi_wait_blas3;
std::map<String, std::list<double>> Statistics::_overall_mpi_wait_collective;

std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_reduction;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_blas2;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_blas3;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_execute_collective;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_reduction;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_blas2;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_blas3;
std::map<String, std::list<std::vector<double>>> Statistics::_outer_mg_mpi_wait_collective;
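// --- illustrative sketch, not part of the original statistics.cpp ---------------------------
// Each of the maps above is keyed by a solver target name and receives one new list entry per
// solver run folded in by compress_solver_expressions(); the accumulated data is read back
// through the public getters. The target name "default" and the helper name below are only
// examples; assumes <iostream> is available.
static void example_dump_iteration_counts()
{
  for (FEAT::Index iters : FEAT::Statistics::get_iters("default"))
    std::cout << "iterations per solve: " << iters << "\n";
}
// ---------------------------------------------------------------------------------------------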
// ...
String Statistics::_generate_formatted_solver_tree(String target)
  // ...
  std::list<String> names;
  // ...
  XABORTM("target "+target+" not present in _solver_expressions");
  // ...
  XABORTM("Should never happen - _solver_expressions list did not start with start solve expression!");
  // ...
  String tree((*it)->solver_name);
  names.push_back((*it)->solver_name);
  // ...
  if (names.back().starts_with("MultiGrid") || names.back().starts_with("VCycle") || names.back().starts_with("ScaRCMultiGrid"))
    // ...
    auto expression = *it;
    // ...
    tree += " ( S: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    tree += " ( S: " + t->smoother_name;
    // ...
    tree += " / C: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    tree += " / C: " + t->coarse_solver_name;
  // ...
  else if (names.back().starts_with("Uzawa"))
    // ...
    auto expression = *it;
    // ...
    if (found.back() == 1)
      tree += " ( S: " + (*it)->solver_name;
    // ...
      tree += " / S: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    if (found.back() == 1)
      tree += " ( S: " + t->solver_s_name;
    // ...
      tree += " / S: " + t->solver_s_name;
    // ...
    if (found.back() == 2)
      tree += " ( A: " + (*it)->solver_name;
    // ...
      tree += " / A: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    if (found.back() == 2)
      tree += " ( A: " + t->solver_a_name;
    // ...
      tree += " / A: " + t->solver_a_name;
  // ...
  else if (names.back().starts_with("PCGNR"))
    // ...
    auto expression = *it;
    // ...
    if (found.back() == 1)
      tree += " ( L: " + (*it)->solver_name;
    // ...
      tree += " / L: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    if (found.back() == 1)
      tree += " ( L: " + t->precond_name;
    // ...
      tree += " / L: " + t->precond_name;
    // ...
    if (found.back() == 2)
      tree += " ( R: " + (*it)->solver_name;
    // ...
      tree += " / R: " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    if (found.back() == 2)
      tree += " ( R: " + t->precond_name;
    // ...
      tree += " / R: " + t->precond_name;
  // ...
    auto expression = *it;
    // ...
    tree += " ( " + (*it)->solver_name;
    names.push_back((*it)->solver_name);
    // ...
    tree += " ( " + t->precond_name;
    // ...
    if (found.back() == 0)
  // ...
  if (names.size() > 0)
    // ...
    XABORTM("Should never happen - not all solver calls were parsed to the end!");
// ...
switch (expression->get_type())
  // ...
  String s = stringify(expression->get_type()) + "[" + expression->solver_name + "]";
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->precond_name + ")";
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  String s = stringify(t->get_type()) + "[" + t->solver_name + "] (" + t->coarse_solver_name + ")";
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  std::cout << String(padding, ' ') << s << "\n";
  // ...
  String s = stringify(expression->get_type()) + "[" + expression->solver_name + "]";
  std::cout << String(padding, ' ') << s << "\n";
// ...
if (total_time == 0.)
// ...
measured_time += get_time_reduction();
measured_time += get_time_blas2();
measured_time += get_time_blas3();
measured_time += get_time_axpy();
measured_time += get_time_precon();
measured_time += get_time_mpi_execute_reduction();
measured_time += get_time_mpi_execute_blas2();
measured_time += get_time_mpi_execute_blas3();
measured_time += get_time_mpi_execute_collective();
measured_time += get_time_mpi_wait_reduction();
measured_time += get_time_mpi_wait_blas2();
measured_time += get_time_mpi_wait_blas3();
measured_time += get_time_mpi_wait_collective();
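// --- illustrative sketch, not part of the original statistics.cpp ---------------------------
// measured_time above is accumulated with FEAT's KahanAccumulator<double> (hence the .value
// access below): compensated (Kahan) summation carries a correction term so that adding many
// small per-operation timings to an already large sum does not lose precision. A minimal,
// stand-alone version of the idea (names here are made up and do not mirror FEAT's class):
struct ExampleKahanSum
{
  double value = 0.0;       // running sum (corresponds to the accumulator's 'value' member)
  double correction = 0.0;  // compensation for low-order bits lost in previous additions
  void add(double x)
  {
    const double y = x - correction;  // apply the carried-over correction
    const double t = value + y;       // tentative new sum; low-order bits of y may be lost here
    correction = (t - value) - y;     // recover exactly what was lost ...
    value = t;                        // ... and commit the new sum
  }
};
// ---------------------------------------------------------------------------------------------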
// ...
result += "Accumulated op time: " + stringify(measured_time.value) + "\n";
// ...
t_local[0] = total_time - measured_time;
t_local[1] = get_time_reduction();
t_local[2] = get_time_axpy();
t_local[3] = get_time_blas2();
t_local[4] = get_time_blas3();
t_local[5] = get_time_precon();
t_local[6] = get_time_mpi_execute_reduction();
t_local[7] = get_time_mpi_execute_blas2();
t_local[8] = get_time_mpi_execute_blas3();
t_local[9] = get_time_mpi_execute_collective();
t_local[10] = get_time_mpi_wait_reduction();
t_local[11] = get_time_mpi_wait_blas2();
t_local[12] = get_time_mpi_wait_blas3();
t_local[13] = get_time_mpi_wait_collective();
// ...
result += "WARNING: Accumulated op time is greater than the provided total execution time !\n";
// ...
Index item_count(Index(12) + Index(solver_time_mg.front().size() + solver_time_mg_mpi_execute_reduction.front().size() + solver_time_mg_mpi_execute_blas2.front().size() +
  solver_time_mg_mpi_execute_blas3.front().size() + solver_time_mg_mpi_execute_collective.front().size() +
  solver_time_mg_mpi_wait_reduction.front().size() + solver_time_mg_mpi_wait_blas2.front().size() + solver_time_mg_mpi_wait_blas3.front().size() +
  solver_time_mg_mpi_wait_collective.front().size()));

double * t_local = new double[item_count];
double * t_max = new double[item_count];
double * t_min = new double[item_count];
// ...
t_local[2] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_reduction(target).begin(),
  FEAT::Statistics::get_time_mpi_execute_reduction(target).end(), 0.);
t_local[3] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_blas2(target).begin(),
  FEAT::Statistics::get_time_mpi_execute_blas2(target).end(), 0.);
t_local[4] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_blas3(target).begin(),
  FEAT::Statistics::get_time_mpi_execute_blas3(target).end(), 0.);
t_local[5] = std::accumulate(FEAT::Statistics::get_time_mpi_execute_collective(target).begin(),
  FEAT::Statistics::get_time_mpi_execute_collective(target).end(), 0.);
t_local[6] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_reduction(target).begin(),
  FEAT::Statistics::get_time_mpi_wait_reduction(target).end(), 0.);
t_local[7] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_blas2(target).begin(),
  FEAT::Statistics::get_time_mpi_wait_blas2(target).end(), 0.);
t_local[8] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_blas3(target).begin(),
  FEAT::Statistics::get_time_mpi_wait_blas3(target).end(), 0.);
t_local[9] = std::accumulate(FEAT::Statistics::get_time_mpi_wait_collective(target).begin(),
  FEAT::Statistics::get_time_mpi_wait_collective(target).end(), 0.);
// ...
Index levels(Index(solver_time_mg.front().size()));
// ...
for (auto& step : solver_time_mg)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_execute_reduction)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_execute_blas2)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_execute_blas3)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_execute_collective)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_wait_reduction)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_wait_blas2)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_wait_blas3)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
// ...
for (auto& step : solver_time_mg_mpi_wait_collective)
// ...
  XASSERT(step.size() == levels);
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] = double(0);
  // ...
  for (Index i(0) ; i < step.size() ; ++i)
  // ...
    t_local[offset + i] += step.at(i);
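// --- illustrative sketch, not part of the original statistics.cpp ---------------------------
// The per-rank sums gathered in t_local above are presumably reduced into the t_min / t_max
// arrays allocated earlier, so the report can show the spread across all MPI ranks. The
// helper below (hypothetical name) shows that pattern with plain MPI calls; FEAT's
// Dist::Comm::allreduce combined with the Dist::op_min / Dist::op_max operation wrappers is a
// thin layer over exactly this. Requires <mpi.h> and an MPI build.
static void example_reduce_min_max(const double* t_local, double* t_min, double* t_max, std::size_t item_count)
{
  MPI_Allreduce(t_local, t_min, int(item_count), MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
  MPI_Allreduce(t_local, t_max, int(item_count), MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
}
// ---------------------------------------------------------------------------------------------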
// ...
String result = target + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]) + "\n";
// ...
for (Index i(0) ; i < levels ; ++i)
// ...
    ", local: " + stringify(t_local[offset + i]);
// ...
auto target = itarget->first;
// ...
std::vector<String> names;
// ...
XABORTM("target "+target+" not present in _solver_expressions");
// ...
XABORTM("Should never happen - _solver_expressions list did not start with start solve expression!");
// ...
_overall_iters[target].push_back(Index(0));
_overall_mpi_execute_reduction[target].push_back(0.);
_overall_mpi_execute_blas2[target].push_back(0.);
_overall_mpi_execute_blas3[target].push_back(0.);
_overall_mpi_execute_collective[target].push_back(0.);
_overall_mpi_wait_reduction[target].push_back(0.);
_overall_mpi_wait_blas2[target].push_back(0.);
_overall_mpi_wait_blas3[target].push_back(0.);
_overall_mpi_wait_collective[target].push_back(0.);

_outer_mg_mpi_execute_reduction[target].emplace_back();
_outer_mg_mpi_execute_blas2[target].emplace_back();
_outer_mg_mpi_execute_blas3[target].emplace_back();
_outer_mg_mpi_execute_collective[target].emplace_back();
_outer_mg_mpi_wait_reduction[target].emplace_back();
_outer_mg_mpi_wait_blas2[target].emplace_back();
_outer_mg_mpi_wait_blas3[target].emplace_back();
_outer_mg_mpi_wait_collective[target].emplace_back();
// ...
Index outer_mg_depth(0);
Index outer_schwarz_depth(0);
// ...
auto expression = *it;
// ...
names.push_back((*it)->solver_name);
// ...
if (names.size() > 0 && (names.back().starts_with("MultiGrid") || names.back().starts_with("VCycle") || names.back().starts_with("ScaRCMultiGrid")) && outer_mg_depth == 0)
  outer_mg_depth = (Index)names.size();
// ...
if (names.size() > 0 && names.back().starts_with("Schwarz") && outer_schwarz_depth == 0)
  outer_schwarz_depth = (Index)names.size();
// ...
if (names.size() > 1 && names.size() == outer_schwarz_depth + 1 && names.at(names.size() - 2) == "Schwarz")
// ...
if (names.size() < 2)
// ...
_overall_iters[target].back() += t->iters;
// ...
_overall_mpi_execute_reduction[target].back() += t->mpi_execute_reduction;
_overall_mpi_execute_blas2[target].back() += t->mpi_execute_blas2;
_overall_mpi_execute_blas3[target].back() += t->mpi_execute_blas3;
_overall_mpi_execute_collective[target].back() += t->mpi_execute_collective;
_overall_mpi_wait_reduction[target].back() += t->mpi_wait_reduction;
_overall_mpi_wait_blas2[target].back() += t->mpi_wait_blas2;
_overall_mpi_wait_blas3[target].back() += t->mpi_wait_blas3;
_overall_mpi_wait_collective[target].back() += t->mpi_wait_collective;
// ...
_outer_mg_mpi_execute_reduction[target].back().push_back(t->mpi_execute_reduction);
_outer_mg_mpi_execute_blas2[target].back().push_back(t->mpi_execute_blas2);
_outer_mg_mpi_execute_blas3[target].back().push_back(t->mpi_execute_blas3);
_outer_mg_mpi_execute_collective[target].back().push_back(t->mpi_execute_collective);
_outer_mg_mpi_wait_reduction[target].back().push_back(t->mpi_wait_reduction);
_outer_mg_mpi_wait_blas2[target].back().push_back(t->mpi_wait_blas2);
_outer_mg_mpi_wait_blas3[target].back().push_back(t->mpi_wait_blas3);
_outer_mg_mpi_wait_collective[target].back().push_back(t->mpi_wait_collective);
// ...
_outer_mg_mpi_execute_reduction[target].back().at(t->level) += t->mpi_execute_reduction;
_outer_mg_mpi_execute_blas2[target].back().at(t->level) += t->mpi_execute_blas2;
_outer_mg_mpi_execute_blas3[target].back().at(t->level) += t->mpi_execute_blas3;
_outer_mg_mpi_execute_collective[target].back().at(t->level) += t->mpi_execute_collective;
_outer_mg_mpi_wait_reduction[target].back().at(t->level) += t->mpi_wait_reduction;
_outer_mg_mpi_wait_blas2[target].back().at(t->level) += t->mpi_wait_blas2;
_outer_mg_mpi_wait_blas3[target].back().at(t->level) += t->mpi_wait_blas3;
_outer_mg_mpi_wait_collective[target].back().at(t->level) += t->mpi_wait_collective;
// ...
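// --- illustrative sketch, not part of the original statistics.cpp ---------------------------
// Minimal sketch of how the reporting side of this class might be driven from application
// code: solver_time is the wall-clock time of the solve measured by the caller, "default" is
// just an example target name, the helper name is hypothetical, and the solver run itself is
// omitted. Assumes the FEAT headers and <iostream> are available.
static void example_report_statistics(double solver_time)
{
  FEAT::Statistics::enable_solver_expressions = true;  // record solver expressions while solving
  FEAT::Statistics::expression_target = "default";     // target name the expressions are filed under

  // ... set up and run the solver here ...

  FEAT::Statistics::toe_solve = solver_time;            // total solve time, needs initialization
  FEAT::Statistics::compress_solver_expressions();      // fold the raw expression list into the maps above
  std::cout << FEAT::Statistics::get_formatted_solver_internals("default");
  std::cout << FEAT::Statistics::get_formatted_times(FEAT::Statistics::toe_solve);
}
// ---------------------------------------------------------------------------------------------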