2626#include < sstream>
2727#include < assert.h>
2828#include < bitset>
29+ #include < map>
2930#include " cpucounters.h"
3031#include " utils.h"
3132
@@ -89,6 +90,7 @@ void print_help(const string & prog_name)
8990 cout << " -yc | --yescores | /yc => enable specific cores to output\n " ;
9091 cout << " -ns | --nosockets | /ns => hide socket related output\n " ;
9192 cout << " -nsys | --nosystem | /nsys => hide system related output\n " ;
93+ cout << " --die => show aggregated core metrics per die\n " ;
9294 cout << " --color => use ASCII colors\n " ;
9395 cout << " --no-color => don't use ASCII colors\n " ;
9496 cout << " -csv[=file.csv] | /csv[=file.csv] => output compact CSV format to screen or\n "
@@ -171,7 +173,8 @@ void print_output(PCM * m,
171173 const bool show_partial_core_output,
172174 const bool show_socket_output,
173175 const bool show_system_output,
174- const int metricVersion
176+ const int metricVersion,
177+ const bool show_die_output = false
175178 )
176179{
177180 cout << " \n " ;
@@ -301,6 +304,28 @@ void print_output(PCM * m,
301304 cout << resetColor ();
302305 }
303306 }
// Optional per-die section of the text report: when show_die_output is set,
// the per-core counter states of all online cores are summed into one
// accumulator per (socket id, die id) pair and one row is printed per die.
307+ if (show_die_output)
308+ {
309+ // aggregate core metrics per (socket, die) pair
// Two maps are kept in lock-step: die_cstates1 accumulates the cstates1
// samples and die_cstates2 the cstates2 samples. std::map::operator[]
// value-initializes an accumulator the first time a key is seen.
310+ std::map<std::pair<int32, int32>, CoreCounterState> die_cstates1, die_cstates2;
311+ for (uint32 i = 0 ; i < m->getNumCores (); ++i)
312+ {
// Offline cores contribute nothing to any die.
313+ if (m->isCoreOnline (i) == false )
314+ continue ;
315+ auto key = std::make_pair (m->getSocketId (i), m->getDieId (i));
316+ die_cstates1[key] += cstates1[i];
317+ die_cstates2[key] += cstates2[i];
318+ }
319+ cout << longDiv;
// std::map iterates in ascending key order, so rows come out sorted by
// socket id first and die id second.
320+ for (const auto & entry : die_cstates1)
321+ {
322+ const auto & key = entry.first ;
// Row label is built from "<socket id>.<die id>".
323+ cout << " SKT " << setw (4 ) << (std::to_string (key.first ) + " ." + std::to_string (key.second ));
// Both maps were filled with the same keys above, so die_cstates2[key]
// never inserts a new element here.
// NOTE(review): entry.second already refers to die_cstates1[key]; the
// repeated operator[] lookups could be replaced by references.
324+ print_basic_metrics (m, die_cstates1[key], die_cstates2[key], metricVersion);
325+ print_other_metrics (m, die_cstates1[key], die_cstates2[key]);
326+ cout << resetColor ();
327+ }
328+ }
304329 if (show_socket_output)
305330 {
306331 if (!(m->getNumSockets () == 1 && (m->isAtom () || cpu_family_model == PCM::KNL)))
@@ -725,7 +750,8 @@ void print_csv_header(PCM * m,
725750 const bool show_core_output,
726751 const bool show_partial_core_output,
727752 const bool show_socket_output,
728- const bool show_system_output
753+ const bool show_system_output,
754+ const bool show_die_output = false
729755 )
730756{
731757 // print first header line
@@ -880,6 +906,30 @@ void print_csv_header(PCM * m,
880906 }
881907 }
882908
// First CSV header line, die section: for every distinct (socket id, die id)
// pair that has at least one online core, the group label "SKT<socket>.<die>"
// is passed to the header helpers once per column group of that die.
909+ if (show_die_output)
910+ {
911+ // build sorted list of (socket, die) pairs for first header line
// The map is used only as an ordered set of keys; the bool value is never
// read in this block.
912+ std::map<std::pair<int32, int32>, bool > die_map;
913+ for (uint32 i = 0 ; i < m->getNumCores (); ++i)
914+ {
915+ if (m->isCoreOnline (i))
916+ die_map[std::make_pair (m->getSocketId (i), m->getDieId (i))] = true ;
917+ }
918+ for (const auto & entry : die_map)
919+ {
// `header` is declared outside this block (not visible here).
920+ header = " SKT" + std::to_string (entry.first .first ) + " ." + std::to_string (entry.first .second );
// NOTE(review): presumably print_basic_metrics_csv_semicolons and
// print_csv_header_helper emit the label once per column they cover; their
// definitions are not visible here. Confirm against those helpers.
921+ print_basic_metrics_csv_semicolons (m, header);
// One helper call per optional column, guarded by the same availability
// checks that guard the L3OCC / LMB / RMB names on the second header line.
922+ if (m->L3CacheOccupancyMetricAvailable ())
923+ print_csv_header_helper (header);
924+ if (m->CoreLocalMemoryBWMetricAvailable ())
925+ print_csv_header_helper (header);
926+ if (m->CoreRemoteMemoryBWMetricAvailable ())
927+ print_csv_header_helper (header);
928+ print_csv_header_helper (header); // TEMP
929+ print_csv_header_helper (header, 7 ); // INST,ACYC,TIME(ticks),PhysIPC,PhysIPC%,INSTnom,INSTnom%
930+ }
931+ }
932+
883933 if (show_core_output)
884934 {
885935 for (uint32 i = 0 ; i < m->getNumCores (); ++i)
@@ -1059,6 +1109,29 @@ void print_csv_header(PCM * m,
10591109 }
10601110 }
10611111
// Second CSV header line, die section: prints the per-column metric names
// once for every distinct (socket id, die id) pair with an online core.
1112+ if (show_die_output)
1113+ {
1114+ // second header line for die-level columns
// The set of dies is rebuilt with the same loop as for the first header
// line, so both lines cover the same number of dies.
// NOTE(review): this duplicates the die_map construction used for the first
// header line; a shared helper would keep the two from drifting apart.
1115+ std::map<std::pair<int32, int32>, bool > die_map;
1116+ for (uint32 i = 0 ; i < m->getNumCores (); ++i)
1117+ {
1118+ if (m->isCoreOnline (i))
1119+ die_map[std::make_pair (m->getSocketId (i), m->getDieId (i))] = true ;
1120+ }
// Only the number of dies matters here; the keys themselves are not used.
1121+ for (size_t d = 0 ; d < die_map.size (); ++d)
1122+ {
1123+ print_basic_metrics_csv_header (m);
// Optional columns appear only when the corresponding metric is available.
1124+ if (m->L3CacheOccupancyMetricAvailable ())
1125+ cout << " L3OCC," ;
1126+ if (m->CoreLocalMemoryBWMetricAvailable ())
1127+ cout << " LMB," ;
1128+ if (m->CoreRemoteMemoryBWMetricAvailable ())
1129+ cout << " RMB," ;
1130+ cout << " TEMP," ;
// Seven fixed columns, matching the width of 7 passed to
// print_csv_header_helper on the first header line.
1131+ cout << " INST,ACYC,TIME(ticks),PhysIPC,PhysIPC%,INSTnom,INSTnom%," ;
1132+ }
1133+ }
1134+
10621135 if (show_core_output)
10631136 {
10641137 for (uint32 i = 0 ; i < m->getNumCores (); ++i)
@@ -1151,7 +1224,8 @@ void print_csv(PCM * m,
11511224 const bool show_core_output,
11521225 const bool show_partial_core_output,
11531226 const bool show_socket_output,
1154- const bool show_system_output
1227+ const bool show_system_output,
1228+ const bool show_die_output = false
11551229 )
11561230{
11571231 cout << " \n " ;
@@ -1324,6 +1398,35 @@ void print_csv(PCM * m,
13241398 }
13251399 }
13261400
// CSV data row, die section: sums the counter states of all online cores per
// (socket id, die id) pair and prints one group of values per die, in
// ascending (socket, die) order as given by std::map iteration.
1401+ if (show_die_output)
1402+ {
1403+ // aggregate core metrics per (socket, die) pair
1404+ std::map<std::pair<int32, int32>, CoreCounterState> die_cstates1, die_cstates2;
1405+ for (uint32 i = 0 ; i < m->getNumCores (); ++i)
1406+ {
// Offline cores are skipped, as in the header construction.
1407+ if (m->isCoreOnline (i) == false )
1408+ continue ;
1409+ auto key = std::make_pair (m->getSocketId (i), m->getDieId (i));
1410+ die_cstates1[key] += cstates1[i];
1411+ die_cstates2[key] += cstates2[i];
1412+ }
1413+ for (const auto & entry : die_cstates1)
1414+ {
1415+ const auto & key = entry.first ;
// Both maps hold the same keys, so the operator[] lookups below never
// insert new elements.
1416+ print_basic_metrics_csv (m, die_cstates1[key], die_cstates2[key], false );
1417+ print_other_metrics_csv (m, die_cstates1[key], die_cstates2[key]);
// NOTE(review): the thermal headroom is read from the summed state; whether
// operator+= yields a meaningful per-die value is not visible here. Confirm.
1418+ cout << ' ,' << temp_format (die_cstates2[key].getThermalHeadroom ()) << ' ,' ;
1419+
// Seven values follow, one per name in the
// "INST,ACYC,TIME(ticks),PhysIPC,PhysIPC%,INSTnom,INSTnom%" header group.
1420+ cout << float_format (getInstructionsRetired (die_cstates1[key], die_cstates2[key])) << " ,"
1421+ << float_format (getCycles (die_cstates1[key], die_cstates2[key])) << " ,"
// The third value is taken from element 0 of cstates1/cstates2, not from the
// die aggregate.
// NOTE(review): assumes element 0 is a valid reference for every die. Confirm.
1422+ << float_format (getInvariantTSC (cstates1[0 ], cstates2[0 ])) << " ,"
1423+ << getCoreIPC (die_cstates1[key], die_cstates2[key]) << " ,"
1424+ << 100 . * (getCoreIPC (die_cstates1[key], die_cstates2[key]) / double (m->getMaxIPC ())) << " ,"
1425+ << getTotalExecUsage (die_cstates1[key], die_cstates2[key]) << " ,"
1426+ << 100 . * (getTotalExecUsage (die_cstates1[key], die_cstates2[key]) / double (m->getMaxIPC ())) << " ," ;
1427+ }
1428+ }
1429+
13271430 if (show_core_output)
13281431 {
13291432 for (uint32 i = 0 ; i < m->getNumCores (); ++i)
@@ -1388,6 +1491,7 @@ int mainThrows(int argc, char * argv[])
13881491 bool show_partial_core_output = false ;
13891492 bool show_socket_output = true ;
13901493 bool show_system_output = true ;
1494+ bool show_die_output = false ;
13911495 bool csv_output = false ;
13921496 bool reset_pmu = false ;
13931497 bool disable_JKT_workaround = false ; // as per http://software.intel.com/en-us/articles/performance-impact-when-sampling-certain-llc-events-on-snb-ep-with-vtune
@@ -1472,6 +1576,11 @@ int mainThrows(int argc, char * argv[])
14721576 show_system_output = false ;
14731577 continue ;
14741578 }
1579+ else if (check_argument_equals (*argv, {" --die" }))
1580+ {
1581+ show_die_output = true ;
1582+ continue ;
1583+ }
14751584 else if (check_argument_equals (*argv, {" --color" }))
14761585 {
14771586 setColorEnabled ();
@@ -1613,7 +1722,7 @@ int mainThrows(int argc, char * argv[])
16131722 // cerr << "DEBUG: Delay: " << delay << " seconds. Blocked: " << m->isBlocked() << "\n";
16141723
16151724 if (csv_output) {
1616- print_csv_header (m, ycores, show_core_output, show_partial_core_output, show_socket_output, show_system_output);
1725+ print_csv_header (m, ycores, show_core_output, show_partial_core_output, show_socket_output, show_system_output, show_die_output );
16171726 }
16181727
16191728 m->getAllCounterStates (sstate1, sktstate1, cstates1);
@@ -1632,11 +1741,11 @@ int mainThrows(int argc, char * argv[])
16321741
16331742 if (csv_output)
16341743 print_csv (m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2,
1635- show_core_output, show_partial_core_output, show_socket_output, show_system_output);
1744+ show_core_output, show_partial_core_output, show_socket_output, show_system_output, show_die_output );
16361745 else
16371746 print_output (m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2,
16381747 cpu_family_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output,
1639- metricVersion);
1748+ metricVersion, show_die_output );
16401749
16411750 std::swap (sstate1, sstate2);
16421751 std::swap (sktstate1, sktstate2);
0 commit comments