@@ -66,6 +66,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
66
66
#include < sys/time.h>
67
67
#ifdef __linux__
68
68
#include < sys/mman.h>
69
+ #include < dirent.h>
69
70
#endif
70
71
#endif
71
72
@@ -426,6 +427,12 @@ bool PCM::isFixedCounterSupported(unsigned c)
426
427
427
428
bool PCM::isHWTMAL1Supported () const
428
429
{
430
+ #ifdef PCM_USE_PERF
431
+ if (perfEventTaskHandle.empty () == false )
432
+ {
433
+ return false ; // per PID/task perf collection does not support HW TMA L1
434
+ }
435
+ #endif
429
436
static int supported = -1 ;
430
437
if (supported < 0 )
431
438
{
@@ -2144,8 +2151,7 @@ PCM::PCM() :
2144
2151
2145
2152
#ifdef PCM_USE_PERF
2146
2153
canUsePerf = true ;
2147
- std::vector<int > dummy (PERF_MAX_COUNTERS, -1 );
2148
- perfEventHandle.resize (num_cores, dummy);
2154
+ perfEventHandle.resize (num_cores, std::vector<int >(PERF_MAX_COUNTERS, -1 ));
2149
2155
#endif
2150
2156
2151
2157
for (int32 i = 0 ; i < num_cores; ++i)
@@ -2390,7 +2396,7 @@ perf_event_attr PCM_init_perf_event_attr(bool group = true)
2390
2396
}
2391
2397
#endif
2392
2398
2393
- PCM::ErrorCode PCM::program (const PCM::ProgramMode mode_, const void * parameter_, const bool silent)
2399
+ PCM::ErrorCode PCM::program (const PCM::ProgramMode mode_, const void * parameter_, const bool silent, const int pid )
2394
2400
{
2395
2401
#ifdef __linux__
2396
2402
if (isNMIWatchdogEnabled (silent))
@@ -2442,7 +2448,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2442
2448
if (!silent) std::cerr << " Can not use Linux perf because OffcoreResponse usage is not supported. Falling-back to direct PMU programming.\n " ;
2443
2449
}
2444
2450
}
2445
- if (isHWTMAL1Supported () == true && perfSupportsTopDown () == false )
2451
+ if (isHWTMAL1Supported () == true && perfSupportsTopDown () == false && pid == - 1 )
2446
2452
{
2447
2453
canUsePerf = false ;
2448
2454
if (!silent) std::cerr << " Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n " ;
@@ -2689,6 +2695,50 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2689
2695
<< core_fixed_counter_num_max << " available\n " ;
2690
2696
return PCM::UnknownError;
2691
2697
}
2698
+ if (pid != -1 && canUsePerf == false )
2699
+ {
2700
+ std::cerr << " PCM ERROR: pid monitoring is only supported with Linux perf_event driver\n " ;
2701
+ return PCM::UnknownError;
2702
+ }
2703
+
2704
+ std::vector<int > tids{};
2705
+ #ifdef PCM_USE_PERF
2706
+ if (pid != -1 )
2707
+ {
2708
+ const auto strDir = std::string (" /proc/" ) + std::to_string (pid) + " /task/" ;
2709
+ DIR * tidDir = opendir (strDir.c_str ());
2710
+ if (tidDir)
2711
+ {
2712
+ struct dirent * entry{nullptr };
2713
+ while ((entry = readdir (tidDir)) != nullptr )
2714
+ {
2715
+ assert (entry->d_name );
2716
+ const auto tid = atoi (entry->d_name );
2717
+ if (tid)
2718
+ {
2719
+ tids.push_back (tid);
2720
+ // std::cerr << "Detected task " << tids.back() << "\n";
2721
+ }
2722
+ }
2723
+ closedir (tidDir);
2724
+ }
2725
+ else
2726
+ {
2727
+ std::cerr << " ERROR: Can't open " << strDir << " \n " ;
2728
+ return PCM::UnknownError;
2729
+ }
2730
+ }
2731
+ if (tids.empty () == false )
2732
+ {
2733
+ if (isHWTMAL1Supported ())
2734
+ {
2735
+ if (!silent) std::cerr << " INFO: TMA L1 metrics are not supported in PID collection mode\n " ;
2736
+ }
2737
+ if (!silent) std::cerr << " INFO: collecting core metrics for " << tids.size () << " threads in process " << pid << " \n " ;
2738
+ PerfEventHandleContainer _1 (num_cores, std::vector<int >(PERF_MAX_COUNTERS, -1 ));
2739
+ perfEventTaskHandle.resize (tids.size (), _1);
2740
+ }
2741
+ #endif
2692
2742
2693
2743
programmed_pmu = true ;
2694
2744
@@ -2703,11 +2753,11 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2703
2753
{
2704
2754
if (isCoreOnline (i) == false ) continue ;
2705
2755
2706
- std::packaged_task<void ()> task ([this , i, mode_, pExtDesc, &programmingStatuses]() -> void
2756
+ std::packaged_task<void ()> task ([this , i, mode_, pExtDesc, &programmingStatuses, &tids ]() -> void
2707
2757
{
2708
2758
TemporalThreadAffinity tempThreadAffinity (i, false ); // speedup trick for Linux
2709
2759
2710
- programmingStatuses[i] = programCoreCounters (i, mode_, pExtDesc, lastProgrammedCustomCounters[i]);
2760
+ programmingStatuses[i] = programCoreCounters (i, mode_, pExtDesc, lastProgrammedCustomCounters[i], tids );
2711
2761
});
2712
2762
asyncCoreResults.push_back (task.get_future ());
2713
2763
coreTaskQueues[i]->push (task);
@@ -2807,8 +2857,10 @@ std::mutex printErrorMutex;
2807
2857
PCM::ErrorCode PCM::programCoreCounters (const int i /* core */ ,
2808
2858
const PCM::ProgramMode mode_,
2809
2859
const ExtendedCustomCoreEventDescription * pExtDesc,
2810
- std::vector<EventSelectRegister> & result)
2860
+ std::vector<EventSelectRegister> & result,
2861
+ const std::vector<int > & tids)
2811
2862
{
2863
+ (void ) tids; // to silence unused param warning on non-Linux OS
2812
2864
// program core counters
2813
2865
2814
2866
result.clear ();
@@ -2840,27 +2892,57 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2840
2892
};
2841
2893
#ifdef PCM_USE_PERF
2842
2894
int leader_counter = -1 ;
2843
- auto programPerfEvent = [this , &leader_counter, &i](perf_event_attr & e, const int eventPos, const std::string & eventName) -> bool
2844
- {
2845
- // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2846
- if ((perfEventHandle[i][eventPos] = syscall (SYS_perf_event_open, &e, -1 ,
2847
- i /* core id */ , leader_counter /* group leader */ , 0 )) <= 0 )
2848
- {
2849
- std::lock_guard<std::mutex> _ (printErrorMutex);
2850
- std::cerr << " Linux Perf: Error when programming " << eventName << " , error: " << strerror (errno) <<
2851
- " with config 0x" << std::hex << e.config <<
2852
- " config1 0x" << e.config1 << std::dec << " \n " ;
2853
- if (24 == errno)
2895
+ auto programPerfEvent = [this , &leader_counter, &i, &tids](perf_event_attr e, const int eventPos, const std::string & eventName) -> bool
2896
+ {
2897
+ auto programPerfEventHelper = [&i]( PerfEventHandleContainer & perfEventHandle,
2898
+ perf_event_attr & e,
2899
+ const int eventPos,
2900
+ const std::string & eventName,
2901
+ const int leader_counter,
2902
+ const int tid) -> bool
2903
+ {
2904
+ // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2905
+ if ((perfEventHandle[i][eventPos] = syscall (SYS_perf_event_open, &e, tid,
2906
+ i /* core id */ , leader_counter /* group leader */ , 0 )) <= 0 )
2854
2907
{
2855
- std::cerr << " try executing 'ulimit -n 20000' to increase the limit on the number of open files.\n " ;
2908
+ std::lock_guard<std::mutex> _ (printErrorMutex);
2909
+ std::cerr << " Linux Perf: Error when programming " << eventName << " , error: " << strerror (errno) <<
2910
+ " with config 0x" << std::hex << e.config <<
2911
+ " config1 0x" << e.config1 << std::dec << " for tid " << tid << " leader " << leader_counter << " \n " ;
2912
+ if (24 == errno)
2913
+ {
2914
+ std::cerr << PCM_ULIMIT_RECOMMENDATION;
2915
+ }
2916
+ else
2917
+ {
2918
+ std::cerr << " try running with environment variable PCM_NO_PERF=1\n " ;
2919
+ }
2920
+ return false ;
2856
2921
}
2857
- else
2922
+ return true ;
2923
+ };
2924
+ if (tids.empty () == false )
2925
+ {
2926
+ e.inherit = 1 ;
2927
+ e.exclude_kernel = 1 ;
2928
+ e.exclude_hv = 1 ;
2929
+ e.read_format = 0 ; // 'inherit' does not work for combinations of read format (e.g. PERF_FORMAT_GROUP)
2930
+ auto handleIt = perfEventTaskHandle.begin ();
2931
+ for (const auto & tid: tids)
2858
2932
{
2859
- std::cerr << " try running with environment variable PCM_NO_PERF=1\n " ;
2933
+ if (handleIt == perfEventTaskHandle.end ())
2934
+ {
2935
+ break ;
2936
+ }
2937
+ if (programPerfEventHelper (*handleIt, e, eventPos, eventName, -1 , tid) == false )
2938
+ {
2939
+ return false ;
2940
+ }
2941
+ ++handleIt;
2860
2942
}
2861
- return false ;
2943
+ return true ;
2862
2944
}
2863
- return true ;
2945
+ return programPerfEventHelper (perfEventHandle, e, eventPos, eventName, leader_counter, - 1 ) ;
2864
2946
};
2865
2947
if (canUsePerf)
2866
2948
{
@@ -3729,16 +3811,27 @@ void PCM::cleanupPMU(const bool silent)
3729
3811
#ifdef PCM_USE_PERF
3730
3812
if (canUsePerf)
3731
3813
{
3732
- for (int i = 0 ; i < num_cores; ++i)
3733
- for (int c = 0 ; c < PERF_MAX_COUNTERS; ++c)
3814
+ auto cleanOne = [this ](PerfEventHandleContainer & cont) // closes every open perf fd held in 'cont' and marks each slot as unused
3815
+ {
3816
+ for (int i = 0 ; i < num_cores; ++i) // first index of the container: core
3817
+ {
3818
+ for (int c = 0 ; c < PERF_MAX_COUNTERS; ++c) // second index: counter slot on that core
3819
+ {
3820
+ auto & h = cont[i][c]; // reference, so the reset below updates the container in place
3821
+ if (h != -1 ) ::close (h); // -1 denotes a slot with no open descriptor
3822
+ h = -1 ; // reset unconditionally so a repeated cleanup does not close the same fd again
3823
+ }
3824
+ }
3825
+ };
3826
+ cleanOne (perfEventHandle);
3827
+ for (auto & cont : perfEventTaskHandle)
3734
3828
{
3735
- auto & h = perfEventHandle[i][c];
3736
- if (h != -1 ) ::close (h);
3737
- h = -1 ;
3829
+ cleanOne (cont);
3738
3830
}
3831
+ perfEventTaskHandle.clear ();
3739
3832
3740
- if (!silent) std::cerr << " Closed perf event handles\n " ;
3741
- return ;
3833
+ if (!silent) std::cerr << " Closed perf event handles\n " ;
3834
+ return ;
3742
3835
}
3743
3836
#endif
3744
3837
@@ -4048,6 +4141,31 @@ CoreCounterState getCoreCounterState(uint32 core)
4048
4141
#ifdef PCM_USE_PERF
4049
4142
void PCM::readPerfData (uint32 core, std::vector<uint64> & outData)
4050
4143
{
4144
+ if (perfEventTaskHandle.empty () == false )
4145
+ {
4146
+ std::fill (outData.begin (), outData.end (), 0 );
4147
+ for (const auto & handleArray : perfEventTaskHandle)
4148
+ {
4149
+ for (size_t ctr = 0 ; ctr < PERF_MAX_COUNTERS; ++ctr)
4150
+ {
4151
+ const int fd = handleArray[core][ctr];
4152
+ if (fd != -1 )
4153
+ {
4154
+ uint64 result{0ULL };
4155
+ const int status = ::read (fd, &result, sizeof (result));
4156
+ if (status != sizeof (result))
4157
+ {
4158
+ std::cerr << " PCM Error: failed to read from Linux perf handle " << fd << " \n " ;
4159
+ }
4160
+ else
4161
+ {
4162
+ outData[ctr] += result;
4163
+ }
4164
+ }
4165
+ }
4166
+ }
4167
+ return ;
4168
+ }
4051
4169
auto readPerfDataHelper = [this ](const uint32 core, std::vector<uint64>& outData, const uint32 leader, const uint32 num_counters)
4052
4170
{
4053
4171
if (perfEventHandle[core][leader] < 0 )
@@ -4471,7 +4589,7 @@ void PCM::programPCU(uint32* PCUCntConf, const uint64 filter)
4471
4589
}
4472
4590
}
4473
4591
4474
- PCM::ErrorCode PCM::program (const RawPMUConfigs& curPMUConfigs_, const bool silent)
4592
+ PCM::ErrorCode PCM::program (const RawPMUConfigs& curPMUConfigs_, const bool silent, const int pid )
4475
4593
{
4476
4594
if (MSR.empty ()) return PCM::MSRAccessDenied;
4477
4595
threadMSRConfig = RawPMUConfig{};
@@ -4526,7 +4644,7 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
4526
4644
}
4527
4645
conf.defaultUncoreProgramming = false ;
4528
4646
4529
- const auto status = program (PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent);
4647
+ const auto status = program (PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent, pid );
4530
4648
if (status != PCM::Success)
4531
4649
{
4532
4650
return status;
@@ -6323,7 +6441,7 @@ class PerfVirtualControlRegister : public HWRegister
6323
6441
{
6324
6442
std::cerr << " Linux Perf: Error on programming PMU " << pmuID << " : " << strerror (errno) << " \n " ;
6325
6443
std::cerr << " config: 0x" << std::hex << event.config << " config1: 0x" << event.config1 << " config2: 0x" << event.config2 << std::dec << " \n " ;
6326
- if (errno == 24 ) std::cerr << " try executing 'ulimit -n 20000' to increase the limit on the number of open files. \n " ;
6444
+ if (errno == 24 ) std::cerr << PCM_ULIMIT_RECOMMENDATION ;
6327
6445
return ;
6328
6446
}
6329
6447
}
@@ -7633,9 +7751,8 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc
7633
7751
uint32 refCore = socketRefCore[i];
7634
7752
TemporalThreadAffinity tempThreadAffinity (refCore); // speedup trick for Linux
7635
7753
7636
- for (uint32 cbo = 0 ; cbo < getMaxNumOfCBoxes (); ++cbo)
7754
+ for (uint32 cbo = 0 ; cbo < getMaxNumOfCBoxes () && cbo < cboPMUs[i]. size () ; ++cbo)
7637
7755
{
7638
- assert (cbo < cboPMUs[i].size ());
7639
7756
cboPMUs[i][cbo].initFreeze (UNC_PMON_UNIT_CTL_FRZ_EN);
7640
7757
7641
7758
if (ICX != cpu_model && SNOWRIDGE != cpu_model)
@@ -7694,7 +7811,10 @@ void PCM::programUBOX(const uint64* events)
7694
7811
7695
7812
*uboxPMUs[s].fixedCounterControl = UCLK_FIXED_CTL_EN;
7696
7813
7697
- PCM::program (uboxPMUs[s], events, events + 2 , 0 );
7814
+ if (events)
7815
+ {
7816
+ PCM::program (uboxPMUs[s], events, events + 2 , 0 );
7817
+ }
7698
7818
}
7699
7819
}
7700
7820
0 commit comments