@@ -66,6 +66,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
66
66
#include < sys/time.h>
67
67
#ifdef __linux__
68
68
#include < sys/mman.h>
69
+ #include < dirent.h>
69
70
#endif
70
71
#endif
71
72
@@ -426,6 +427,12 @@ bool PCM::isFixedCounterSupported(unsigned c)
426
427
427
428
bool PCM::isHWTMAL1Supported () const
428
429
{
430
+ #ifdef PCM_USE_PERF
431
+ if (perfEventTaskHandle.empty () == false )
432
+ {
433
+ return false ; // per PID/task perf collection does not support HW TMA L1
434
+ }
435
+ #endif
429
436
static int supported = -1 ;
430
437
if (supported < 0 )
431
438
{
@@ -2144,8 +2151,7 @@ PCM::PCM() :
2144
2151
2145
2152
#ifdef PCM_USE_PERF
2146
2153
canUsePerf = true ;
2147
- std::vector<int > dummy (PERF_MAX_COUNTERS, -1 );
2148
- perfEventHandle.resize (num_cores, dummy);
2154
+ perfEventHandle.resize (num_cores, std::vector<int >(PERF_MAX_COUNTERS, -1 ));
2149
2155
#endif
2150
2156
2151
2157
for (int32 i = 0 ; i < num_cores; ++i)
@@ -2442,7 +2448,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2442
2448
if (!silent) std::cerr << " Can not use Linux perf because OffcoreResponse usage is not supported. Falling-back to direct PMU programming.\n " ;
2443
2449
}
2444
2450
}
2445
- if (isHWTMAL1Supported () == true && perfSupportsTopDown () == false )
2451
+ if (isHWTMAL1Supported () == true && perfSupportsTopDown () == false && pid == - 1 )
2446
2452
{
2447
2453
canUsePerf = false ;
2448
2454
if (!silent) std::cerr << " Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n " ;
@@ -2695,6 +2701,45 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2695
2701
return PCM::UnknownError;
2696
2702
}
2697
2703
2704
+ std::vector<int > tids{};
2705
+ #ifdef PCM_USE_PERF
2706
+ if (pid != -1 )
2707
+ {
2708
+ const auto strDir = std::string (" /proc/" ) + std::to_string (pid) + " /task/" ;
2709
+ DIR * tidDir = opendir (strDir.c_str ());
2710
+ if (tidDir)
2711
+ {
2712
+ struct dirent * entry{nullptr };
2713
+ while ((entry = readdir (tidDir)) != nullptr )
2714
+ {
2715
+ assert (entry->d_name );
2716
+ const auto tid = atoi (entry->d_name );
2717
+ if (tid)
2718
+ {
2719
+ tids.push_back (tid);
2720
+ // std::cerr << "Detected task " << tids.back() << "\n";
2721
+ }
2722
+ }
2723
+ closedir (tidDir);
2724
+ }
2725
+ else
2726
+ {
2727
+ std::cerr << " ERROR: Can't open " << strDir << " \n " ;
2728
+ return PCM::UnknownError;
2729
+ }
2730
+ }
2731
+ if (tids.empty () == false )
2732
+ {
2733
+ if (isHWTMAL1Supported ())
2734
+ {
2735
+ if (!silent) std::cerr << " INFO: TMA L1 metrics are not supported in PID collection mode\n " ;
2736
+ }
2737
+ if (!silent) std::cerr << " INFO: collecting core metrics for " << tids.size () << " threads in process " << pid << " \n " ;
2738
+ PerfEventHandleContainer _1 (num_cores, std::vector<int >(PERF_MAX_COUNTERS, -1 ));
2739
+ perfEventTaskHandle.resize (tids.size (), _1);
2740
+ }
2741
+ #endif
2742
+
2698
2743
programmed_pmu = true ;
2699
2744
2700
2745
lastProgrammedCustomCounters.clear ();
@@ -2708,11 +2753,11 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2708
2753
{
2709
2754
if (isCoreOnline (i) == false ) continue ;
2710
2755
2711
- std::packaged_task<void ()> task ([this , i, mode_, pExtDesc, &programmingStatuses, &pid ]() -> void
2756
+ std::packaged_task<void ()> task ([this , i, mode_, pExtDesc, &programmingStatuses, &tids ]() -> void
2712
2757
{
2713
2758
TemporalThreadAffinity tempThreadAffinity (i, false ); // speedup trick for Linux
2714
2759
2715
- programmingStatuses[i] = programCoreCounters (i, mode_, pExtDesc, lastProgrammedCustomCounters[i], pid );
2760
+ programmingStatuses[i] = programCoreCounters (i, mode_, pExtDesc, lastProgrammedCustomCounters[i], tids );
2716
2761
});
2717
2762
asyncCoreResults.push_back (task.get_future ());
2718
2763
coreTaskQueues[i]->push (task);
@@ -2813,9 +2858,9 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2813
2858
const PCM::ProgramMode mode_,
2814
2859
const ExtendedCustomCoreEventDescription * pExtDesc,
2815
2860
std::vector<EventSelectRegister> & result,
2816
- const int pid )
2861
+ const std::vector< int > & tids )
2817
2862
{
2818
- (void ) pid ; // to silence uused param warning on non Linux OS
2863
+ (void ) tids ; // to silence unused param warning on non Linux OS
2819
2864
// program core counters
2820
2865
2821
2866
result.clear ();
@@ -2847,27 +2892,51 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2847
2892
};
2848
2893
#ifdef PCM_USE_PERF
2849
2894
int leader_counter = -1 ;
2850
- auto programPerfEvent = [this , &leader_counter, &i, &pid](perf_event_attr & e, const int eventPos, const std::string & eventName) -> bool
2851
- {
2852
- // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2853
- if ((perfEventHandle[i][eventPos] = syscall (SYS_perf_event_open, &e, pid,
2854
- i /* core id */ , leader_counter /* group leader */ , 0 )) <= 0 )
2855
- {
2856
- std::lock_guard<std::mutex> _ (printErrorMutex);
2857
- std::cerr << " Linux Perf: Error when programming " << eventName << " , error: " << strerror (errno) <<
2858
- " with config 0x" << std::hex << e.config <<
2859
- " config1 0x" << e.config1 << std::dec << " \n " ;
2860
- if (24 == errno)
2895
+ auto programPerfEvent = [this , &leader_counter, &i, &tids](perf_event_attr e, const int eventPos, const std::string & eventName) -> bool
2896
+ {
2897
+ auto programPerfEventHelper = [&i]( PerfEventHandleContainer & perfEventHandle,
2898
+ perf_event_attr & e,
2899
+ const int eventPos,
2900
+ const std::string & eventName,
2901
+ const int leader_counter,
2902
+ const int tid) -> bool
2903
+ {
2904
+ // if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2905
+ if ((perfEventHandle[i][eventPos] = syscall (SYS_perf_event_open, &e, tid,
2906
+ i /* core id */ , leader_counter /* group leader */ , 0 )) <= 0 )
2861
2907
{
2862
- std::cerr << " try executing 'ulimit -n 20000' to increase the limit on the number of open files.\n " ;
2908
+ std::lock_guard<std::mutex> _ (printErrorMutex);
2909
+ std::cerr << " Linux Perf: Error when programming " << eventName << " , error: " << strerror (errno) <<
2910
+ " with config 0x" << std::hex << e.config <<
2911
+ " config1 0x" << e.config1 << std::dec << " for tid " << tid << " leader " << leader_counter << " \n " ;
2912
+ if (24 == errno)
2913
+ {
2914
+ std::cerr << PCM_ULIMIT_RECOMMENDATION;
2915
+ }
2916
+ else
2917
+ {
2918
+ std::cerr << " try running with environment variable PCM_NO_PERF=1\n " ;
2919
+ }
2920
+ return false ;
2863
2921
}
2864
- else
2922
+ return true ;
2923
+ };
2924
+ if (tids.empty () == false )
2925
+ {
2926
+ e.inherit = 1 ;
2927
+ e.read_format = 0 ; // 'inherit' does not work for combinations of read format (e.g. PERF_FORMAT_GROUP)
2928
+ auto handleIt = perfEventTaskHandle.begin ();
2929
+ for (const auto & tid: tids)
2865
2930
{
2866
- std::cerr << " try running with environment variable PCM_NO_PERF=1\n " ;
2931
+ if (programPerfEventHelper (*handleIt, e, eventPos, eventName, -1 , tid) == false )
2932
+ {
2933
+ return false ;
2934
+ }
2935
+ ++handleIt;
2867
2936
}
2868
- return false ;
2937
+ return true ;
2869
2938
}
2870
- return true ;
2939
+ return programPerfEventHelper (perfEventHandle, e, eventPos, eventName, leader_counter, - 1 ) ;
2871
2940
};
2872
2941
if (canUsePerf)
2873
2942
{
@@ -3736,16 +3805,27 @@ void PCM::cleanupPMU(const bool silent)
3736
3805
#ifdef PCM_USE_PERF
3737
3806
if (canUsePerf)
3738
3807
{
3739
- for (int i = 0 ; i < num_cores; ++i)
3740
- for (int c = 0 ; c < PERF_MAX_COUNTERS; ++c)
3808
+ auto cleanOne = [this ](PerfEventHandleContainer & cont)
3809
+ {
3810
+ for (int i = 0 ; i < num_cores; ++i)
3811
+ {
3812
+ for (int c = 0 ; c < PERF_MAX_COUNTERS; ++c)
3813
+ {
3814
+ auto & h = cont[i][c];
3815
+ if (h != -1 ) ::close (h);
3816
+ h = -1 ;
3817
+ }
3818
+ }
3819
+ };
3820
+ cleanOne (perfEventHandle);
3821
+ for (auto & cont : perfEventTaskHandle)
3741
3822
{
3742
- auto & h = perfEventHandle[i][c];
3743
- if (h != -1 ) ::close (h);
3744
- h = -1 ;
3823
+ cleanOne (cont);
3745
3824
}
3825
+ perfEventTaskHandle.clear ();
3746
3826
3747
- if (!silent) std::cerr << " Closed perf event handles\n " ;
3748
- return ;
3827
+ if (!silent) std::cerr << " Closed perf event handles\n " ;
3828
+ return ;
3749
3829
}
3750
3830
#endif
3751
3831
@@ -4055,6 +4135,31 @@ CoreCounterState getCoreCounterState(uint32 core)
4055
4135
#ifdef PCM_USE_PERF
4056
4136
void PCM::readPerfData (uint32 core, std::vector<uint64> & outData)
4057
4137
{
4138
+ if (perfEventTaskHandle.empty () == false )
4139
+ {
4140
+ std::fill (outData.begin (), outData.end (), 0 );
4141
+ for (const auto & handleArray : perfEventTaskHandle)
4142
+ {
4143
+ for (size_t ctr = 0 ; ctr < PERF_MAX_COUNTERS; ++ctr)
4144
+ {
4145
+ const int fd = handleArray[core][ctr];
4146
+ if (fd != -1 )
4147
+ {
4148
+ uint64 result{0ULL };
4149
+ const int status = ::read (fd, &result, sizeof (result));
4150
+ if (status != sizeof (result))
4151
+ {
4152
+ std::cerr << " PCM Error: failed to read from Linux perf handle " << fd << " \n " ;
4153
+ }
4154
+ else
4155
+ {
4156
+ outData[ctr] += result;
4157
+ }
4158
+ }
4159
+ }
4160
+ }
4161
+ return ;
4162
+ }
4058
4163
auto readPerfDataHelper = [this ](const uint32 core, std::vector<uint64>& outData, const uint32 leader, const uint32 num_counters)
4059
4164
{
4060
4165
if (perfEventHandle[core][leader] < 0 )
@@ -6330,7 +6435,7 @@ class PerfVirtualControlRegister : public HWRegister
6330
6435
{
6331
6436
std::cerr << " Linux Perf: Error on programming PMU " << pmuID << " : " << strerror (errno) << " \n " ;
6332
6437
std::cerr << " config: 0x" << std::hex << event.config << " config1: 0x" << event.config1 << " config2: 0x" << event.config2 << std::dec << " \n " ;
6333
- if (errno == 24 ) std::cerr << " try executing 'ulimit -n 20000' to increase the limit on the number of open files. \n " ;
6438
+ if (errno == 24 ) std::cerr << PCM_ULIMIT_RECOMMENDATION ;
6334
6439
return ;
6335
6440
}
6336
6441
}
0 commit comments