Skip to content

Commit 47dcecf

Browse files
authored
Merge pull request #178 from intel-innersource/rdementi/pid-monitoring
rdementi/pid monitoring
2 parents 3d45db3 + 0f2af55 commit 47dcecf

File tree

11 files changed

+234
-48
lines changed

11 files changed

+234
-48
lines changed

scripts/get_sles_bins.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
3+
filename=`curl https://download.opensuse.org/repositories/home:/opcm/SLE_15_SP1/x86_64/ -s | sed -n 's/.*\(pcm-0-[0-9]*\.1\.x86_64.rpm\).*/\1/p'`
4+
5+
curl -L https://download.opensuse.org/repositories/home:/opcm/SLE_15_SP1/x86_64/$filename -o $filename
6+
7+
rpm2cpio $filename | cpio -idmv
8+

src/cpucounters.cpp

Lines changed: 156 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
6666
#include <sys/time.h>
6767
#ifdef __linux__
6868
#include <sys/mman.h>
69+
#include <dirent.h>
6970
#endif
7071
#endif
7172

@@ -426,6 +427,12 @@ bool PCM::isFixedCounterSupported(unsigned c)
426427

427428
bool PCM::isHWTMAL1Supported() const
428429
{
430+
#ifdef PCM_USE_PERF
431+
if (perfEventTaskHandle.empty() == false)
432+
{
433+
return false; // per PID/task perf collection does not support HW TMA L1
434+
}
435+
#endif
429436
static int supported = -1;
430437
if (supported < 0)
431438
{
@@ -2144,8 +2151,7 @@ PCM::PCM() :
21442151

21452152
#ifdef PCM_USE_PERF
21462153
canUsePerf = true;
2147-
std::vector<int> dummy(PERF_MAX_COUNTERS, -1);
2148-
perfEventHandle.resize(num_cores, dummy);
2154+
perfEventHandle.resize(num_cores, std::vector<int>(PERF_MAX_COUNTERS, -1));
21492155
#endif
21502156

21512157
for (int32 i = 0; i < num_cores; ++i)
@@ -2390,7 +2396,7 @@ perf_event_attr PCM_init_perf_event_attr(bool group = true)
23902396
}
23912397
#endif
23922398

2393-
PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent)
2399+
PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter_, const bool silent, const int pid)
23942400
{
23952401
#ifdef __linux__
23962402
if (isNMIWatchdogEnabled(silent))
@@ -2442,7 +2448,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
24422448
if (!silent) std::cerr << "Can not use Linux perf because OffcoreResponse usage is not supported. Falling-back to direct PMU programming.\n";
24432449
}
24442450
}
2445-
if (isHWTMAL1Supported() == true && perfSupportsTopDown() == false)
2451+
if (isHWTMAL1Supported() == true && perfSupportsTopDown() == false && pid == -1)
24462452
{
24472453
canUsePerf = false;
24482454
if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n";
@@ -2689,6 +2695,50 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
26892695
<< core_fixed_counter_num_max << " available\n";
26902696
return PCM::UnknownError;
26912697
}
2698+
if (pid != -1 && canUsePerf == false)
2699+
{
2700+
std::cerr << "PCM ERROR: pid monitoring is only supported with Linux perf_event driver\n";
2701+
return PCM::UnknownError;
2702+
}
2703+
2704+
std::vector<int> tids{};
2705+
#ifdef PCM_USE_PERF
2706+
if (pid != -1)
2707+
{
2708+
const auto strDir = std::string("/proc/") + std::to_string(pid) + "/task/";
2709+
DIR * tidDir = opendir(strDir.c_str());
2710+
if (tidDir)
2711+
{
2712+
struct dirent * entry{nullptr};
2713+
while ((entry = readdir(tidDir)) != nullptr)
2714+
{
2715+
assert(entry->d_name);
2716+
const auto tid = atoi(entry->d_name);
2717+
if (tid)
2718+
{
2719+
tids.push_back(tid);
2720+
// std::cerr << "Detected task " << tids.back() << "\n";
2721+
}
2722+
}
2723+
closedir(tidDir);
2724+
}
2725+
else
2726+
{
2727+
std::cerr << "ERROR: Can't open " << strDir << "\n";
2728+
return PCM::UnknownError;
2729+
}
2730+
}
2731+
if (tids.empty() == false)
2732+
{
2733+
if (isHWTMAL1Supported())
2734+
{
2735+
if (!silent) std::cerr << "INFO: TMA L1 metrics are not supported in PID collection mode\n";
2736+
}
2737+
if (!silent) std::cerr << "INFO: collecting core metrics for " << tids.size() << " threads in process " << pid << "\n";
2738+
PerfEventHandleContainer _1(num_cores, std::vector<int>(PERF_MAX_COUNTERS, -1));
2739+
perfEventTaskHandle.resize(tids.size(), _1);
2740+
}
2741+
#endif
26922742

26932743
programmed_pmu = true;
26942744

@@ -2703,11 +2753,11 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
27032753
{
27042754
if (isCoreOnline(i) == false) continue;
27052755

2706-
std::packaged_task<void()> task([this, i, mode_, pExtDesc, &programmingStatuses]() -> void
2756+
std::packaged_task<void()> task([this, i, mode_, pExtDesc, &programmingStatuses, &tids]() -> void
27072757
{
27082758
TemporalThreadAffinity tempThreadAffinity(i, false); // speedup trick for Linux
27092759

2710-
programmingStatuses[i] = programCoreCounters(i, mode_, pExtDesc, lastProgrammedCustomCounters[i]);
2760+
programmingStatuses[i] = programCoreCounters(i, mode_, pExtDesc, lastProgrammedCustomCounters[i], tids);
27112761
});
27122762
asyncCoreResults.push_back(task.get_future());
27132763
coreTaskQueues[i]->push(task);
@@ -2807,8 +2857,10 @@ std::mutex printErrorMutex;
28072857
PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
28082858
const PCM::ProgramMode mode_,
28092859
const ExtendedCustomCoreEventDescription * pExtDesc,
2810-
std::vector<EventSelectRegister> & result)
2860+
std::vector<EventSelectRegister> & result,
2861+
const std::vector<int> & tids)
28112862
{
2863+
(void) tids; // to silence unused param warning on non-Linux OS
28122864
// program core counters
28132865

28142866
result.clear();
@@ -2840,27 +2892,57 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
28402892
};
28412893
#ifdef PCM_USE_PERF
28422894
int leader_counter = -1;
2843-
auto programPerfEvent = [this, &leader_counter, &i](perf_event_attr & e, const int eventPos, const std::string & eventName) -> bool
2844-
{
2845-
// if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2846-
if ((perfEventHandle[i][eventPos] = syscall(SYS_perf_event_open, &e, -1,
2847-
i /* core id */, leader_counter /* group leader */, 0)) <= 0)
2848-
{
2849-
std::lock_guard<std::mutex> _(printErrorMutex);
2850-
std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) <<
2851-
" with config 0x" << std::hex << e.config <<
2852-
" config1 0x" << e.config1 << std::dec << "\n";
2853-
if (24 == errno)
2895+
auto programPerfEvent = [this, &leader_counter, &i, &tids](perf_event_attr e, const int eventPos, const std::string & eventName) -> bool
2896+
{
2897+
auto programPerfEventHelper = [&i]( PerfEventHandleContainer & perfEventHandle,
2898+
perf_event_attr & e,
2899+
const int eventPos,
2900+
const std::string & eventName,
2901+
const int leader_counter,
2902+
const int tid) -> bool
2903+
{
2904+
// if (i == 0) std::cerr << "DEBUG: programming event "<< std::hex << e.config << std::dec << "\n";
2905+
if ((perfEventHandle[i][eventPos] = syscall(SYS_perf_event_open, &e, tid,
2906+
i /* core id */, leader_counter /* group leader */, 0)) <= 0)
28542907
{
2855-
std::cerr << "try executing 'ulimit -n 20000' to increase the limit on the number of open files.\n";
2908+
std::lock_guard<std::mutex> _(printErrorMutex);
2909+
std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) <<
2910+
" with config 0x" << std::hex << e.config <<
2911+
" config1 0x" << e.config1 << std::dec << " for tid " << tid << " leader " << leader_counter << "\n";
2912+
if (24 == errno)
2913+
{
2914+
std::cerr << PCM_ULIMIT_RECOMMENDATION;
2915+
}
2916+
else
2917+
{
2918+
std::cerr << "try running with environment variable PCM_NO_PERF=1\n";
2919+
}
2920+
return false;
28562921
}
2857-
else
2922+
return true;
2923+
};
2924+
if (tids.empty() == false)
2925+
{
2926+
e.inherit = 1;
2927+
e.exclude_kernel = 1;
2928+
e.exclude_hv = 1;
2929+
e.read_format = 0; // 'inherit' does not work for combinations of read format (e.g. PERF_FORMAT_GROUP)
2930+
auto handleIt = perfEventTaskHandle.begin();
2931+
for (const auto & tid: tids)
28582932
{
2859-
std::cerr << "try running with environment variable PCM_NO_PERF=1\n";
2933+
if (handleIt == perfEventTaskHandle.end())
2934+
{
2935+
break;
2936+
}
2937+
if (programPerfEventHelper(*handleIt, e, eventPos, eventName, -1, tid) == false)
2938+
{
2939+
return false;
2940+
}
2941+
++handleIt;
28602942
}
2861-
return false;
2943+
return true;
28622944
}
2863-
return true;
2945+
return programPerfEventHelper(perfEventHandle, e, eventPos, eventName, leader_counter, -1);
28642946
};
28652947
if (canUsePerf)
28662948
{
@@ -3729,16 +3811,27 @@ void PCM::cleanupPMU(const bool silent)
37293811
#ifdef PCM_USE_PERF
37303812
if (canUsePerf)
37313813
{
3732-
for (int i = 0; i < num_cores; ++i)
3733-
for(int c = 0; c < PERF_MAX_COUNTERS; ++c)
3814+
auto cleanOne = [this](PerfEventHandleContainer & cont)
3815+
{
3816+
for (int i = 0; i < num_cores; ++i)
3817+
{
3818+
for(int c = 0; c < PERF_MAX_COUNTERS; ++c)
3819+
{
3820+
auto & h = cont[i][c];
3821+
if (h != -1) ::close(h);
3822+
h = -1;
3823+
}
3824+
}
3825+
};
3826+
cleanOne(perfEventHandle);
3827+
for (auto & cont : perfEventTaskHandle)
37343828
{
3735-
auto & h = perfEventHandle[i][c];
3736-
if (h != -1) ::close(h);
3737-
h = -1;
3829+
cleanOne(cont);
37383830
}
3831+
perfEventTaskHandle.clear();
37393832

3740-
if (!silent) std::cerr << " Closed perf event handles\n";
3741-
return;
3833+
if (!silent) std::cerr << " Closed perf event handles\n";
3834+
return;
37423835
}
37433836
#endif
37443837

@@ -4048,6 +4141,31 @@ CoreCounterState getCoreCounterState(uint32 core)
40484141
#ifdef PCM_USE_PERF
40494142
void PCM::readPerfData(uint32 core, std::vector<uint64> & outData)
40504143
{
4144+
if (perfEventTaskHandle.empty() == false)
4145+
{
4146+
std::fill(outData.begin(), outData.end(), 0);
4147+
for (const auto & handleArray : perfEventTaskHandle)
4148+
{
4149+
for (size_t ctr = 0; ctr < PERF_MAX_COUNTERS; ++ctr)
4150+
{
4151+
const int fd = handleArray[core][ctr];
4152+
if (fd != -1)
4153+
{
4154+
uint64 result{0ULL};
4155+
const int status = ::read(fd, &result, sizeof(result));
4156+
if (status != sizeof(result))
4157+
{
4158+
std::cerr << "PCM Error: failed to read from Linux perf handle " << fd << "\n";
4159+
}
4160+
else
4161+
{
4162+
outData[ctr] += result;
4163+
}
4164+
}
4165+
}
4166+
}
4167+
return;
4168+
}
40514169
auto readPerfDataHelper = [this](const uint32 core, std::vector<uint64>& outData, const uint32 leader, const uint32 num_counters)
40524170
{
40534171
if (perfEventHandle[core][leader] < 0)
@@ -4471,7 +4589,7 @@ void PCM::programPCU(uint32* PCUCntConf, const uint64 filter)
44714589
}
44724590
}
44734591

4474-
PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool silent)
4592+
PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool silent, const int pid)
44754593
{
44764594
if (MSR.empty()) return PCM::MSRAccessDenied;
44774595
threadMSRConfig = RawPMUConfig{};
@@ -4526,7 +4644,7 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
45264644
}
45274645
conf.defaultUncoreProgramming = false;
45284646

4529-
const auto status = program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent);
4647+
const auto status = program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf, silent, pid);
45304648
if (status != PCM::Success)
45314649
{
45324650
return status;
@@ -6323,7 +6441,7 @@ class PerfVirtualControlRegister : public HWRegister
63236441
{
63246442
std::cerr << "Linux Perf: Error on programming PMU " << pmuID << ": " << strerror(errno) << "\n";
63256443
std::cerr << "config: 0x" << std::hex << event.config << " config1: 0x" << event.config1 << " config2: 0x" << event.config2 << std::dec << "\n";
6326-
if (errno == 24) std::cerr << "try executing 'ulimit -n 20000' to increase the limit on the number of open files.\n";
6444+
if (errno == 24) std::cerr << PCM_ULIMIT_RECOMMENDATION;
63276445
return;
63286446
}
63296447
}
@@ -7633,9 +7751,8 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc
76337751
uint32 refCore = socketRefCore[i];
76347752
TemporalThreadAffinity tempThreadAffinity(refCore); // speedup trick for Linux
76357753

7636-
for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes(); ++cbo)
7754+
for(uint32 cbo = 0; cbo < getMaxNumOfCBoxes() && cbo < cboPMUs[i].size(); ++cbo)
76377755
{
7638-
assert(cbo < cboPMUs[i].size());
76397756
cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);
76407757

76417758
if (ICX != cpu_model && SNOWRIDGE != cpu_model)
@@ -7694,7 +7811,10 @@ void PCM::programUBOX(const uint64* events)
76947811

76957812
*uboxPMUs[s].fixedCounterControl = UCLK_FIXED_CTL_EN;
76967813

7697-
PCM::program(uboxPMUs[s], events, events + 2, 0);
7814+
if (events)
7815+
{
7816+
PCM::program(uboxPMUs[s], events, events + 2, 0);
7817+
}
76987818
}
76997819
}
77007820

src/cpucounters.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,9 @@ class PCM_API PCM
859859

860860
bool canUsePerf;
861861
#ifdef PCM_USE_PERF
862-
std::vector<std::vector<int> > perfEventHandle;
862+
typedef std::vector<std::vector<int> > PerfEventHandleContainer;
863+
PerfEventHandleContainer perfEventHandle;
864+
std::vector<PerfEventHandleContainer> perfEventTaskHandle;
863865
void readPerfData(uint32 core, std::vector<uint64> & data);
864866

865867
enum {
@@ -894,7 +896,7 @@ class PCM_API PCM
894896
std::vector<std::vector<EventSelectRegister> > lastProgrammedCustomCounters;
895897
uint32 checkCustomCoreProgramming(std::shared_ptr<SafeMsrHandle> msr);
896898
ErrorCode programCoreCounters(int core, const PCM::ProgramMode mode, const ExtendedCustomCoreEventDescription * pExtDesc,
897-
std::vector<EventSelectRegister> & programmedCustomCounters);
899+
std::vector<EventSelectRegister> & programmedCustomCounters, const std::vector<int> & tids);
898900

899901
bool PMUinUse();
900902
void cleanupPMU(const bool silent = false);
@@ -1110,6 +1112,8 @@ class PCM_API PCM
11101112
/*! \brief Programs performance counters
11111113
\param mode_ mode of programming, see ProgramMode definition
11121114
\param parameter_ optional parameter for some of programming modes
1115+
\param silent set to true to silence diagnostic messages
1116+
\param pid restrict core metrics only to specified pid (process id)
11131117
11141118
Call this method before you start using the performance counting routines.
11151119
@@ -1118,7 +1122,7 @@ class PCM_API PCM
11181122
program PMUs: Intel(r) VTune(tm), Intel(r) Performance Tuning Utility (PTU). This code may make
11191123
VTune or PTU measurements invalid. VTune or PTU measurement may make measurement with this code invalid. Please enable either usage of these routines or VTune/PTU/etc.
11201124
*/
1121-
ErrorCode program(const ProgramMode mode_ = DEFAULT_EVENTS, const void * parameter_ = NULL, const bool silent = false); // program counters and start counting
1125+
ErrorCode program(const ProgramMode mode_ = DEFAULT_EVENTS, const void * parameter_ = NULL, const bool silent = false, const int pid = -1); // program counters and start counting
11221126

11231127
/*! \brief checks the error and suggests solution and/or exits the process
11241128
\param code error code from the 'program' call
@@ -1183,7 +1187,7 @@ class PCM_API PCM
11831187
FrontendPos = 4
11841188
};
11851189
typedef std::map<std::string, RawPMUConfig> RawPMUConfigs;
1186-
ErrorCode program(const RawPMUConfigs& curPMUConfigs, const bool silent = false);
1190+
ErrorCode program(const RawPMUConfigs& curPMUConfigs, const bool silent = false, const int pid = -1);
11871191

11881192
std::pair<unsigned, unsigned> getOCREventNr(const int event, const unsigned coreID) const
11891193
{

src/pci.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ int openHandle(uint32 groupnr_, uint32 bus, uint32 device, uint32 function)
384384
int handle = ::open(path.str().c_str(), O_RDWR);
385385
if (handle < 0)
386386
{
387-
if (errno == 24) std::cerr << "ERROR: try executing 'ulimit -n 20000' to increase the limit on the number of open files.\n";
387+
if (errno == 24) std::cerr << "ERROR: " << PCM_ULIMIT_RECOMMENDATION;
388388
handle = ::open((std::string("/pcm") + path.str()).c_str(), O_RDWR);
389389
}
390390
return handle;

0 commit comments

Comments
 (0)