Skip to content

Commit 7e6975c

Browse files
authored
Add context switches and use getrusage (#50)
1 parent a96bc0f commit 7e6975c

File tree

4 files changed

+48
-43
lines changed

4 files changed

+48
-43
lines changed

include/Monitoring/ProcessMonitor.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <string>
1414
#include <thread>
1515
#include <vector>
16+
#include <sys/resource.h>
1617

1718
#include "Monitoring/Metric.h"
1819

@@ -34,29 +35,26 @@ class ProcessMonitor
3435
/// Default destructor
3536
~ProcessMonitor() = default;
3637

37-
/// Generates performance metrics (stored in mPsParams vector)
38-
std::vector<Metric> getPidStatus();
38+
/// Retrieves memory usage (%)
39+
Metric getMemoryUsage();
3940

40-
/// Generates metrics per network interface: bytesReceived, bytesTransmitted
41+
/// Retrieves bytesReceived, bytesTransmitted per network interface
4142
std::vector<Metric> getNetworkUsage();
4243

44+
/// Retrieves CPU usage (%) and number of context switches during the interval
45+
std::vector<Metric> getCpuAndContexts();
4346
private:
4447
/// PIDs that are monitored
4548
unsigned int mPid;
4649

47-
/// options to be passed to PS
48-
std::string mPsCommand;
49-
50-
/// mutex to lock vector of PIDs
51-
std::mutex mVectorPidLock;
52-
53-
/// List of PS params with their types
54-
const std::vector<std::pair<std::string, MetricType>> mPsParams {
55-
{"etime", MetricType::STRING}, {"pcpu", MetricType::DOUBLE}, {"pmem", MetricType::DOUBLE}
56-
};
57-
5850
/// Executes terminal command
5951
std::string exec(const char* cmd);
52+
53+
/// 'getrusage' values from last execution
54+
struct rusage mPreviousGetrUsage;
55+
56+
/// Timestamp when process monitoring was executed last time
57+
std::chrono::high_resolution_clock::time_point mTimeLastRun;
6058
};
6159

6260
} // namespace monitoring

src/Monitoring.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,9 @@ void Monitoring::processMonitorLoop(int interval)
115115
while (mMonitorRunning) {
116116
std::this_thread::sleep_for (std::chrono::milliseconds(interval*10));
117117
if ((++loopCount % 100) != 0) continue;
118-
send(mProcessMonitor->getPidStatus());
118+
send(mProcessMonitor->getCpuAndContexts());
119119
send(mProcessMonitor->getNetworkUsage());
120+
send(mProcessMonitor->getMemoryUsage());
120121
loopCount = 0;
121122
}
122123
}

src/ProcessMonitor.cxx

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
#include "Monitoring/ProcessMonitor.h"
77
#include "Exceptions/MonitoringInternalException.h"
8+
#include "MonLogger.h"
89
#include <boost/algorithm/string/classification.hpp>
9-
#include <boost/algorithm/string/split.hpp>
1010
#include <chrono>
11-
#include "MonLogger.h"
1211
#include <sstream>
12+
#include <cmath>
1313

1414
namespace o2
1515
{
@@ -20,18 +20,16 @@ namespace monitoring
2020
ProcessMonitor::ProcessMonitor()
2121
{
2222
mPid = static_cast<unsigned int>(::getpid());
23-
for (auto const param : mPsParams) {
24-
mPsCommand = mPsCommand.empty() ? param.first : mPsCommand += (',' + param.first);
25-
}
26-
mPsCommand = "ps --no-headers -o " + mPsCommand + " --pid ";
23+
getrusage(RUSAGE_SELF, &mPreviousGetrUsage);
24+
mTimeLastRun = std::chrono::high_resolution_clock::now();
2725
}
2826

2927
std::vector<Metric> ProcessMonitor::getNetworkUsage()
3028
{
3129
std::vector<Metric> metrics;
3230
std::stringstream ss;
3331
// get bytes received and transmitted per interface
34-
ss << "cat /proc/" << mPid << "/net/dev | tail -n +3 |awk ' {print $1 $2 \":\" $10}'";
32+
ss << "cat /proc/" << mPid << "/net/dev | tail -n +3 | grep -v -e 'lo' -e 'virbr0' | awk ' {print $1 $2 \":\" $10}'";
3533
std::string output = exec(ss.str().c_str());
3634
// for each line (each network interface)
3735
std::istringstream iss(output);
@@ -50,31 +48,37 @@ std::vector<Metric> ProcessMonitor::getNetworkUsage()
5048
return metrics;
5149
}
5250

53-
std::vector<Metric> ProcessMonitor::getPidStatus()
51+
Metric ProcessMonitor::getMemoryUsage()
5452
{
55-
std::vector<Metric> metrics;
56-
std::string command = mPsCommand + std::to_string(mPid);
53+
std::string command = "ps --no-headers -o pmem --pid " + std::to_string(mPid);
5754
std::string output = exec(command.c_str());
58-
59-
// split output into std vector
60-
std::vector<std::string> pidParams;
6155
boost::trim(output);
62-
boost::split(pidParams, output, boost::is_any_of("\t "), boost::token_compress_on);
63-
64-
// parse output, cast to appropriate types
65-
auto j = mPsParams.begin();
66-
for (auto i = pidParams.begin(); i != pidParams.end(); ++i, ++j) {
67-
if (j->second == MetricType::DOUBLE) {
68-
metrics.emplace_back(Metric{std::stod(*i), j->first});
69-
}
70-
else if (j->second == MetricType::INT) {
71-
metrics.emplace_back(Metric{std::stoi(*i), j->first});
72-
}
73-
else {
74-
metrics.emplace_back(Metric{*i, j->first});
75-
}
56+
return Metric{std::stod(output), "memoryUsagePercentage"};
57+
}
58+
59+
std::vector<Metric> ProcessMonitor::getCpuAndContexts() {
60+
std::vector<Metric> metrics;
61+
struct rusage currentUsage;
62+
getrusage(RUSAGE_SELF, &currentUsage);
63+
auto timeNow = std::chrono::high_resolution_clock::now();
64+
double timePassed = std::chrono::duration_cast<std::chrono::microseconds>(timeNow - mTimeLastRun).count();
65+
if (timePassed < 950) { // do not run too often
66+
throw MonitoringInternalException("Process Monitor getrusage", "Do not invoke more often then 1ms");
7667
}
68+
double fractionCpuUsed = (
69+
currentUsage.ru_utime.tv_sec*1000000.0 + currentUsage.ru_utime.tv_usec - (mPreviousGetrUsage.ru_utime.tv_sec*1000000.0 + mPreviousGetrUsage.ru_utime.tv_usec)
70+
+ currentUsage.ru_stime.tv_sec*1000000.0 + currentUsage.ru_stime.tv_usec - (mPreviousGetrUsage.ru_stime.tv_sec*1000000.0 + mPreviousGetrUsage.ru_stime.tv_usec)
71+
) / timePassed;
72+
73+
metrics.emplace_back(Metric{
74+
static_cast<double>(std::round(fractionCpuUsed * 100.0 * 100.0 ) / 100.0), "cpuUsedPercentage"
75+
});
76+
metrics.emplace_back(Metric{
77+
static_cast<uint64_t>(currentUsage.ru_nivcsw - mPreviousGetrUsage.ru_nivcsw), "involuntaryContextSwitches"
78+
});
7779

80+
mTimeLastRun = timeNow;
81+
mPreviousGetrUsage = currentUsage;
7882
return metrics;
7983
}
8084

test/testProcessMonitor.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ namespace Test {
1111
BOOST_AUTO_TEST_CASE(createProcessMonitor)
1212
{
1313
o2::monitoring::ProcessMonitor processMonitor;
14+
#ifdef _OS_LINUX
1415
processMonitor.getNetworkUsage();
15-
processMonitor.getPidStatus();
16+
processMonitor.getMemoryUsage();
17+
#endif
1618
}
1719

1820
} // namespace Test

0 commit comments

Comments
 (0)