Skip to content

Commit b6f2190

Browse files
authored
[OMON-593] Use smaps_rollup on CS8 and add API to enable each PM measurement (#299)
1 parent 5ba0c0b commit b6f2190

File tree

8 files changed

+142
-35
lines changed

8 files changed

+142
-35
lines changed

CMakeLists.txt

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ endif()
3131

3232
# Define project
3333
project(Monitoring
34-
VERSION 3.12.11
34+
VERSION 3.13.0
3535
DESCRIPTION "O2 Monitoring library"
3636
LANGUAGES CXX
3737
)
@@ -178,6 +178,20 @@ endif()
178178
if (UNIX AND NOT APPLE)
179179
message(STATUS "Detected Linux: Process monitor enabled")
180180
set(LINUX true)
181+
# Detect CC7 or CS8
182+
if(EXISTS "/etc/os-release")
183+
file(STRINGS "/etc/os-release" OS_RELEASE)
184+
foreach(KV ${OS_RELEASE})
185+
if(KV MATCHES "^VERSION=\"8\"")
186+
set(CS8 true)
187+
message(STATUS " Detected CS8")
188+
endif()
189+
if(KV MATCHES "^VERSION=\"7")
190+
set(CC7 true)
191+
message(STATUS " Detected CC7")
192+
endif()
193+
endforeach()
194+
endif()
181195
endif()
182196

183197
if (APPLE)
@@ -193,6 +207,8 @@ target_compile_definitions(Monitoring
193207
PRIVATE
194208
$<$<BOOL:${APPLE}>:O2_MONITORING_OS_MAC>
195209
$<$<BOOL:${LINUX}>:O2_MONITORING_OS_LINUX>
210+
$<$<BOOL:${CC7}>:O2_MONITORING_OS_CC7>
211+
$<$<BOOL:${CS8}>:O2_MONITORING_OS_CS8>
196212
$<$<BOOL:${ApMon_FOUND}>:O2_MONITORING_WITH_APPMON>
197213
$<$<BOOL:${RdKafka_FOUND}>:O2_MONITORING_WITH_KAFKA>
198214
$<$<BOOL:${CURL_FOUND}>:O2_MONITORING_WITH_CURL>

README.md

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,22 +151,32 @@ See how it works in the example: [examples/4-RateDerivedMetric.cxx](examples/4-R
151151
152152
### Process monitoring
153153
154-
This feature provides basic performance status of the process. Note that is runs in separate thread (without mutex).
154+
This feature provides basic performance status of the process. Note that it runs in a separate thread.
155155
156156
```cpp
157-
enableProcessMonitoring([interval in seconds]);
157+
enableProcessMonitoring([interval in seconds, {Measurement list}]);
158158
```
159+
List of valid measurements:
160+
- `Monitor::Cpu`
161+
- `Monitor::Mem`
162+
- `Monitor::Smaps` - Beware: enabling this will trigger the kernel to run `smaps_account` periodically.
163+
159164
Following metrics are generated every time interval:
160-
CPU measurements:
165+
`Monitor::Cpu`:
161166
+ **cpuUsedPercentage** - percentage of a core usage (kernel + user mode) over time interval
162167
+ **involuntaryContextSwitches** - involuntary context switches over time interval
163168
+ **cpuUsedAbsolute** - amount of time spent on process execution (in user and kernel mode) over time interval (expressed in microseconds)
164169

165-
Memory measurements: (Linux only)
170+
`Monitor::Mem`: (Linux only)
166171
+ **memoryUsagePercentage** - ratio of the process's virtual memory to memory available on the machine
167172
+ **virtualMemorySize** - virtual memory reserved by process (expressed in kB)
168173
+ **residentSetSize** - resident set size reserved by process (expressed in kB)
169174

175+
`Monitor::Smaps`: (Linux only)
176+
+ **proportionalSetSize** - count of pages it has in memory, where each page is divided by the number of processes sharing it
177+
+ **memoryPrivateClean** - unmodified private pages
178+
+ **memoryPrivateDirty** - modified private pages
179+
170180
Additional metrics are generated at the end of process execution:
171181
CPU measurements:
172182
+ **cpuTimeConsumedByProcess** - total amount of time spent on process execution (in user and kernel mode) (expressed in microseconds)

examples/5-Benchmark.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ int main(int argc, char* argv[])
6767

6868
auto monitoring = MonitoringFactory::Get(vm["url"].as<std::string>());
6969
if (vm["monitor"].as<bool>()) {
70-
monitoring->enableProcessMonitoring(1);
70+
monitoring->enableProcessMonitoring(1, {Monitor::Cpu, Monitor::Mem, Monitor::Smaps});
7171
}
7272
if (vm["multiple"].as<bool>()) {
7373
for (int j = 1; j <= count; j++) {

include/Monitoring/Monitoring.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ class Monitoring
7070

7171
/// Enables process monitoring
7272
/// \param interval refresh interval
73-
void enableProcessMonitoring(const unsigned int interval = 5);
73+
/// \param enabledMeasurements vector of monitor measurements to enable, e.g. Monitor::Cpu
74+
void enableProcessMonitoring(const unsigned int interval = 5, std::vector<Monitor> enabledMeasurements = {Monitor::Cpu, Monitor::Mem});
7475

7576
/// Flushes metric buffer (this can also happen when buffer is full)
7677
void flushBuffer();

include/Monitoring/ProcessMonitor.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ namespace o2
3131
namespace monitoring
3232
{
3333

34+
/// Groups of process measurements that can be enabled individually.
/// The underlying value of each enumerator is used as an index into
/// ProcessMonitor's array of enabled measurements, so the enumerators must stay contiguous from 0.
enum class Monitor : short {
35+
Cpu, ///< CPU usage and context-switch metrics
36+
Mem, ///< Memory usage metrics (only collected in Linux builds)
37+
Smaps ///< Proportional set size and private clean/dirty totals read from the smaps file (only collected in Linux builds)
38+
};
39+
3440
/// Monitors current process and/or other processes running on the same machine
3541
class ProcessMonitor
3642
{
@@ -58,22 +64,27 @@ class ProcessMonitor
5864
static std::vector<std::string> getAvailableMetricsNames();
5965
std::vector<Metric> getPerformanceMetrics();
6066

61-
public:
62-
/// Prepares externam software commands (ps)
67+
/// Sets PID and total memory
6368
ProcessMonitor();
6469

6570
/// Default destructor
6671
~ProcessMonitor() = default;
6772

73+
/// Set initial variables for CPU usage calculations
6874
void init();
6975

76+
/// Enable given measurement
77+
void enable(Monitor measurement);
78+
7079
private:
80+
/// States which measurements are enabled
81+
std::array<bool, 3> mEnabledMeasurements;
82+
7183
double splitStatusLineAndRetriveValue(const std::string& line) const;
7284

7385
/// Retrieves total memory size from /proc/meminfo
7486
void setTotalMemory();
7587

76-
private:
7788
static constexpr const char* metricsNames[] = {"memoryUsagePercentage", "virtualMemorySize", "residentSetSize",
7889
"cpuUsedPercentage", "involuntaryContextSwitches", "voluntaryContextSwitches", "cpuUsedAbsolute",
7990
"averageResidentSetSize", "averageVirtualMemorySize", "averageCpuUsedPercentage",

src/Monitoring.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ Monitoring::Monitoring()
4040
mDerivedHandler = std::make_unique<DerivedMetrics>();
4141
mBuffering = false;
4242
mProcessMonitoringInterval = 0;
43-
//mAutoPushInterval = 0;
4443
mMonitorRunning = false;
4544
}
4645

@@ -83,9 +82,12 @@ void Monitoring::flushBuffer(const short index)
8382
mStorage[index].clear();
8483
}
8584

86-
void Monitoring::enableProcessMonitoring(const unsigned int interval)
85+
void Monitoring::enableProcessMonitoring(const unsigned int interval, std::vector<Monitor> enabledMeasurements)
8786
{
8887
mProcessMonitoringInterval = interval;
88+
for (const auto& measurement : enabledMeasurements) {
89+
mProcessMonitor->enable(measurement);
90+
}
8991
if (!mMonitorRunning) {
9092
mProcessMonitor->init();
9193
mMonitorRunning = true;

src/ProcessMonitor.cxx

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ namespace o2
3030
namespace monitoring
3131
{
3232

33+
/// Path of the smaps file opened by ProcessMonitor::getSmaps().
/// Builds with O2_MONITORING_OS_CS8 defined read /proc/self/smaps_rollup;
/// all other builds read /proc/self/smaps.
#ifdef O2_MONITORING_OS_CS8
34+
static constexpr auto SMAPS_FILE = "/proc/self/smaps_rollup";
35+
#else
36+
static constexpr auto SMAPS_FILE = "/proc/self/smaps";
37+
#endif
38+
3339
ProcessMonitor::ProcessMonitor()
3440
{
3541
mPid = static_cast<unsigned int>(::getpid());
@@ -38,6 +44,7 @@ ProcessMonitor::ProcessMonitor()
3844
#ifdef O2_MONITORING_OS_LINUX
3945
setTotalMemory();
4046
#endif
47+
mEnabledMeasurements.fill(false);
4148
}
4249

4350
void ProcessMonitor::init()
@@ -46,6 +53,11 @@ void ProcessMonitor::init()
4653
getrusage(RUSAGE_SELF, &mPreviousGetrUsage);
4754
}
4855

56+
/// Marks the given measurement group as enabled.
/// The enumerator's underlying value is used directly as the index into mEnabledMeasurements.
/// \param measurement measurement group to enable
void ProcessMonitor::enable(Monitor measurement)
57+
{
58+
// operator[] performs no bounds check, so `measurement` must be a valid Monitor enumerator
mEnabledMeasurements[static_cast<short>(measurement)] = true;
59+
}
60+
4961
void ProcessMonitor::setTotalMemory()
5062
{
5163
std::ifstream memInfo("/proc/meminfo");
@@ -85,7 +97,7 @@ std::vector<Metric> ProcessMonitor::getMemoryUsage()
8597

8698
std::vector<Metric> ProcessMonitor::getSmaps()
8799
{
88-
std::ifstream statusStream("/proc/self/smaps");
100+
std::ifstream statusStream(SMAPS_FILE);
89101
double pssTotal = 0;
90102
double cleanTotal = 0;
91103
double dirtyTotal = 0;
@@ -147,41 +159,53 @@ double ProcessMonitor::splitStatusLineAndRetriveValue(const std::string& line) c
147159

148160
std::vector<Metric> ProcessMonitor::getPerformanceMetrics()
149161
{
150-
auto metrics = getCpuAndContexts();
162+
std::vector<Metric> metrics;
163+
metrics.reserve(12);
164+
if (mEnabledMeasurements.at(static_cast<short>(Monitor::Cpu))) {
165+
auto cpuMetrics = getCpuAndContexts();
166+
std::move(cpuMetrics.begin(), cpuMetrics.end(), std::back_inserter(metrics));
167+
}
151168
#ifdef O2_MONITORING_OS_LINUX
152-
auto memoryMetrics = getMemoryUsage();
153-
std::move(memoryMetrics.begin(), memoryMetrics.end(), std::back_inserter(metrics));
154-
auto smapMetrics = getSmaps();
155-
std::move(smapMetrics.begin(), smapMetrics.end(), std::back_inserter(metrics));
169+
if (mEnabledMeasurements.at(static_cast<short>(Monitor::Mem))) {
170+
auto memoryMetrics = getMemoryUsage();
171+
std::move(memoryMetrics.begin(), memoryMetrics.end(), std::back_inserter(metrics));
172+
}
173+
if (mEnabledMeasurements.at(static_cast<short>(Monitor::Smaps))) {
174+
auto smapMetrics = getSmaps();
175+
std::move(smapMetrics.begin(), smapMetrics.end(), std::back_inserter(metrics));
176+
}
156177
#endif
157178
return metrics;
158179
}
159180

160181
std::vector<Metric> ProcessMonitor::makeLastMeasurementAndGetMetrics()
161182
{
162183
std::vector<Metric> metrics;
163-
getCpuAndContexts();
164184
#ifdef O2_MONITORING_OS_LINUX
165-
getMemoryUsage();
185+
if (mEnabledMeasurements.at(static_cast<short>(Monitor::Mem))) {
186+
getMemoryUsage();
166187

167-
auto avgVmRSS = std::accumulate(mVmRssMeasurements.begin(), mVmRssMeasurements.end(), 0.0) /
168-
mVmRssMeasurements.size();
188+
auto avgVmRSS = std::accumulate(mVmRssMeasurements.begin(), mVmRssMeasurements.end(), 0.0) /
189+
mVmRssMeasurements.size();
169190

170-
metrics.emplace_back(avgVmRSS, metricsNames[AVG_RESIDENT_SET_SIZE]);
191+
metrics.emplace_back(avgVmRSS, metricsNames[AVG_RESIDENT_SET_SIZE]);
171192

172-
auto avgVmSize = std::accumulate(mVmSizeMeasurements.begin(), mVmSizeMeasurements.end(), 0.0) /
173-
mVmSizeMeasurements.size();
174-
metrics.emplace_back(avgVmSize, metricsNames[AVG_VIRTUAL_MEMORY_SIZE]);
193+
auto avgVmSize = std::accumulate(mVmSizeMeasurements.begin(), mVmSizeMeasurements.end(), 0.0) /
194+
mVmSizeMeasurements.size();
195+
metrics.emplace_back(avgVmSize, metricsNames[AVG_VIRTUAL_MEMORY_SIZE]);
196+
}
175197
#endif
198+
if (mEnabledMeasurements.at(static_cast<short>(Monitor::Cpu))) {
199+
getCpuAndContexts();
176200

177-
auto avgCpuUsage = std::accumulate(mCpuPerctange.begin(), mCpuPerctange.end(), 0.0) /
178-
mCpuPerctange.size();
179-
uint64_t accumulationOfCpuTimeConsumption = std::accumulate(mCpuMicroSeconds.begin(),
180-
mCpuMicroSeconds.end(), 0UL);
181-
182-
metrics.emplace_back(avgCpuUsage, metricsNames[AVG_CPU_USED_PERCENTAGE]);
183-
metrics.emplace_back(accumulationOfCpuTimeConsumption, metricsNames[ACCUMULATED_CPU_TIME]);
201+
auto avgCpuUsage = std::accumulate(mCpuPerctange.begin(), mCpuPerctange.end(), 0.0) /
202+
mCpuPerctange.size();
203+
uint64_t accumulationOfCpuTimeConsumption = std::accumulate(mCpuMicroSeconds.begin(),
204+
mCpuMicroSeconds.end(), 0UL);
184205

206+
metrics.emplace_back(avgCpuUsage, metricsNames[AVG_CPU_USED_PERCENTAGE]);
207+
metrics.emplace_back(accumulationOfCpuTimeConsumption, metricsNames[ACCUMULATED_CPU_TIME]);
208+
}
185209
return metrics;
186210
}
187211

test/testProcessMonitor.cxx

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,58 @@ void disableRedirect()
3838
}
3939

4040

41-
BOOST_AUTO_TEST_CASE(monitorProcess)
41+
42+
// Enables process monitoring with the default measurement list and checks
// only the number of lines written to the redirected output, not their names.
BOOST_AUTO_TEST_CASE(monitorProcessDefaultCount)
43+
{
44+
{
45+
auto monitoring = o2::monitoring::MonitoringFactory::Get("influxdb-stdout://");
46+
// No measurement list is passed, so the declaration's default ({Monitor::Cpu, Monitor::Mem}) applies
monitoring->enableProcessMonitoring(1);
47+
enableRedirect();
48+
std::this_thread::sleep_for(std::chrono::milliseconds(200));
49+
// `monitoring` goes out of scope here, before the captured output is read
// (presumably destroying the Monitoring instance and emitting the final metrics - confirm)
}
50+
51+
std::istringstream returned(coutRedirect.str());
52+
disableRedirect();
53+
unsigned short int countMetrics = 0;
54+
// Every captured output line is counted as one metric
for (std::string line; std::getline(returned, line); ) {
55+
countMetrics++;
56+
}
57+
// On linux 11 (without Smaps) and macOS 6
58+
BOOST_CHECK(countMetrics == 11 || countMetrics == 6);
59+
}
60+
61+
62+
// Enables only the Monitor::Cpu measurement and checks that every captured
// output line starts with one of the expected CPU metric names, and that
// exactly six lines were produced.
BOOST_AUTO_TEST_CASE(monitorProcessCpuOnly)
63+
{
64+
// Metric names accepted in the output of a CPU-only run
std::array<std::string, 6> names = {"cpuUsedPercentage", "involuntaryContextSwitches", "voluntaryContextSwitches", "cpuUsedAbsolute",
65+
"averageCpuUsedPercentage", "cpuTimeConsumedByProcess"};
66+
{
67+
auto monitoring = o2::monitoring::MonitoringFactory::Get("influxdb-stdout://");
68+
monitoring->enableProcessMonitoring(1, {Monitor::Cpu});
69+
enableRedirect();
70+
std::this_thread::sleep_for(std::chrono::milliseconds(200));
71+
// `monitoring` goes out of scope here, before the captured output is read
// (presumably destroying the Monitoring instance and emitting the final metrics - confirm)
}
72+
73+
std::istringstream returned(coutRedirect.str());
74+
disableRedirect();
75+
unsigned short int countMetrics = 0;
76+
for (std::string line; std::getline(returned, line); ) {
77+
// The metric name is the text before the first ',' of the line
BOOST_CHECK(std::find(names.begin(), names.end(), line.substr(0, line.find(','))) != names.end());
78+
countMetrics++;
79+
}
80+
// On linux and macOS 6
81+
BOOST_CHECK_EQUAL(countMetrics, 6);
82+
}
83+
84+
BOOST_AUTO_TEST_CASE(monitorProcessAll)
4285
{
4386
std::array<std::string, 14> names = {"memoryUsagePercentage", "virtualMemorySize", "residentSetSize",
4487
"cpuUsedPercentage", "involuntaryContextSwitches", "voluntaryContextSwitches", "cpuUsedAbsolute",
4588
"averageResidentSetSize", "averageVirtualMemorySize", "averageCpuUsedPercentage",
4689
"cpuTimeConsumedByProcess", "proportionalSetSize", "memoryPrivateClean", "memoryPrivateDirty"};
4790
{
4891
auto monitoring = o2::monitoring::MonitoringFactory::Get("influxdb-stdout://");
49-
monitoring->enableProcessMonitoring(1);
92+
monitoring->enableProcessMonitoring(1, {Monitor::Cpu, Monitor::Smaps, Monitor::Mem});
5093
enableRedirect();
5194
std::this_thread::sleep_for(std::chrono::milliseconds(200));
5295
}

0 commit comments

Comments
 (0)