Skip to content

Commit ff8b74c

Browse files
charliekangopcm
authored andcommitted
Add native PCIe metrics collection and export support (#925)
* Add native PCIe metrics collection and export support - Move IPlatform::getPlatform() from pcm-pcie.cpp to pcm-pcie.h as inline so both pcm-pcie (CLI) and pcm-sensor-server (HTTP exporter) can instantiate platform objects without duplicating the factory logic - Add pcm-pcie-collector.h for Prometheus-style PCIe metrics collection - Expose IPlatform data accessors (getReadBw, getWriteBw, event, etc.) as public so PCIeCollector can read metrics externally - Extend pcm-sensor-server.cpp with PCIe metrics HTTP endpoints - Replace exit() with throw std::runtime_error() for server safety - Use override instead of virtual for compile-time correctness - Fix build warning in pcm-pcicfg.cpp
1 parent e2e6dd5 commit ff8b74c

File tree

5 files changed

+327
-82
lines changed

5 files changed

+327
-82
lines changed

src/pcm-pcicfg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ int mainThrows(int argc, char * argv[])
137137
}
138138

139139
// List all PCI devices
140-
forAllDevices([&dec, &verbosity, &pciDB](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 device_id)
140+
forAllDevices([&dec, &verbosity, &pciDB](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */)
141141
{
142142
if (PciHandleType::exists(group, bus, device, function) == false)
143143
{

src/pcm-pcie-collector.h

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
// Copyright (c) 2026, Intel Corporation
3+
#pragma once
4+
5+
#include "pcm-pcie.h"
6+
7+
#include <condition_variable>
8+
#include <mutex>
9+
#include <thread>
10+
#include <atomic>
11+
#include <vector>
12+
#include <string>
13+
#include <cstdint>
14+
#include <iostream>
15+
16+
class PCIeCollector {
17+
public:
18+
struct SocketBW {
19+
uint64_t readBytes = 0;
20+
uint64_t writeBytes = 0;
21+
};
22+
23+
static PCIeCollector* getInstance() {
24+
static PCIeCollector instance;
25+
return instance.supported_ ? &instance : nullptr;
26+
}
27+
28+
PCIeCollector(PCIeCollector const &) = delete;
29+
PCIeCollector& operator=(PCIeCollector const &) = delete;
30+
PCIeCollector(PCIeCollector &&) = delete;
31+
PCIeCollector& operator=(PCIeCollector &&) = delete;
32+
33+
static constexpr uint32_t kDefaultIntervalMs = 2000;
34+
35+
void startBackground(uint32_t intervalMs = kDefaultIntervalMs) {
36+
bool expected = false;
37+
if (!bgRunning_.compare_exchange_strong(expected, true)) return;
38+
bgThread_ = std::thread([this, intervalMs]() {
39+
while (bgRunning_.load()) {
40+
collect();
41+
std::unique_lock<std::mutex> lk(mu_);
42+
cv_.wait_for(lk, std::chrono::milliseconds(intervalMs),
43+
[this] { return !bgRunning_.load(); });
44+
}
45+
});
46+
}
47+
48+
void stop() {
49+
bool expected = true;
50+
if (!bgRunning_.compare_exchange_strong(expected, false)) return;
51+
cv_.notify_one();
52+
if (bgThread_.joinable()) bgThread_.join();
53+
}
54+
55+
~PCIeCollector() { stop(); }
56+
57+
uint32_t socketCount() const { return socketCount_; }
58+
59+
SocketBW getSocket(uint32_t skt) const {
60+
std::lock_guard<std::mutex> lk(mu_);
61+
if (skt < snapshot_.size()) return snapshot_[skt];
62+
return {};
63+
}
64+
65+
SocketBW getAggregate() const {
66+
std::lock_guard<std::mutex> lk(mu_);
67+
return aggregate_;
68+
}
69+
70+
std::vector<uint64_t> getRawValues(uint32_t skt) const {
71+
std::lock_guard<std::mutex> lk(mu_);
72+
if (skt < rawValues_.size()) return rawValues_[skt];
73+
return {};
74+
}
75+
76+
std::vector<uint64_t> getRawAggregate() const {
77+
std::lock_guard<std::mutex> lk(mu_);
78+
return rawAggValues_;
79+
}
80+
81+
const std::vector<std::string>& eventNames() const { return eventNames_; }
82+
uint32_t numEvents() const { return static_cast<uint32_t>(eventNames_.size()); }
83+
bool isSupported() const { return supported_; }
84+
85+
private:
86+
PCIeCollector() {
87+
try {
88+
PCM* pcm = PCM::getInstance();
89+
static constexpr uint32_t kPmonMultiplier = 1000;
90+
platform_.reset(IPlatform::getPlatform(pcm, false, true, false, kPmonMultiplier));
91+
if (platform_) {
92+
supported_ = true;
93+
socketCount_ = pcm->getNumSockets();
94+
const auto& names = platform_->getEventNames();
95+
eventNames_.assign(names.begin(), names.end());
96+
snapshot_.resize(socketCount_);
97+
rawValues_.resize(socketCount_, std::vector<uint64_t>(eventNames_.size(), 0));
98+
rawAggValues_.resize(eventNames_.size(), 0);
99+
cumSnapshot_.resize(socketCount_);
100+
cumRawValues_.resize(socketCount_, std::vector<uint64_t>(eventNames_.size(), 0));
101+
}
102+
} catch (const std::exception& e) {
103+
std::cerr << "PCIeCollector: " << e.what() << " (PCIe metrics disabled)\n";
104+
supported_ = false;
105+
}
106+
}
107+
108+
void collect() {
109+
if (!platform_ || !bgRunning_.load()) return;
110+
platform_->cleanup();
111+
platform_->getEvents();
112+
113+
SocketBW aggDelta{0, 0};
114+
const uint32_t nEvt = numEvents();
115+
116+
for (uint32_t s = 0; s < socketCount_; ++s) {
117+
uint64_t dr = platform_->getReadBw(s, IPlatform::TOTAL);
118+
uint64_t dw = platform_->getWriteBw(s, IPlatform::TOTAL);
119+
cumSnapshot_[s].readBytes += dr;
120+
cumSnapshot_[s].writeBytes += dw;
121+
aggDelta.readBytes += dr;
122+
aggDelta.writeBytes += dw;
123+
for (uint32_t i = 0; i < nEvt; ++i)
124+
cumRawValues_[s][i] += platform_->event(s, IPlatform::TOTAL, i);
125+
}
126+
cumAggregate_.readBytes += aggDelta.readBytes;
127+
cumAggregate_.writeBytes += aggDelta.writeBytes;
128+
129+
std::vector<uint64_t> rawAgg(nEvt, 0);
130+
for (uint32_t s = 0; s < socketCount_; ++s)
131+
for (uint32_t i = 0; i < nEvt; ++i)
132+
rawAgg[i] += cumRawValues_[s][i];
133+
134+
std::lock_guard<std::mutex> lk(mu_);
135+
snapshot_ = cumSnapshot_;
136+
aggregate_ = cumAggregate_;
137+
rawValues_ = cumRawValues_;
138+
rawAggValues_ = rawAgg;
139+
}
140+
141+
std::unique_ptr<IPlatform> platform_;
142+
bool supported_ = false;
143+
uint32_t socketCount_ = 0;
144+
std::vector<std::string> eventNames_;
145+
146+
std::vector<SocketBW> snapshot_;
147+
SocketBW aggregate_;
148+
std::vector<std::vector<uint64_t>> rawValues_;
149+
std::vector<uint64_t> rawAggValues_;
150+
151+
std::vector<SocketBW> cumSnapshot_;
152+
SocketBW cumAggregate_;
153+
std::vector<std::vector<uint64_t>> cumRawValues_;
154+
155+
mutable std::mutex mu_;
156+
std::condition_variable cv_;
157+
std::thread bgThread_;
158+
std::atomic<bool> bgRunning_{false};
159+
};

src/pcm-pcie.cpp

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -93,35 +93,7 @@ void print_usage(const string & progname)
9393
cout << "\n";
9494
}
9595

96-
IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay)
97-
{
98-
switch (m->getCPUFamilyModel()) {
99-
case PCM::GNR:
100-
case PCM::GNR_D:
101-
case PCM::SRF:
102-
return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay);
103-
case PCM::GRR:
104-
return new LoganvillePlatform(m, csv, print_bandwidth, print_additional_info, delay);
105-
case PCM::SPR:
106-
case PCM::EMR:
107-
return new EagleStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay);
108-
case PCM::ICX:
109-
case PCM::SNOWRIDGE:
110-
return new WhitleyPlatform(m, csv, print_bandwidth, print_additional_info, delay);
111-
case PCM::SKX:
112-
return new PurleyPlatform(m, csv, print_bandwidth, print_additional_info, delay);
113-
case PCM::BDX_DE:
114-
case PCM::BDX:
115-
case PCM::KNL:
116-
case PCM::HASWELLX:
117-
return new GrantleyPlatform(m, csv, print_bandwidth, print_additional_info, delay);
118-
case PCM::IVYTOWN:
119-
case PCM::JAKETOWN:
120-
return new BromolowPlatform(m, csv, print_bandwidth, print_additional_info, delay);
121-
default:
122-
return NULL;
123-
}
124-
}
96+
// getPlatform() is defined inline in pcm-pcie.h.
12597

12698
PCM_MAIN_NOTHROW;
12799

0 commit comments

Comments
 (0)