Skip to content

Commit 2ab3d17

Browse files
committed
Replicate generic hardware events on all CPU PMUs
On systems with more than one CPU PMU (e.g. Apple M-series SoCs), a generic hardware event is created on only one, arbitrarily chosen PMU. Usually this is the big cluster's PMU, which can cause inaccuracies when the process is scheduled onto a little core. To fix this, teach PerfCounters to register generic hardware events on all CPU PMUs. CPU PMUs are identified using the same method as perf.
1 parent eed8f5c commit 2ab3d17

File tree

1 file changed

+91
-39
lines changed

1 file changed

+91
-39
lines changed

src/perf_counters.cc

Lines changed: 91 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
#include <vector>
2020

2121
#if defined HAVE_LIBPFM
22+
#include <dirent.h>
23+
#include <fcntl.h>
24+
#include <linux/perf_event.h>
25+
#include <sys/stat.h>
26+
2227
#include "perfmon/pfmlib.h"
2328
#include "perfmon/pfmlib_perf_event.h"
2429
#endif
@@ -68,7 +73,7 @@ bool PerfCounters::Initialize() {
6873

6974
bool PerfCounters::IsCounterSupported(const std::string& name) {
7075
Initialize();
71-
perf_event_attr_t attr;
76+
perf_event_attr attr;
7277
std::memset(&attr, 0, sizeof(attr));
7378
pfm_perf_encode_arg_t arg;
7479
std::memset(&arg, 0, sizeof(arg));
@@ -79,6 +84,49 @@ bool PerfCounters::IsCounterSupported(const std::string& name) {
7984
return (ret == PFM_SUCCESS);
8085
}
8186

87+
// Returns the PMU type ids on which the event described by `attr` should be
// opened.
//
// For a generic hardware event (`attr.type == PERF_TYPE_HARDWARE` with
// `attr.config < PERF_COUNT_HW_MAX`) this scans
// /sys/bus/event_source/devices and collects the number stored in the `type`
// node of every entry that is named "cpu" or "cpum_cf", or for which a `cpus`
// node is not reported as missing.
//
// For every other event, and whenever the scan cannot be completed or yields
// no PMU at all, the result is the single-element list {0}. The result is
// therefore never empty, so a caller iterating over it always attempts to
// open the event at least once.
static std::vector<uint64_t> GetPMUTypesForEvent(const perf_event_attr& attr) {
  // Only generic hardware events are replicated across PMUs.
  if (attr.type != PERF_TYPE_HARDWARE || attr.config >= PERF_COUNT_HW_MAX) {
    return {0};
  }

  static constexpr char kDevicesDir[] = "/sys/bus/event_source/devices";
  DIR* dir = opendir(kDevicesDir);
  if (!dir) {
    return {0};
  }

  std::vector<uint64_t> types;
  bool scan_ok = true;
  while (const dirent* ent = readdir(dir)) {
    const std::string_view name_str = ent->d_name;
    const std::string pmu_dir =
        std::string(kDevicesDir) + "/" + ent->d_name + "/";

    // `errno` is only inspected when stat() has just failed, so it always
    // refers to that call. Any failure other than ENOENT is treated as
    // "the node may exist".
    struct stat st;
    const bool is_cpu_pmu =
        name_str == "cpu" || name_str == "cpum_cf" ||
        stat((pmu_dir + "cpus").c_str(), &st) == 0 || errno != ENOENT;
    if (!is_cpu_pmu) {
      continue;
    }

    const int type_fd = open((pmu_dir + "type").c_str(), O_RDONLY);
    if (type_fd < 0) {
      scan_ok = false;
      break;
    }
    // Zero-initialised and read with one byte to spare so the buffer is
    // always NUL-terminated for sscanf.
    char type_str[32] = {};
    const ssize_t res = read(type_fd, type_str, sizeof(type_str) - 1);
    close(type_fd);

    uint64_t type = 0;
    if (res < 0 || sscanf(type_str, "%" PRIu64, &type) != 1) {
      scan_ok = false;
      break;
    }
    types.push_back(type);
  }
  closedir(dir);

  // A partially failed scan, or one that found no CPU PMU, falls back to the
  // default single entry instead of returning a partial or empty list (an
  // empty list would make the caller skip the event without any warning).
  if (!scan_ok || types.empty()) {
    return {0};
  }
  return types;
}
129+
82130
PerfCounters PerfCounters::Create(
83131
const std::vector<std::string>& counter_names) {
84132
if (!counter_names.empty()) {
@@ -158,50 +206,54 @@ PerfCounters PerfCounters::Create(
158206
attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED |
159207
// PERF_FORMAT_TOTAL_TIME_RUNNING;
160208

161-
int id = -1;
162-
while (id < 0) {
163-
static constexpr size_t kNrOfSyscallRetries = 5;
164-
// Retry syscall as it was interrupted often (b/64774091).
165-
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
166-
++num_retries) {
167-
id = perf_event_open(&attr, 0, -1, group_id, 0);
168-
if (id >= 0 || errno != EINTR) {
169-
break;
209+
uint64_t base_config = attr.config;
210+
for (uint64_t pmu : GetPMUTypesForEvent(attr)) {
211+
attr.config = (pmu << PERF_PMU_TYPE_SHIFT) | base_config;
212+
int id = -1;
213+
while (id < 0) {
214+
static constexpr size_t kNrOfSyscallRetries = 5;
215+
// Retry syscall as it was interrupted often (b/64774091).
216+
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
217+
++num_retries) {
218+
id = perf_event_open(&attr, 0, -1, group_id, 0);
219+
if (id >= 0 || errno != EINTR) {
220+
break;
221+
}
170222
}
171-
}
172-
if (id < 0) {
173-
// If the file descriptor is negative we might have reached a limit
174-
// in the current group. Set the group_id to -1 and retry
175-
if (group_id >= 0) {
176-
// Create a new group
177-
group_id = -1;
178-
} else {
179-
// At this point we have already retried to set a new group id and
180-
// failed. We then give up.
181-
break;
223+
if (id < 0) {
224+
// If the file descriptor is negative we might have reached a limit
225+
// in the current group. Set the group_id to -1 and retry
226+
if (group_id >= 0) {
227+
// Create a new group
228+
group_id = -1;
229+
} else {
230+
// At this point we have already retried to set a new group id and
231+
// failed. We then give up.
232+
break;
233+
}
182234
}
183235
}
184-
}
185236

186-
// We failed to get a new file descriptor. We might have reached a hard
187-
// hardware limit that cannot be resolved even with group multiplexing
188-
if (id < 0) {
189-
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
190-
"for performance counter "
191-
<< name << ". Ignoring\n";
237+
// We failed to get a new file descriptor. We might have reached a hard
238+
// hardware limit that cannot be resolved even with group multiplexing
239+
if (id < 0) {
240+
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
241+
"for performance counter "
242+
<< name << ". Ignoring\n";
192243

193-
// We give up on this counter but try to keep going
194-
// as the others would be fine
195-
continue;
196-
}
197-
if (group_id < 0) {
198-
// This is a leader, store and assign it to the current file descriptor
199-
leader_ids.push_back(id);
200-
group_id = id;
244+
// We give up on this counter but try to keep going
245+
// as the others would be fine
246+
continue;
247+
}
248+
if (group_id < 0) {
249+
// This is a leader, store and assign it to the current file descriptor
250+
leader_ids.push_back(id);
251+
group_id = id;
252+
}
253+
// This is a valid counter, add it to our descriptor's list
254+
counter_ids.push_back(id);
255+
valid_names.push_back(name);
201256
}
202-
// This is a valid counter, add it to our descriptor's list
203-
counter_ids.push_back(id);
204-
valid_names.push_back(name);
205257
}
206258

207259
// Loop through all group leaders activating them

0 commit comments

Comments
 (0)