Skip to content

Commit 9c0724d

Browse files
author
Pavel Kosov
committed
[LNT] Updated cPerf to read the section Attributes (support Simpleperf)
The current cPerf implementation requires the EVENT_DESC feature to be present in the profile file. That feature is optional and is missing from profile files generated by many tools, including Simpleperf. The same data is stored in the base Attributes section. However, some heuristics are needed to derive the event names. OS Laboratory, Huawei Russian Research Institute (Saint-Petersburg) Reviewed By: thopre Differential Revision: https://reviews.llvm.org/D112411
1 parent 83829b5 commit 9c0724d

File tree

1 file changed

+162
-45
lines changed

1 file changed

+162
-45
lines changed

lnt/testing/profile/cPerf.cpp

Lines changed: 162 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,21 @@ struct perf_header {
188188
uint64_t flags1[3];
189189
};
190190

191+
// Prefix of an event attribute record, as PerfReader::readAttrs() overlays it
// on the bytes of the profile's attrs section. The stored record may be longer
// than this struct: readAttrs() steps over it using the record's own `size`
// field rather than sizeof(perf_event_attr).
struct perf_event_attr {
  uint32_t type;          // Event class; readAttrs() switches on it (perf_type_id).
  uint32_t size;          // Bytes readAttrs() skips to reach the ids descriptor.
  uint64_t config;        // Event selector; indexes the HW/SW name tables.
  uint64_t sample_period;
  uint64_t sample_type;   // Saved in EventLayouts; used as the sample layout.
  uint64_t read_format;
  uint64_t flags;
  uint32_t wakeup_events;
  uint32_t bp_type;
  uint64_t bp_addr;
  uint64_t bp_len;
  uint64_t branch_sample_type;
};

// The struct is read straight out of the file buffer, so its layout must not
// drift: twelve naturally aligned fields, 80 bytes, no padding.
static_assert(sizeof(perf_event_attr) == 80,
              "perf_event_attr must match the stored field layout");
205+
191206
struct perf_event_header {
192207
uint32_t type;
193208
uint16_t misc;
@@ -237,6 +252,68 @@ struct perf_sample_id {
237252
uint64_t id;
238253
};
239254

255+
// Event classes stored in perf_event_attr::type. PerfReader::readAttrs() only
// derives a name for the HARDWARE and SOFTWARE classes; every other class is
// recorded under the name "unknown".
enum perf_type_id {
  PERF_TYPE_HARDWARE   = 0,  // config is a perf_hw_id
  PERF_TYPE_SOFTWARE   = 1,  // config is a perf_sw_ids
  PERF_TYPE_TRACEPOINT = 2,
  PERF_TYPE_HW_CACHE   = 3,
  PERF_TYPE_RAW        = 4,
  PERF_TYPE_BREAKPOINT = 5,
  PERF_TYPE_MAX              // number of classes listed above
};
264+
265+
// Values of perf_event_attr::config for PERF_TYPE_HARDWARE events.
// PERF_COUNT_HW_MAX is the number of ids and must equal the length of
// hw_event_names.
enum perf_hw_id {
  PERF_COUNT_HW_CPU_CYCLES = 0,
  PERF_COUNT_HW_INSTRUCTIONS = 1,
  PERF_COUNT_HW_CACHE_REFERENCES = 2,
  PERF_COUNT_HW_CACHE_MISSES = 3,
  PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
  PERF_COUNT_HW_BRANCH_MISSES = 5,
  PERF_COUNT_HW_BUS_CYCLES = 6,
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
  PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
  PERF_COUNT_HW_REF_CPU_CYCLES = 9,
  PERF_COUNT_HW_MAX
};

// Event name for each perf_hw_id, indexed by the id's value. The pointers are
// const as well as the strings, so the table cannot be retargeted at run time.
static const char *const hw_event_names[] = {
  "cycles",
  "instructions",
  "cache-references",
  "cache-misses",
  "branch-instructions",
  "branch-misses",
  "bus-cycles",
  "stalled-cycles-frontend",
  "stalled-cycles-backend",
  "ref-cpu-cycles"
};

// The array is left unsized so that adding an id without a name is a compile
// error instead of a null entry at the end of the table.
static_assert(sizeof(hw_event_names) / sizeof(hw_event_names[0]) ==
                  PERF_COUNT_HW_MAX,
              "hw_event_names must have one entry per perf_hw_id");
291+
292+
// Values of perf_event_attr::config for PERF_TYPE_SOFTWARE events.
// PERF_COUNT_SW_MAX is the number of ids and must equal the length of
// sw_event_names.
enum perf_sw_ids {
  PERF_COUNT_SW_CPU_CLOCK = 0,
  PERF_COUNT_SW_TASK_CLOCK = 1,
  PERF_COUNT_SW_PAGE_FAULTS = 2,
  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
  PERF_COUNT_SW_CPU_MIGRATIONS = 4,
  PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
  PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  PERF_COUNT_SW_EMULATION_FAULTS = 8,
  PERF_COUNT_SW_MAX
};

// Event name for each perf_sw_ids value, indexed by the id's value. The
// pointers are const as well as the strings, so the table cannot be retargeted
// at run time.
static const char *const sw_event_names[] = {
  "cpu-clock",
  "task-clock",
  "page-faults",
  "context-switches",
  "cpu-migrations",
  "minor-faults",
  "major-faults",
  "alignment-faults",
  "emulation-faults"
};

// The array is left unsized so that adding an id without a name is a compile
// error instead of a null entry at the end of the table.
static_assert(sizeof(sw_event_names) / sizeof(sw_event_names[0]) ==
                  PERF_COUNT_SW_MAX,
              "sw_event_names must have one entry per perf_sw_ids value");
316+
240317
//===----------------------------------------------------------------------===//
241318
// Readers for nm and objdump output
242319
//===----------------------------------------------------------------------===//
@@ -339,7 +416,7 @@ class NmOutput : public std::vector<Symbol> {
339416
while (std::getline(ss, token, delim)) {
340417
output.push_back(token);
341418
}
342-
return output.size();
419+
return (int)output.size();
343420
}
344421
};
345422

@@ -431,6 +508,7 @@ class PerfReader {
431508

432509
void readHeader();
433510
void readAttrs();
511+
void readEventDesc();
434512
void readDataStream();
435513
unsigned char *readEvent(unsigned char *);
436514
perf_event_sample parseEvent(unsigned char *Buf, uint64_t Layout);
@@ -499,16 +577,52 @@ void PerfReader::readDataStream() {
499577
Buf = readEvent(Buf);
500578
}
501579

580+
#define HEADER_EVENT_DESC 12
581+
502582
void PerfReader::readAttrs() {
503-
const int HEADER_EVENT_DESC = 12;
583+
if (Header->flags & (1U << HEADER_EVENT_DESC)) {
584+
readEventDesc();
585+
} else {
586+
uint64_t NumEvents = Header->attrs.size / Header->attr_size;
587+
for (unsigned I = 0; I < NumEvents; ++I) {
588+
const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs.offset + I * Header->attr_size];
589+
const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size);
590+
unsigned char* Buf = &Buffer[ids->offset];
591+
uint64_t NumIDs = ids->size / sizeof(uint64_t);
592+
593+
const char* Str = "unknown";
594+
switch (attr->type) {
595+
case PERF_TYPE_HARDWARE:
596+
if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config];
597+
break;
598+
case PERF_TYPE_SOFTWARE:
599+
if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config];
600+
break;
601+
}
602+
603+
// Weirdness of perf: if there is only one event descriptor, that
604+
// event descriptor can be referred to by ANY id!
605+
if (NumEvents == 1 && NumIDs == 0) {
606+
EventIDs[0] = Str;
607+
EventLayouts[0] = attr->sample_type;
608+
}
609+
610+
for (unsigned J = 0; J < NumIDs; ++J) {
611+
auto id = TakeU64(Buf);
612+
EventIDs[id] = Str;
613+
EventLayouts[id] = attr->sample_type;
614+
}
615+
}
616+
}
617+
}
618+
619+
void PerfReader::readEventDesc() {
504620
perf_file_section *P =
505621
(perf_file_section *)&Buffer[Header->data.offset + Header->data.size];
506622
for (int I = 0; I < HEADER_EVENT_DESC; ++I)
507-
if (Header->flags & (1U << I))
623+
if (Header->flags & (1ULL << I))
508624
++P;
509625

510-
assert(Header->flags & (1U << HEADER_EVENT_DESC));
511-
512626
unsigned char *Buf = &Buffer[P->offset];
513627
uint32_t NumEvents = TakeU32(Buf);
514628
uint32_t AttrSize = TakeU32(Buf);
@@ -541,9 +655,10 @@ void PerfReader::readAttrs() {
541655
}
542656

543657
unsigned char *PerfReader::readEvent(unsigned char *Buf) {
544-
perf_event_sample *E = (perf_event_sample *)Buf;
545-
546-
if (E->header.type == PERF_RECORD_MMAP) {
658+
perf_event_header *E = (perf_event_header *)Buf;
659+
switch (E->type) {
660+
case PERF_RECORD_MMAP:
661+
{
547662
perf_event_mmap *E = (perf_event_mmap *)Buf;
548663
auto MapID = Maps.size();
549664
// EXEC ELF objects aren't relocated. DYN ones are,
@@ -559,10 +674,12 @@ unsigned char *PerfReader::readEvent(unsigned char *Buf) {
559674
auto &CurrentMap = CurrentMaps[ID->time];
560675
CurrentMap.insert({E->start, MapID});
561676
}
562-
if (E->header.type == PERF_RECORD_MMAP2) {
677+
break;
678+
case PERF_RECORD_MMAP2:
679+
{
563680
perf_event_mmap2 *E = (perf_event_mmap2 *)Buf;
564681
if (!(E->prot & PROT_EXEC))
565-
return &Buf[E->header.size];
682+
return break;
566683
auto MapID = Maps.size();
567684
// EXEC ELF objects aren't relocated. DYN ones are,
568685
// so if it's a DYN object adjust by subtracting the
@@ -577,44 +694,44 @@ unsigned char *PerfReader::readEvent(unsigned char *Buf) {
577694
auto &CurrentMap = CurrentMaps[ID->time];
578695
CurrentMap.insert({E->start, MapID});
579696
}
697+
break;
698+
case PERF_RECORD_SAMPLE:
699+
{
700+
perf_event_sample* E = (perf_event_sample*)Buf;
701+
auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header),
702+
EventLayouts.begin()->second);
703+
auto EventID = NewE.id;
704+
auto PC = NewE.ip;
705+
706+
// Search for the map corresponding to this sample. Search backwards through
707+
// time, discarding any maps created after our timestamp.
708+
uint64_t MapID = ~0ULL;
709+
for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend();
710+
I != E; ++I) {
711+
if (I->first > NewE.time)
712+
continue;
580713

581-
if (E->header.type != PERF_RECORD_SAMPLE)
582-
return &Buf[E->header.size];
583-
584-
auto NewE = parseEvent(((unsigned char*)E) + sizeof(perf_event_header),
585-
EventLayouts.begin()->second);
586-
auto EventID = NewE.id;
587-
auto PC = NewE.ip;
588-
589-
// Search for the map corresponding to this sample. Search backwards through
590-
// time, discarding any maps created after our timestamp.
591-
size_t MapID = ~0UL;
592-
for (auto I = CurrentMaps.rbegin(), E = CurrentMaps.rend();
593-
I != E; ++I) {
594-
if (I->first > NewE.time)
595-
continue;
596-
597-
auto NewI = I->second.upper_bound(PC);
598-
if (NewI == I->second.begin())
599-
continue;
600-
--NewI;
601-
602-
if (NewI->first > PC)
603-
continue;
604-
MapID = NewI->second;
605-
break;
606-
}
607-
if (MapID == ~0UL)
608-
return &Buf[E->header.size];
609-
assert(MapID != ~0UL);
610-
611-
assert(EventIDs.count(EventID));
612-
Events[MapID][PC][EventIDs[EventID]] += NewE.period;
714+
auto NewI = I->second.upper_bound(PC);
715+
if (NewI == I->second.begin())
716+
continue;
717+
--NewI;
613718

614-
TotalEvents[EventIDs[EventID]] += NewE.period;
615-
TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period;
719+
if (NewI->first > PC)
720+
continue;
721+
MapID = NewI->second;
722+
break;
723+
}
724+
if (MapID != ~0ULL) {
725+
assert(EventIDs.count(EventID));
726+
Events[MapID][PC][EventIDs[EventID]] += NewE.period;
616727

617-
return &Buf[E->header.size];
728+
TotalEvents[EventIDs[EventID]] += NewE.period;
729+
TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period;
730+
}
731+
}
732+
break;
733+
}
734+
return &Buf[E->size];
618735
}
619736

620737
perf_event_sample PerfReader::parseEvent(unsigned char *Buf, uint64_t Layout) {

0 commit comments

Comments
 (0)