@@ -188,6 +188,21 @@ struct perf_header {
188
188
uint64_t flags1[3 ];
189
189
};
190
190
191
/// Fixed-layout view of one event attribute record inside a perf.data file.
///
/// readAttrs() overlays this struct directly on the raw file buffer, so the
/// order and width of every field is part of the on-disk format and must not
/// be changed. The field names follow the kernel's `struct perf_event_attr`;
/// only the fields annotated below are read by the code in this file that
/// uses the struct.
struct perf_event_attr {
  uint32_t type;               ///< Event class; compared against perf_type_id.
  uint32_t size;               ///< Byte size of this record; the ID section
                               ///< descriptor is located `size` bytes after
                               ///< the start of the record.
  uint64_t config;             ///< Event selector; indexes the event-name
                               ///< tables for hardware/software events.
  uint64_t sample_period;
  uint64_t sample_type;        ///< Stored as the event's sample layout.
  uint64_t read_format;
  uint64_t flags;              ///< NOTE(review): a packed bitfield in the
                               ///< kernel header; treated as opaque here.
  uint32_t wakeup_events;
  uint32_t bp_type;
  uint64_t bp_addr;
  uint64_t bp_len;
  uint64_t branch_sample_type;
};

// 2*4 + 5*8 + 2*4 + 3*8 bytes with no padding. Catch any accidental edit that
// would shift the fields away from the layout found in the file.
static_assert(sizeof(perf_event_attr) == 80,
              "perf_event_attr must match the 80-byte on-disk layout");

205
+
191
206
struct perf_event_header {
192
207
uint32_t type;
193
208
uint16_t misc;
@@ -237,6 +252,68 @@ struct perf_sample_id {
237
252
uint64_t id;
238
253
};
239
254
255
/// Event classes stored in perf_event_attr::type.
///
/// The numeric values are read straight from the file, so they are spelled
/// out explicitly and must not be renumbered. PERF_TYPE_MAX is one past the
/// last known class.
enum perf_type_id {
  PERF_TYPE_HARDWARE = 0,
  PERF_TYPE_SOFTWARE = 1,
  PERF_TYPE_TRACEPOINT = 2,
  PERF_TYPE_HW_CACHE = 3,
  PERF_TYPE_RAW = 4,
  PERF_TYPE_BREAKPOINT = 5,
  PERF_TYPE_MAX
};

264
+
265
/// Hardware event selectors: the value of perf_event_attr::config when the
/// attribute's type is PERF_TYPE_HARDWARE.
///
/// The values come from the file and double as indices into hw_event_names,
/// so they must stay dense and in this order. PERF_COUNT_HW_MAX is one past
/// the last selector.
enum perf_hw_id {
  PERF_COUNT_HW_CPU_CYCLES = 0,
  PERF_COUNT_HW_INSTRUCTIONS = 1,
  PERF_COUNT_HW_CACHE_REFERENCES = 2,
  PERF_COUNT_HW_CACHE_MISSES = 3,
  PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
  PERF_COUNT_HW_BRANCH_MISSES = 5,
  PERF_COUNT_HW_BUS_CYCLES = 6,
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
  PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
  PERF_COUNT_HW_REF_CPU_CYCLES = 9,
  PERF_COUNT_HW_MAX
};

/// Display name for each perf_hw_id, indexed by the enumerator value.
///
/// Every name is written without surrounding whitespace so it can be used
/// verbatim as a counter key. Keep one entry per enumerator, in enum order.
static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
  "cycles",                  // PERF_COUNT_HW_CPU_CYCLES
  "instructions",            // PERF_COUNT_HW_INSTRUCTIONS
  "cache-references",        // PERF_COUNT_HW_CACHE_REFERENCES
  "cache-misses",            // PERF_COUNT_HW_CACHE_MISSES
  "branch-instructions",     // PERF_COUNT_HW_BRANCH_INSTRUCTIONS
  "branch-misses",           // PERF_COUNT_HW_BRANCH_MISSES
  "bus-cycles",              // PERF_COUNT_HW_BUS_CYCLES
  "stalled-cycles-frontend", // PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
  "stalled-cycles-backend",  // PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  "ref-cpu-cycles"           // PERF_COUNT_HW_REF_CPU_CYCLES
};

291
+
292
/// Software event selectors: the value of perf_event_attr::config when the
/// attribute's type is PERF_TYPE_SOFTWARE.
///
/// The values come from the file and double as indices into sw_event_names,
/// so they must stay dense and in this order. PERF_COUNT_SW_MAX is one past
/// the last selector.
enum perf_sw_ids {
  PERF_COUNT_SW_CPU_CLOCK = 0,
  PERF_COUNT_SW_TASK_CLOCK = 1,
  PERF_COUNT_SW_PAGE_FAULTS = 2,
  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
  PERF_COUNT_SW_CPU_MIGRATIONS = 4,
  PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
  PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  PERF_COUNT_SW_EMULATION_FAULTS = 8,
  PERF_COUNT_SW_MAX
};

/// Display name for each perf_sw_ids value, indexed by the enumerator value.
///
/// Every name is written without surrounding whitespace so it can be used
/// verbatim as a counter key. Keep one entry per enumerator, in enum order.
static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
  "cpu-clock",        // PERF_COUNT_SW_CPU_CLOCK
  "task-clock",       // PERF_COUNT_SW_TASK_CLOCK
  "page-faults",      // PERF_COUNT_SW_PAGE_FAULTS
  "context-switches", // PERF_COUNT_SW_CONTEXT_SWITCHES
  "cpu-migrations",   // PERF_COUNT_SW_CPU_MIGRATIONS
  "minor-faults",     // PERF_COUNT_SW_PAGE_FAULTS_MIN
  "major-faults",     // PERF_COUNT_SW_PAGE_FAULTS_MAJ
  "alignment-faults", // PERF_COUNT_SW_ALIGNMENT_FAULTS
  "emulation-faults"  // PERF_COUNT_SW_EMULATION_FAULTS
};

316
+
240
317
// ===----------------------------------------------------------------------===//
241
318
// Readers for nm and objdump output
242
319
// ===----------------------------------------------------------------------===//
@@ -339,7 +416,7 @@ class NmOutput : public std::vector<Symbol> {
339
416
while (std::getline (ss, token, delim)) {
340
417
output.push_back (token);
341
418
}
342
- return output.size ();
419
+ return ( int ) output.size ();
343
420
}
344
421
};
345
422
@@ -431,6 +508,7 @@ class PerfReader {
431
508
432
509
void readHeader ();
433
510
void readAttrs ();
511
+ void readEventDesc ();
434
512
void readDataStream ();
435
513
unsigned char *readEvent (unsigned char *);
436
514
perf_event_sample parseEvent (unsigned char *Buf, uint64_t Layout);
@@ -499,16 +577,52 @@ void PerfReader::readDataStream() {
499
577
Buf = readEvent (Buf);
500
578
}
501
579
580
+ #define HEADER_EVENT_DESC 12
581
+
502
582
void PerfReader::readAttrs () {
503
- const int HEADER_EVENT_DESC = 12 ;
583
+ if (Header->flags & (1U << HEADER_EVENT_DESC)) {
584
+ readEventDesc ();
585
+ } else {
586
+ uint64_t NumEvents = Header->attrs .size / Header->attr_size ;
587
+ for (unsigned I = 0 ; I < NumEvents; ++I) {
588
+ const perf_event_attr* attr = (const perf_event_attr*)&Buffer[Header->attrs .offset + I * Header->attr_size ];
589
+ const perf_file_section* ids = (const perf_file_section*)((unsigned char *)attr + attr->size );
590
+ unsigned char * Buf = &Buffer[ids->offset ];
591
+ uint64_t NumIDs = ids->size / sizeof (uint64_t );
592
+
593
+ const char * Str = " unknown" ;
594
+ switch (attr->type ) {
595
+ case PERF_TYPE_HARDWARE:
596
+ if (attr->config < PERF_COUNT_HW_MAX) Str = hw_event_names[attr->config ];
597
+ break ;
598
+ case PERF_TYPE_SOFTWARE:
599
+ if (attr->config < PERF_COUNT_SW_MAX) Str = sw_event_names[attr->config ];
600
+ break ;
601
+ }
602
+
603
+ // Weirdness of perf: if there is only one event descriptor, that
604
+ // event descriptor can be referred to by ANY id!
605
+ if (NumEvents == 1 && NumIDs == 0 ) {
606
+ EventIDs[0 ] = Str;
607
+ EventLayouts[0 ] = attr->sample_type ;
608
+ }
609
+
610
+ for (unsigned J = 0 ; J < NumIDs; ++J) {
611
+ auto id = TakeU64 (Buf);
612
+ EventIDs[id] = Str;
613
+ EventLayouts[id] = attr->sample_type ;
614
+ }
615
+ }
616
+ }
617
+ }
618
+
619
+ void PerfReader::readEventDesc () {
504
620
perf_file_section *P =
505
621
(perf_file_section *)&Buffer[Header->data .offset + Header->data .size ];
506
622
for (int I = 0 ; I < HEADER_EVENT_DESC; ++I)
507
- if (Header->flags & (1U << I))
623
+ if (Header->flags & (1ULL << I))
508
624
++P;
509
625
510
- assert (Header->flags & (1U << HEADER_EVENT_DESC));
511
-
512
626
unsigned char *Buf = &Buffer[P->offset ];
513
627
uint32_t NumEvents = TakeU32 (Buf);
514
628
uint32_t AttrSize = TakeU32 (Buf);
@@ -541,9 +655,10 @@ void PerfReader::readAttrs() {
541
655
}
542
656
543
657
unsigned char *PerfReader::readEvent (unsigned char *Buf) {
544
- perf_event_sample *E = (perf_event_sample *)Buf;
545
-
546
- if (E->header .type == PERF_RECORD_MMAP) {
658
+ perf_event_header *E = (perf_event_header *)Buf;
659
+ switch (E->type ) {
660
+ case PERF_RECORD_MMAP:
661
+ {
547
662
perf_event_mmap *E = (perf_event_mmap *)Buf;
548
663
auto MapID = Maps.size ();
549
664
// EXEC ELF objects aren't relocated. DYN ones are,
@@ -559,10 +674,12 @@ unsigned char *PerfReader::readEvent(unsigned char *Buf) {
559
674
auto &CurrentMap = CurrentMaps[ID->time ];
560
675
CurrentMap.insert ({E->start , MapID});
561
676
}
562
- if (E->header .type == PERF_RECORD_MMAP2) {
677
+ break ;
678
+ case PERF_RECORD_MMAP2:
679
+ {
563
680
perf_event_mmap2 *E = (perf_event_mmap2 *)Buf;
564
681
if (!(E->prot & PROT_EXEC))
565
- return &Buf[E-> header . size ] ;
682
+ return break ;
566
683
auto MapID = Maps.size ();
567
684
// EXEC ELF objects aren't relocated. DYN ones are,
568
685
// so if it's a DYN object adjust by subtracting the
@@ -577,44 +694,44 @@ unsigned char *PerfReader::readEvent(unsigned char *Buf) {
577
694
auto &CurrentMap = CurrentMaps[ID->time ];
578
695
CurrentMap.insert ({E->start , MapID});
579
696
}
697
+ break ;
698
+ case PERF_RECORD_SAMPLE:
699
+ {
700
+ perf_event_sample* E = (perf_event_sample*)Buf;
701
+ auto NewE = parseEvent (((unsigned char *)E) + sizeof (perf_event_header),
702
+ EventLayouts.begin ()->second );
703
+ auto EventID = NewE.id ;
704
+ auto PC = NewE.ip ;
705
+
706
+ // Search for the map corresponding to this sample. Search backwards through
707
+ // time, discarding any maps created after our timestamp.
708
+ uint64_t MapID = ~0ULL ;
709
+ for (auto I = CurrentMaps.rbegin (), E = CurrentMaps.rend ();
710
+ I != E; ++I) {
711
+ if (I->first > NewE.time )
712
+ continue ;
580
713
581
- if (E->header .type != PERF_RECORD_SAMPLE)
582
- return &Buf[E->header .size ];
583
-
584
- auto NewE = parseEvent (((unsigned char *)E) + sizeof (perf_event_header),
585
- EventLayouts.begin ()->second );
586
- auto EventID = NewE.id ;
587
- auto PC = NewE.ip ;
588
-
589
- // Search for the map corresponding to this sample. Search backwards through
590
- // time, discarding any maps created after our timestamp.
591
- size_t MapID = ~0UL ;
592
- for (auto I = CurrentMaps.rbegin (), E = CurrentMaps.rend ();
593
- I != E; ++I) {
594
- if (I->first > NewE.time )
595
- continue ;
596
-
597
- auto NewI = I->second .upper_bound (PC);
598
- if (NewI == I->second .begin ())
599
- continue ;
600
- --NewI;
601
-
602
- if (NewI->first > PC)
603
- continue ;
604
- MapID = NewI->second ;
605
- break ;
606
- }
607
- if (MapID == ~0UL )
608
- return &Buf[E->header .size ];
609
- assert (MapID != ~0UL );
610
-
611
- assert (EventIDs.count (EventID));
612
- Events[MapID][PC][EventIDs[EventID]] += NewE.period ;
714
+ auto NewI = I->second .upper_bound (PC);
715
+ if (NewI == I->second .begin ())
716
+ continue ;
717
+ --NewI;
613
718
614
- TotalEvents[EventIDs[EventID]] += NewE.period ;
615
- TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period ;
719
+ if (NewI->first > PC)
720
+ continue ;
721
+ MapID = NewI->second ;
722
+ break ;
723
+ }
724
+ if (MapID != ~0ULL ) {
725
+ assert (EventIDs.count (EventID));
726
+ Events[MapID][PC][EventIDs[EventID]] += NewE.period ;
616
727
617
- return &Buf[E->header .size ];
728
+ TotalEvents[EventIDs[EventID]] += NewE.period ;
729
+ TotalEventsPerMap[MapID][EventIDs[EventID]] += NewE.period ;
730
+ }
731
+ }
732
+ break ;
733
+ }
734
+ return &Buf[E->size ];
618
735
}
619
736
620
737
perf_event_sample PerfReader::parseEvent (unsigned char *Buf, uint64_t Layout) {
0 commit comments