@@ -77,11 +77,17 @@ const EVENT_TYPES =
77
77
(:scaled_cycles , 9 ) # PERF_COUNT_HW_REF_CPU_CYCLES
78
78
]),
79
79
(:sw , 1 , # PERF_TYPE_SOFTWARE
80
- [(:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
80
+ [(:cpu_clock , 0 ), # PERF_COUNT_SW_CPU_CLOCK
81
+ (:task_clock , 1 ), # PERF_COUNT_SW_TASK_CLOCK
82
+ (:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
81
83
(:ctx_switches , 3 ), # PERF_COUNT_SW_CONTEXT_SWITCHES
82
84
(:cpu_migrations , 4 ), # PERF_COUNT_SW_CPU_MIGRATIONS
83
85
(:minor_page_faults , 5 ), # PERF_COUNT_SW_PAGE_FAULTS_MIN
84
86
(:major_page_faults , 6 ), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
87
+ (:alignment_faults , 7 ), # PERF_COUNT_SW_ALIGNMENT_FAULTS
88
+ (:emulation_faults , 8 ), # PERF_COUNT_SW_EMULATION_FAULTS
89
+ (:dummy , 9 ), # PERF_COUNT_SW_DUMMY
90
+ (:bpf_output , 10 ), # PERF_COUNT_SW_BPF_OUTPUT
85
91
])
86
92
]
87
93
@@ -372,4 +378,262 @@ end
372
378
373
379
# Zero-argument convenience method: builds a bench from `reasonable_defaults`.
function make_bench()
    return make_bench(reasonable_defaults)
end
374
380
381
+
382
# Lookup table from perf-style event names to `EventType` values.
# Event names are taken from the perf command.
const NAME_TO_EVENT = let
    # hardware events
    hardware = [
        "branch-instructions" => EventType(:hw, :branches),
        "branch-misses" => EventType(:hw, :branch_mispredicts),
        "cache-misses" => EventType(:hw, :cache_misses),
        "cache-references" => EventType(:hw, :cache_access),
        "cpu-cycles" => EventType(:hw, :cycles),
        "instructions" => EventType(:hw, :instructions),
        "stalled-cycles-backend" => EventType(:hw, :stalled_cycles_backend),
        "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),
    ]

    # software events
    software = [
        "alignment-faults" => EventType(:sw, :alignment_faults),
        "bpf-output" => EventType(:sw, :bpf_output),
        "context-switches" => EventType(:sw, :ctx_switches),
        "cpu-clock" => EventType(:sw, :cpu_clock),
        "cpu-migrations" => EventType(:sw, :cpu_migrations),
        "dummy" => EventType(:sw, :dummy),
        "emulation-faults" => EventType(:sw, :emulation_faults),
        "major-faults" => EventType(:sw, :major_page_faults),
        "minor-faults" => EventType(:sw, :minor_page_faults),
        "page-faults" => EventType(:sw, :page_faults),
        "task-clock" => EventType(:sw, :task_clock),
    ]

    # hardware cache events
    cache = [
        "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss),
        "L1-dcache-loads" => EventType(:cache, :L1_data, :read, :access),
        "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss),
        "L1-icache-loads" => EventType(:cache, :L1_insn, :read, :access),
        "dTLB-load-misses" => EventType(:cache, :TLB_data, :read, :miss),
        "dTLB-loads" => EventType(:cache, :TLB_data, :read, :access),
        "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss),
        "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access),
    ]

    Dict(vcat(hardware, software, cache))
end
417
+
418
# Inverse of NAME_TO_EVENT: maps each event back to its perf-style name.
const EVENT_TO_NAME = Dict(entry.second => entry.first for entry in NAME_TO_EVENT)
419
+
420
# Interpret the option arguments given to `@pstats` (everything before the
# measured expression).
#
# A string option is parsed as an event-group specification and replaces the
# default event set; when several strings are given the last one wins.
# `key = value` options are recognized syntactically, but no key is supported,
# so they raise "unknown key". Anything else raises "unknown option".
#
# Returns a named tuple `(events = ...,)` holding the parsed groups.
function parse_pstats_options(opts)
    # default events
    events = parse_groups("
        (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
        (instructions, branch-instructions, branch-misses),
        (task-clock, context-switches, cpu-migrations, page-faults)
    ")
    for opt in opts
        if opt isa AbstractString
            events = parse_groups(opt)
            continue
        end
        if opt isa Expr && opt.head == :(=)
            error("unknown key: $(first(opt.args))")
        end
        error("unknown option: $(opt)")
    end
    return (events = events,)
end
439
+
440
# Parse a comma-separated list of event groups.
# syntax: groups = (group ',')* group
#
# Returns a vector with one `Vector{EventType}` per group. An empty string
# yields an empty vector; any character other than ',' between groups is an
# error.
function parse_groups(str)
    parsed = Vector{EventType}[]
    pos = firstindex(str)
    state = iterate(str, pos)
    while state !== nothing
        pos = skipws(str, pos)
        members, pos = parse_group(str, pos)
        push!(parsed, members)
        pos = skipws(str, pos)
        state = iterate(str, pos)
        # end of input after a complete group: done
        state === nothing && break
        ch, pos = state
        # only a comma may separate two groups
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return parsed
end
463
+
464
# Parse one event group starting at index `i` of `str`.
# syntax: group = event | '(' (event ',')* event ')'
#
# Returns `(events, next_index)`, where `next_index` is the position right
# after the group (after the closing ')' for a parenthesized group).
function parse_group(str, i)
    members = EventType[]
    head = iterate(str, i)
    head === nothing && error("no events")

    if head[1] != '('
        # singleton group: a bare event name
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(members, event)
        return members, i
    end

    # parenthesized group: step over '(' and read events until ')'
    i = head[2]
    while true
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(members, event)
        i = skipws(str, i)
        step = iterate(str, i)
        step === nothing && error("unpaired '('")
        ch, i = step
        ch == ')' && break
        # a comma continues the group; anything else is rejected
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return members, i
end
499
+
500
# Parse a single event name starting at index `i` of `str`.
# syntax: event = [A-Za-z0-9-]+
#
# Returns `(event, next_index)`, where `event` is the NAME_TO_EVENT entry for
# the name and `next_index` is the position of the first character that is not
# part of the name. Errors on an empty or unknown name.
function parse_event(str, i)
    isnamechar(ch) = ch in 'A':'Z' || ch in 'a':'z' || ch in '0':'9' || ch == '-'
    first_idx = i
    cursor = iterate(str, first_idx)
    # advance over the longest run of name characters
    while cursor !== nothing && isnamechar(cursor[1])
        i = cursor[2]
        cursor = iterate(str, i)
    end
    last_idx = prevind(str, i)
    first_idx > last_idx && error("empty event name")
    name = str[first_idx:last_idx]
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && error("unknown event name: $(name)")
    return event, i
end
519
+
520
# Advance past any whitespace starting at index `i` of `str`.
# Returns the index of the first non-whitespace character, or the
# past-the-end index when only whitespace (or nothing) remains.
function skipws(str, i)
    while true
        step = iterate(str, i)
        if step === nothing || !isspace(step[1])
            return i
        end
        i = step[2]
    end
end
530
+
531
# Counter readings of a bench, kept in the same group structure as the bench:
# one inner vector of `Counter`s per event group.
struct Stats
    groups::Vector{Vector{Counter}}
end

# Read the current counter values of every group in `b`.
#
# For each group, `length(g) + 3` UInt64 words are read from the group
# leader's IO. Words 2 and 3 are used as the enabled/running times shared by
# all counters of the group, and words 4.. are the per-event values.
# NOTE(review): word 1 is presumably the number of events in the group (see
# the disabled assertion below) -- confirm against the perf read format.
function Stats(b::PerfBench)
    collected = Vector{Counter}[]
    for g in b.groups
        nevents = length(g)
        buf = Vector{UInt64}(undef, 3 + nevents)
        read!(g.leader_io, buf)
        #?Ref@assert(length(g) == buf[1])
        time_enabled = buf[2]
        time_running = buf[3]
        counters = [Counter(g.event_types[k], buf[3 + k], time_enabled, time_running) for k in 1:nevents]
        push!(collected, counters)
    end
    return Stats(collected)
end
546
+
547
# Report whether `stats` contains a counter for the event called `name`.
#
# Returns `false` both when the event was not measured and when `name` is not
# a known event name at all; a membership query must not throw `KeyError`.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
551
+
552
# Return the first counter in `stats` (searching groups in order) that
# measured the event called `name`.
#
# Throws `KeyError` when `name` is not a known event name or when no counter
# for that event is present.
function Base.getindex(stats::Stats, name::AbstractString)
    wanted = NAME_TO_EVENT[name]
    for counters in stats.groups
        idx = findfirst(c -> c.event == wanted, counters)
        idx === nothing || return counters[idx]
    end
    throw(KeyError(name))
end
561
+
562
# Print `stats` as a table framed by two horizontal rules.
#
# Each counter gets one row: a grouping marker, the event name, then either
# "not enabled", "NA" with a 0.0% fill rate, or the scaled count together
# with the fill rate in percent. Counters that actually ran may also get a
# trailing "# ..." comment relating them to another measured event.
function Base.show(io::IO, stats::Stats)
    # marker column (2) + name column (23) + value/percentage columns (18)
    width = 2 + 23 + 18
    rule = '━'^width
    println(io, rule)
    for group in stats.groups
        n = length(group)
        for (i, counter) in enumerate(group)
            # grouping character: lone counter, first, last or middle of a group
            marker = n == 1 ? '╶' : i == 1 ? '┌' : i == n ? '└' : '│'
            name = EVENT_TO_NAME[counter.event]
            @printf(io, "%-2s%-23s", marker, name)
            if !isenabled(counter)
                @printf(io, "%18s", "not enabled")
            elseif !isrun(counter)
                @printf(io, "%10s%7.1f%%", "NA", 0.0)
            else
                @printf(io, "%10.2e%7.1f%%", scaledcount(counter), fillrate(counter) * 100)
            end
            if isrun(counter)
                # ratio of this counter's scaled count to that of another event
                relative_to(other) = scaledcount(counter) / scaledcount(stats[other])
                # show a comment
                if name == "cpu-cycles"
                    @printf(io, "  # %4.1f cycles per ns", counter.value / counter.running)
                elseif (name == "stalled-cycles-frontend" || name == "stalled-cycles-backend") && haskey(stats, "cpu-cycles")
                    @printf(io, "  # %4.1f%% of cycles", relative_to("cpu-cycles") * 100)
                elseif name == "instructions" && haskey(stats, "cpu-cycles")
                    @printf(io, "  # %4.1f insns per cycle", relative_to("cpu-cycles"))
                elseif name == "branch-instructions" && haskey(stats, "instructions")
                    @printf(io, "  # %4.1f%% of instructions", relative_to("instructions") * 100)
                elseif name == "branch-misses" && haskey(stats, "branch-instructions")
                    @printf(io, "  # %4.1f%% of branch instructions", relative_to("branch-instructions") * 100)
                elseif name == "cache-misses" && haskey(stats, "cache-references")
                    @printf(io, "  # %4.1f%% of cache references", relative_to("cache-references") * 100)
                elseif name == "L1-dcache-load-misses" && haskey(stats, "L1-dcache-loads")
                    @printf(io, "  # %4.1f%% of loads", relative_to("L1-dcache-loads") * 100)
                end
            end
            println(io)
        end
    end
    print(io, rule)
end
611
+
612
# True when the counter's enabled time is non-zero.
function isenabled(counter::Counter)
    return counter.enabled > 0
end

# True when the counter's running time is non-zero.
function isrun(counter::Counter)
    return counter.running > 0
end

# Fraction of the enabled time during which the counter was running.
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end

# Raw value scaled up by enabled/running, i.e. extrapolated to the whole
# enabled period. Callers check `isrun` first, since `running` is the divisor.
function scaledcount(counter::Counter)
    return counter.value * (counter.enabled / counter.running)
end
616
+
617
"""
    @pstats [options] expr

Run `expr` and gather its performance statistics.

All arguments before `expr` are options. A string option is an event-group
specification such as `"cpu-cycles,(instructions,branch-instructions)"` and
replaces the default set of events; any other option is rejected while the
macro is expanded.

The macro evaluates to a `Stats` object holding the counters read after `expr`
has finished. If `expr` throws, the counters are still disabled before the
exception propagates.
"""
macro pstats(args...)
    isempty(args) && error("@pstats requires at least one argument")
    opts = parse_pstats_options(args[1:end-1])
    expr = args[end]
    quote
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            val = try
                $(esc(expr))
            finally
                # stop counting even when `expr` throws
                disable!(bench)
            end
            # trick the compiler not to eliminate the code
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
638
+
639
+ end
0 commit comments