@@ -70,11 +70,17 @@ const EVENT_TYPES =
70
70
(:scaled_cycles , 9 ) # PERF_COUNT_HW_REF_CPU_CYCLES
71
71
]),
72
72
(:sw , 1 , # PERF_TYPE_SOFTWARE
73
- [(:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
73
+ [(:cpu_clock , 0 ), # PERF_COUNT_SW_CPU_CLOCK
74
+ (:task_clock , 1 ), # PERF_COUNT_SW_TASK_CLOCK
75
+ (:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
74
76
(:ctx_switches , 3 ), # PERF_COUNT_SW_CONTEXT_SWITCHES
75
77
(:cpu_migrations , 4 ), # PERF_COUNT_SW_CPU_MIGRATIONS
76
78
(:minor_page_faults , 5 ), # PERF_COUNT_SW_PAGE_FAULTS_MIN
77
79
(:major_page_faults , 6 ), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
80
+ (:alignment_faults , 7 ), # PERF_COUNT_SW_ALIGNMENT_FAULTS
81
+ (:emulation_faults , 8 ), # PERF_COUNT_SW_EMULATION_FAULTS
82
+ (:dummy , 9 ), # PERF_COUNT_SW_DUMMY
83
+ (:bpf_output , 10 ), # PERF_COUNT_SW_BPF_OUTPUT
78
84
])
79
85
]
80
86
@@ -348,4 +354,262 @@ end
348
354
349
355
# Zero-argument convenience method: forwards `reasonable_defaults` to the
# one-argument `make_bench`.
function make_bench()
    return make_bench(reasonable_defaults)
end
350
356
357
+
358
# Lookup table from an event name, spelled the way the `perf` command spells
# it, to the `EventType` that this module uses for the same event.
const NAME_TO_EVENT = Dict(
    # ---- hardware events ----
    "branch-instructions"     => EventType(:hw, :branches),
    "branch-misses"           => EventType(:hw, :branch_mispredicts),
    "cache-misses"            => EventType(:hw, :cache_misses),
    "cache-references"        => EventType(:hw, :cache_access),
    "cpu-cycles"              => EventType(:hw, :cycles),
    "instructions"            => EventType(:hw, :instructions),
    "stalled-cycles-backend"  => EventType(:hw, :stalled_cycles_backend),
    "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),

    # ---- software events ----
    "alignment-faults"        => EventType(:sw, :alignment_faults),
    "bpf-output"              => EventType(:sw, :bpf_output),
    "context-switches"        => EventType(:sw, :ctx_switches),
    "cpu-clock"               => EventType(:sw, :cpu_clock),
    "cpu-migrations"          => EventType(:sw, :cpu_migrations),
    "dummy"                   => EventType(:sw, :dummy),
    "emulation-faults"        => EventType(:sw, :emulation_faults),
    "major-faults"            => EventType(:sw, :major_page_faults),
    "minor-faults"            => EventType(:sw, :minor_page_faults),
    "page-faults"             => EventType(:sw, :page_faults),
    "task-clock"              => EventType(:sw, :task_clock),

    # ---- hardware cache events ----
    "L1-dcache-load-misses"   => EventType(:cache, :L1_data, :read, :miss),
    "L1-dcache-loads"         => EventType(:cache, :L1_data, :read, :access),
    "L1-icache-load-misses"   => EventType(:cache, :L1_insn, :read, :miss),
    "L1-icache-loads"         => EventType(:cache, :L1_insn, :read, :access),
    "dTLB-load-misses"        => EventType(:cache, :TLB_data, :read, :miss),
    "dTLB-loads"              => EventType(:cache, :TLB_data, :read, :access),
    "iTLB-load-misses"        => EventType(:cache, :TLB_insn, :read, :miss),
    "iTLB-loads"              => EventType(:cache, :TLB_insn, :read, :access),
)

# Reverse lookup (event => perf-style name), derived from `NAME_TO_EVENT`.
const EVENT_TO_NAME = Dict(evt => label for (label, evt) in NAME_TO_EVENT)
395
+
396
# Interpret the option arguments handed to `@pstats` (everything before the
# measured expression).
#
# A string option is parsed with `parse_groups` and replaces the event groups
# selected so far. A `key = value` expression is rejected with "unknown key",
# and anything else with "unknown option".
#
# Returns a named tuple `(events = ...,)` holding the selected groups.
function parse_pstats_options(opts)
    # Groups measured when no event string is supplied.
    events = parse_groups("
        (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
        (instructions, branch-instructions, branch-misses),
        (task-clock, context-switches, cpu-migrations, page-faults)
    ")
    for opt in opts
        if opt isa AbstractString
            events = parse_groups(opt)
            continue
        end
        if opt isa Expr && opt.head == :(=)
            key, _ = opt.args
            error("unknown key: $(key)")
        end
        error("unknown option: $(opt)")
    end
    return (events = events,)
end
415
+
416
# syntax: groups = (group ',')* group
#
# Parse a whole event specification into a vector of groups, each group being
# a vector of `EventType`. An empty string yields no groups.
function parse_groups(str)
    parsed = Vector{EventType}[]
    pos = firstindex(str)
    iterate(str, pos) === nothing && return parsed
    while true
        grp, pos = parse_group(str, skipws(str, pos))
        push!(parsed, grp)
        pos = skipws(str, pos)
        step = iterate(str, pos)
        # End of input after a complete group: done.
        step === nothing && break
        ch, pos = step
        # Only a comma may separate two groups.
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return parsed
end
439
+
440
# syntax: group = event | '(' (event ',')* event ')'
#
# Parse one group beginning at index `i` of `str`.
# Returns `(events, next_index)`, where `next_index` points just past the
# group (past the closing ')' for a parenthesized group).
function parse_group(str, i)
    members = EventType[]
    head = iterate(str, i)
    head === nothing && error("no events")
    opener, after = head
    if opener != '('
        # A bare event forms a group of one.
        ev, i = parse_event(str, skipws(str, i))
        push!(members, ev)
        return members, i
    end
    # Parenthesized list: events separated by ',' and terminated by ')'.
    i = after
    while true
        ev, i = parse_event(str, skipws(str, i))
        push!(members, ev)
        i = skipws(str, i)
        step = iterate(str, i)
        step === nothing && error("unpaired '('")
        ch, i = step
        ch == ')' && break
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return members, i
end
475
+
476
# syntax: event = [A-Za-z0-9-]+
#
# Read the longest run of name characters starting at index `i` and look it
# up in `NAME_TO_EVENT`. Returns `(event, next_index)`; raises an error when
# the run is empty or the name is not in the table.
function parse_event(str, i)
    isnamechar(c) = 'A' <= c <= 'Z' || 'a' <= c <= 'z' || '0' <= c <= '9' || c == '-'
    first_idx = i
    while true
        step = iterate(str, i)
        (step === nothing || !isnamechar(step[1])) && break
        i = step[2]
    end
    # `i` now points at the first character that is not part of the name.
    last_idx = prevind(str, i)
    last_idx < first_idx && error("empty event name")
    name = str[first_idx:last_idx]
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && error("unknown event name: $(name)")
    return event, i
end
495
+
496
# Advance past any whitespace starting at index `i` of `str` and return the
# index of the first non-space character (or the end-of-string index).
function skipws(str, i)
    while true
        step = iterate(str, i)
        (step === nothing || !isspace(step[1])) && return i
        i = step[2]
    end
end
506
+
507
# Counter readings taken from a `PerfBench`: one inner vector per event group.
struct Stats
    groups::Vector{Vector{Counter}}
end

# Read every event group of `b` through its leader stream and wrap the raw
# numbers in `Counter` objects.
function Stats(b::PerfBench)
    collected = Vector{Counter}[]
    for g in b.groups
        n = length(g)
        # Buffer layout as consumed below: raw[1] is a header word, raw[2] the
        # enabled time, raw[3] the running time, raw[4:end] one value per event.
        raw = Vector{UInt64}(undef, n + 3)
        read!(g.leader_io, raw)
        # NOTE(review): raw[1] is presumably the number of counters in the
        # group (a disabled assertion here compared it with length(g)) - confirm.
        enabled = raw[2]
        running = raw[3]
        counters = [Counter(g.event_types[k], raw[k + 3], enabled, running) for k in 1:n]
        push!(collected, counters)
    end
    return Stats(collected)
end
522
+
523
# Report whether `stats` contains a counter for the event called `name`.
#
# Returns `false` (instead of throwing `KeyError`) when `name` is not a known
# event name, so the function can be used as a safe membership test.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    # An unknown name cannot match any counter.
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
527
+
528
# Return the first counter in `stats` that measures the event called `name`.
# Throws `KeyError` when the name is unknown or no counter measures it.
function Base.getindex(stats::Stats, name::AbstractString)
    wanted = NAME_TO_EVENT[name]
    for counter in Iterators.flatten(stats.groups)
        counter.event == wanted && return counter
    end
    throw(KeyError(name))
end
537
+
538
# Print `stats` as a table framed by two horizontal rules.
# Each counter gets one row: a grouping character, the event name taken from
# EVENT_TO_NAME, and then either "not enabled", "NA" with 0.0%, or the scaled
# count followed by the fill rate in percent. Counters that actually ran may
# also get a trailing remark that relates them to another counter in `stats`.
+ function Base. show (io:: IO , stats:: Stats )
539
# rule width = 2 (grouping column) + 23 (name column) + 18 (value columns)
+ w = 2 + 23 + 18
540
+ println (io, ' ━' ^ w)
541
+ for group in stats. groups
542
+ for i in 1 : length (group)
543
+ # grouping character
544
# one marker for a single-counter group, otherwise first / last / middle row
+ if length (group) == 1
545
+ c = ' ╶'
546
+ elseif i == 1
547
+ c = ' ┌'
548
+ elseif i == length (group)
549
+ c = ' └'
550
+ else
551
+ c = ' │'
552
+ end
553
+ counter = group[i]
554
+ event = counter. event
555
+ name = EVENT_TO_NAME[event]
556
+ @printf io " %-2s%-23s" c name
557
+ if ! isenabled (counter)
558
+ @printf (io, " %18s" , " not enabled" )
559
+ elseif ! isrun (counter)
560
+ @printf (io, " %10s%7.1f%%" , " NA" , 0.0 )
561
+ else
562
+ @printf (io, " %10.2e%7.1f%%" , scaledcount (counter), fillrate (counter) * 100 )
563
+ end
564
+ if isrun (counter)
565
+ # show a comment
566
# the remark is only printed when the counter it is compared against is present
+ if name == " cpu-cycles"
567
+ @printf (io, " # %4.1f cycles per ns" , counter. value / counter. running)
568
+ elseif (name == " stalled-cycles-frontend" || name == " stalled-cycles-backend" ) && haskey (stats, " cpu-cycles" )
569
+ @printf (io, " # %4.1f%% of cycles" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]) * 100 )
570
+ elseif name == " instructions" && haskey (stats, " cpu-cycles" )
571
+ @printf (io, " # %4.1f insns per cycle" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]))
572
+ elseif name == " branch-instructions" && haskey (stats, " instructions" )
573
+ @printf (io, " # %4.1f%% of instructions" , scaledcount (counter) / scaledcount (stats[" instructions" ]) * 100 )
574
+ elseif name == " branch-misses" && haskey (stats, " branch-instructions" )
575
+ @printf (io, " # %4.1f%% of branch instructions" , scaledcount (counter)/ scaledcount (stats[" branch-instructions" ]) * 100 )
576
+ elseif name == " cache-misses" && haskey (stats, " cache-references" )
577
+ @printf (io, " # %4.1f%% of cache references" , scaledcount (counter) / scaledcount (stats[" cache-references" ]) * 100 )
578
+ elseif name == " L1-dcache-load-misses" && haskey (stats, " L1-dcache-loads" )
579
+ @printf (io, " # %4.1f%% of loads" , scaledcount (counter) / scaledcount (stats[" L1-dcache-loads" ]) * 100 )
580
+ end
581
+ end
582
+ println (io)
583
+ end
584
+ end
585
# closing rule is written with print, so no newline follows it
+ print (io, ' ━' ^ w)
586
+ end
587
+
588
# `true` when the counter's enabled time is non-zero.
function isenabled(counter::Counter)
    return counter.enabled > 0
end

# `true` when the counter's running time is non-zero.
function isrun(counter::Counter)
    return counter.running > 0
end

# Ratio of running time to enabled time.
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end

# Raw value scaled up by enabled/running.
function scaledcount(counter::Counter)
    return counter.value * (counter.enabled / counter.running)
end
592
+
"""
    @pstats [options] expr

Run `expr` and gather its performance statistics.

`options` may include a string that selects the events to count, written as a
comma-separated list of event names; names wrapped in parentheses form one
group, for example

    @pstats "cpu-cycles,(instructions,branch-instructions)" foo()

Without such a string a default set of cycle, instruction, and software
events is measured. Any `key = value` option is currently rejected.

The macro evaluates to a `Stats` object; the value of `expr` itself is
discarded. Counting is stopped even when `expr` throws.
"""
macro pstats(args...)
    if isempty(args)
        error("@pstats requires at least one argument")
    end
    opts, expr = parse_pstats_options(args[1:end-1]), args[end]
    quote
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            val = try
                $(esc(expr))
            finally
                # stop counting even if `expr` throws
                disable!(bench)
            end
            # trick the compiler not to eliminate the code
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
614
+
615
+ end
0 commit comments