Skip to content

Commit d20ed16

Browse files
committed
Add pstats macro
1 parent fe015f0 commit d20ed16

File tree

2 files changed

+266
-2
lines changed

2 files changed

+266
-2
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
88
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
99

1010
[compat]
11-
julia = "1"
1211
Formatting = "0.4"
1312
PrettyTables = "0.9"
13+
julia = "1"

src/LinuxPerf.jl

Lines changed: 265 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,17 @@ const EVENT_TYPES =
7777
(:scaled_cycles, 9) # PERF_COUNT_HW_REF_CPU_CYCLES
7878
]),
7979
(:sw, 1, # PERF_TYPE_SOFTWARE
80-
[(:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS
80+
[(:cpu_clock, 0), # PERF_COUNT_SW_CPU_CLOCK
81+
(:task_clock, 1), # PERF_COUNT_SW_TASK_CLOCK
82+
(:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS
8183
(:ctx_switches, 3), # PERF_COUNT_SW_CONTEXT_SWITCHES
8284
(:cpu_migrations, 4), # PERF_COUNT_SW_CPU_MIGRATIONS
8385
(:minor_page_faults, 5), # PERF_COUNT_SW_PAGE_FAULTS_MIN
8486
(:major_page_faults, 6), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
87+
(:alignment_faults, 7), # PERF_COUNT_SW_ALIGNMENT_FAULTS
88+
(:emulation_faults, 8), # PERF_COUNT_SW_EMULATION_FAULTS
89+
(:dummy, 9), # PERF_COUNT_SW_DUMMY
90+
(:bpf_output, 10), # PERF_COUNT_SW_BPF_OUTPUT
8591
])
8692
]
8793

@@ -372,4 +378,262 @@ end
372378

373379
# Zero-argument convenience method: forwards to `make_bench` with `reasonable_defaults`.
make_bench() = make_bench(reasonable_defaults)
374380

381+
382+
# Lookup table from a perf-style event name to the `EventType` it denotes.
# Event names are taken from the perf command.
const NAME_TO_EVENT = Dict(
    # hardware events
    "branch-instructions" => EventType(:hw, :branches),
    "branch-misses" => EventType(:hw, :branch_mispredicts),
    "cache-misses" => EventType(:hw, :cache_misses),
    "cache-references" => EventType(:hw, :cache_access),
    "cpu-cycles" => EventType(:hw, :cycles),
    "instructions" => EventType(:hw, :instructions),
    "stalled-cycles-backend" => EventType(:hw, :stalled_cycles_backend),
    "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),

    # software events
    "alignment-faults" => EventType(:sw, :alignment_faults),
    "bpf-output" => EventType(:sw, :bpf_output),
    "context-switches" => EventType(:sw, :ctx_switches),
    "cpu-clock" => EventType(:sw, :cpu_clock),
    "cpu-migrations" => EventType(:sw, :cpu_migrations),
    "dummy" => EventType(:sw, :dummy),
    "emulation-faults" => EventType(:sw, :emulation_faults),
    "major-faults" => EventType(:sw, :major_page_faults),
    "minor-faults" => EventType(:sw, :minor_page_faults),
    "page-faults" => EventType(:sw, :page_faults),
    "task-clock" => EventType(:sw, :task_clock),

    # hardware cache events
    "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss),
    "L1-dcache-loads" => EventType(:cache, :L1_data, :read, :access),
    "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss),
    "L1-icache-loads" => EventType(:cache, :L1_insn, :read, :access),
    "dTLB-load-misses" => EventType(:cache, :TLB_data, :read, :miss),
    "dTLB-loads" => EventType(:cache, :TLB_data, :read, :access),
    "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss),
    "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access),
)

# Reverse table, built by swapping every (name, event) pair of NAME_TO_EVENT.
const EVENT_TO_NAME = Dict(event => name for (name, event) in NAME_TO_EVENT)
419+
420+
# Interpret the option arguments of `@pstats`.
#
# `opts` is an iterable of macro arguments. A string option is parsed with
# `parse_groups` and replaces the event groups selected so far; any other
# option raises an error (no `key = value` option is recognised yet).
# Returns a named tuple with a single field, `events`.
function parse_pstats_options(opts)
    # Start from the default event groups.
    selected = parse_groups("
        (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
        (instructions, branch-instructions, branch-misses),
        (task-clock, context-switches, cpu-migrations, page-faults)
    ")
    for option in opts
        if option isa AbstractString
            selected = parse_groups(option)
            continue
        end
        if option isa Expr && option.head == :(=)
            key, _ = option.args
            error("unknown key: $(key)")
        end
        error("unknown option: $(option)")
    end
    return (events = selected,)
end
439+
440+
# syntax: groups = (group ',')* group
#
# Parse a comma-separated list of event groups out of `str` and return them as
# a vector of groups (each group being a vector of `EventType`). An empty
# string yields an empty vector; any character other than ',' between two
# groups raises an error.
function parse_groups(str)
    parsed = Vector{EventType}[]
    pos = firstindex(str)
    # Nothing to read at all: no groups.
    iterate(str, pos) === nothing && return parsed
    while true
        pos = skipws(str, pos)
        group, pos = parse_group(str, pos)
        push!(parsed, group)
        pos = skipws(str, pos)
        step = iterate(str, pos)
        # End of input right after a group terminates the list.
        step === nothing && break
        sep, pos = step
        sep == ',' || error("unknown character: $(repr(sep))")
    end
    return parsed
end
463+
464+
# syntax: group = event | '(' (event ',')* event ')'
#
# Parse one group starting at index `i` of `str`. Returns the parsed events
# (a `Vector{EventType}`) together with the index just past the group.
function parse_group(str, i)
    events = EventType[]
    head = iterate(str, i)
    head === nothing && error("no events")

    if head[1] != '('
        # A bare event forms a group of its own.
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(events, event)
        return events, i
    end

    # Parenthesised group: step over '(' and read events until ')'.
    i = head[2]
    closed = false
    while !closed
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(events, event)
        i = skipws(str, i)
        step = iterate(str, i)
        step === nothing && error("unpaired '('")
        c, i = step
        if c == ')'
            closed = true
        elseif c != ','
            error("unknown character: $(repr(c))")
        end
    end
    return events, i
end
499+
500+
# syntax: event = [A-Za-z0-9-]+
#
# Parse one event name starting at index `i` of `str`. Returns the matching
# entry of NAME_TO_EVENT together with the index just past the name. Raises an
# error when no name character is found at `i` or when the name is not a key
# of NAME_TO_EVENT.
function parse_event(str, i)
    # Characters allowed in an event name: ASCII letters, digits and '-'.
    isok(c) = 'A' <= c <= 'Z' || 'a' <= c <= 'z' || '0' <= c <= '9' || c == '-'
    start = i
    next = iterate(str, start)
    while next !== nothing && isok(next[1])
        i = next[2]
        next = iterate(str, i)
    end
    # `i` now points just past the name; step back to its last character.
    stop = prevind(str, i)
    if start > stop
        error("empty event name")
    end
    name = str[start:stop]
    # Single dictionary lookup instead of `haskey` followed by indexing.
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && error("unknown event name: $(name)")
    return event, i
end
519+
520+
# Advance past any run of whitespace in `str` starting at index `i` and return
# the index of the first non-whitespace character (or the index past the end
# of the string when only whitespace remains).
function skipws(str, i)
    step = iterate(str, i)
    while step !== nothing && isspace(step[1])
        i = step[2]
        step = iterate(str, i)
    end
    return i
end
530+
531+
# Counter readings, stored as one vector of `Counter`s per event group.
struct Stats
    # outer index: event group; inner index: counter within that group
    groups::Vector{Vector{Counter}}
end
534+
535+
# Read the current counter values of every group in `b` and wrap them in a
# `Stats` object.
function Stats(b::PerfBench)
    collected = Vector{Counter}[]
    for g in b.groups
        n = length(g)
        # One read per group fills `n + 3` words: slots 2 and 3 are used as the
        # enabled and running times, slots 4 onwards hold one value per event.
        # NOTE(review): slot 1 is presumably the number of events in the group
        # (a disabled assertion in the original compared it with `length(g)`) — confirm.
        buf = Vector{UInt64}(undef, n + 1 + 2)
        read!(g.leader_io, buf)
        enabled = buf[2]
        running = buf[3]
        counters = [Counter(g.event_types[k], buf[3 + k], enabled, running) for k in 1:n]
        push!(collected, counters)
    end
    return Stats(collected)
end
546+
547+
# Return `true` when `stats` contains a counter for the event called `name`.
# A name that is not a key of NAME_TO_EVENT cannot be present, so it yields
# `false` rather than raising a `KeyError`.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
551+
552+
# Return the first counter in `stats` whose event is the one called `name`.
# Throws a `KeyError` when `name` is not a key of NAME_TO_EVENT or when no
# counter in `stats` matches it.
function Base.getindex(stats::Stats, name::AbstractString)
    wanted = NAME_TO_EVENT[name]
    # Walk the groups in order, and the counters of each group in order.
    for counter in Iterators.flatten(stats.groups)
        counter.event == wanted && return counter
    end
    throw(KeyError(name))
end
561+
562+
# Print `stats` as a table framed by two horizontal rules, one row per counter.
#
# Each row holds a grouping character, the perf-style event name, then either
# "not enabled", "NA" with a 0.0% rate, or the scaled count with the percentage
# of enabled time the counter was running. For some events a trailing
# "# ..." comment relates the count to another counter present in `stats`.
function Base.show(io::IO, stats::Stats)
    # Total width: 2 (grouping column) + 23 (name) + 18 (count and rate).
    w = 2 + 23 + 18
    println(io, '━'^w)
    for group in stats.groups
        n = length(group)
        for (i, counter) in enumerate(group)
            # grouping character: marks where the row sits within its group
            if n == 1
                c = '╶'
            elseif i == 1
                c = '┌'
            elseif i == n
                c = '└'
            else
                c = '│'
            end
            event = counter.event
            name = EVENT_TO_NAME[event]
            @printf io "%-2s%-23s" c name
            if !isenabled(counter)
                @printf(io, "%18s", "not enabled")
            elseif !isrun(counter)
                @printf(io, "%10s%7.1f%%", "NA", 0.0)
            else
                @printf(io, "%10.2e%7.1f%%", scaledcount(counter), fillrate(counter) * 100)
            end
            if isrun(counter)
                # show a comment
                if name == "cpu-cycles"
                    @printf(io, " # %4.1f cycles per ns", counter.value / counter.running)
                elseif (name == "stalled-cycles-frontend" || name == "stalled-cycles-backend") && haskey(stats, "cpu-cycles")
                    @printf(io, " # %4.1f%% of cycles", scaledcount(counter) / scaledcount(stats["cpu-cycles"]) * 100)
                elseif name == "instructions" && haskey(stats, "cpu-cycles")
                    @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"]))
                elseif name == "branch-instructions" && haskey(stats, "instructions")
                    @printf(io, " # %4.1f%% of instructions", scaledcount(counter) / scaledcount(stats["instructions"]) * 100)
                elseif name == "branch-misses" && haskey(stats, "branch-instructions")
                    @printf(io, " # %4.1f%% of branch instructions", scaledcount(counter)/ scaledcount(stats["branch-instructions"]) * 100)
                elseif name == "cache-misses" && haskey(stats, "cache-references")
                    @printf(io, " # %4.1f%% of cache references", scaledcount(counter) / scaledcount(stats["cache-references"]) * 100)
                elseif name == "L1-dcache-load-misses" && haskey(stats, "L1-dcache-loads")
                    @printf(io, " # %4.1f%% of loads", scaledcount(counter) / scaledcount(stats["L1-dcache-loads"]) * 100)
                end
            end
            println(io)
        end
    end
    print(io, '━'^w)
end
611+
612+
# `true` when the counter's `enabled` field is positive.
isenabled(counter::Counter) = 0 < counter.enabled

# `true` when the counter's `running` field is positive.
isrun(counter::Counter) = 0 < counter.running

# Ratio of the counter's `running` field to its `enabled` field.
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end

# Counter value scaled up by `enabled / running`.
function scaledcount(counter::Counter)
    scale = counter.enabled / counter.running
    return counter.value * scale
end
616+
617+
"""
618+
@pstats [options] expr
619+
620+
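Run `expr` and gather its performance statistics, returning them as a `Stats` object.

`options` may contain a string literal listing the events to count, for example
`"(cpu-cycles, instructions), page-faults"`: entries are separated by commas, and
events wrapped in parentheses form one group. When no string is given, a default
set of hardware and software events is used.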
621+
"""
622+
macro pstats(args...)
    if isempty(args)
        error("@pstats requires at least one argument")
    end
    # Every argument but the last is an option; the last is the expression to measure.
    opts, expr = parse_pstats_options(args[1:end-1]), args[end]
    quote
        # The measurement runs inside an anonymous function that is called immediately.
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            # `expr` is escaped so that it is evaluated in the caller's scope.
            val = $(esc(expr))
            disable!(bench)
            # trick the compiler not to eliminate the code:
            # `rand() < 0` never holds, so the result is always `Stats(bench)`,
            # yet `val` still appears to be used.
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
638+
639+
end

0 commit comments

Comments
 (0)