Skip to content

Commit 4a5f890

Browse files
committed
Add pstats macro
1 parent 57b84ac commit 4a5f890

File tree

2 files changed

+266
-2
lines changed

2 files changed

+266
-2
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
88
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
99

1010
[compat]
11-
julia = "1"
1211
Formatting = "0.4"
1312
PrettyTables = "0.9"
13+
julia = "1"

src/LinuxPerf.jl

Lines changed: 265 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,17 @@ const EVENT_TYPES =
7070
(:scaled_cycles, 9) # PERF_COUNT_HW_REF_CPU_CYCLES
7171
]),
7272
(:sw, 1, # PERF_TYPE_SOFTWARE
73-
[(:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS
73+
[(:cpu_clock, 0), # PERF_COUNT_SW_CPU_CLOCK
74+
(:task_clock, 1), # PERF_COUNT_SW_TASK_CLOCK
75+
(:page_faults, 2), # PERF_COUNT_SW_PAGE_FAULTS
7476
(:ctx_switches, 3), # PERF_COUNT_SW_CONTEXT_SWITCHES
7577
(:cpu_migrations, 4), # PERF_COUNT_SW_CPU_MIGRATIONS
7678
(:minor_page_faults, 5), # PERF_COUNT_SW_PAGE_FAULTS_MIN
7779
(:major_page_faults, 6), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
80+
(:alignment_faults, 7), # PERF_COUNT_SW_ALIGNMENT_FAULTS
81+
(:emulation_faults, 8), # PERF_COUNT_SW_EMULATION_FAULTS
82+
(:dummy, 9), # PERF_COUNT_SW_DUMMY
83+
(:bpf_output, 10), # PERF_COUNT_SW_BPF_OUTPUT
7884
])
7985
]
8086

@@ -348,4 +354,262 @@ end
348354

349355
# Zero-argument convenience method: forwards to `make_bench` with `reasonable_defaults`.
function make_bench()
    return make_bench(reasonable_defaults)
end
350356

357+
358+
# Lookup table from perf-style event names to `EventType` values.
# Event names are taken from the perf command.
const NAME_TO_EVENT = Dict(
    # hardware events
    "branch-instructions"     => EventType(:hw, :branches),
    "branch-misses"           => EventType(:hw, :branch_mispredicts),
    "cache-misses"            => EventType(:hw, :cache_misses),
    "cache-references"        => EventType(:hw, :cache_access),
    "cpu-cycles"              => EventType(:hw, :cycles),
    "instructions"            => EventType(:hw, :instructions),
    "stalled-cycles-backend"  => EventType(:hw, :stalled_cycles_backend),
    "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),

    # software events
    "alignment-faults" => EventType(:sw, :alignment_faults),
    "bpf-output"       => EventType(:sw, :bpf_output),
    "context-switches" => EventType(:sw, :ctx_switches),
    "cpu-clock"        => EventType(:sw, :cpu_clock),
    "cpu-migrations"   => EventType(:sw, :cpu_migrations),
    "dummy"            => EventType(:sw, :dummy),
    "emulation-faults" => EventType(:sw, :emulation_faults),
    "major-faults"     => EventType(:sw, :major_page_faults),
    "minor-faults"     => EventType(:sw, :minor_page_faults),
    "page-faults"      => EventType(:sw, :page_faults),
    "task-clock"       => EventType(:sw, :task_clock),

    # hardware cache events
    "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss),
    "L1-dcache-loads"       => EventType(:cache, :L1_data, :read, :access),
    "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss),
    "L1-icache-loads"       => EventType(:cache, :L1_insn, :read, :access),
    "dTLB-load-misses"      => EventType(:cache, :TLB_data, :read, :miss),
    "dTLB-loads"            => EventType(:cache, :TLB_data, :read, :access),
    "iTLB-load-misses"      => EventType(:cache, :TLB_insn, :read, :miss),
    "iTLB-loads"            => EventType(:cache, :TLB_insn, :read, :access),
)

# Reverse table (event => name), derived from NAME_TO_EVENT.
const EVENT_TO_NAME = Dict(event => name for (name, event) in pairs(NAME_TO_EVENT))
395+
396+
# Turn the option arguments given to `@pstats` into a named tuple `(events = ...,)`.
#
# `opts` is iterated in order. A string option is parsed with `parse_groups` and
# replaces the current event groups; a `key = value` expression is rejected because
# no keys are recognized; any other option is rejected as unknown.
function parse_pstats_options(opts)
    # default events
    events = parse_groups("
        (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
        (instructions, branch-instructions, branch-misses),
        (task-clock, context-switches, cpu-migrations, page-faults)
    ")
    for opt in opts
        if opt isa AbstractString
            events = parse_groups(opt)
            continue
        end
        if opt isa Expr && opt.head == :(=)
            key, _ = opt.args
            error("unknown key: $(key)")
        end
        error("unknown option: $(opt)")
    end
    return (events = events,)
end
415+
416+
# Parse a comma-separated list of event groups.
# syntax: groups = (group ',')* group
#
# Returns a vector with one `Vector{EventType}` per group. Whitespace around groups
# and separators is skipped. Raises an error on any character other than ',' between
# groups.
function parse_groups(str)
    parsed = Vector{EventType}[]
    pos = firstindex(str)
    state = iterate(str, pos)
    while state !== nothing
        pos = skipws(str, pos)
        group, pos = parse_group(str, pos)
        push!(parsed, group)
        pos = skipws(str, pos)
        state = iterate(str, pos)
        # end of input right after a group: done
        state === nothing && break
        ch, pos = state
        # only a comma may separate two groups
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return parsed
end
439+
440+
# Parse one event group starting at index `i` of `str`.
# syntax: group = event | '(' (event ',')* event ')'
#
# Returns `(group, next_index)` where `group` is a `Vector{EventType}` and
# `next_index` is the index just past the consumed text. Raises an error when the
# input is exhausted, when a '(' is never closed, or when an unexpected character
# follows an event inside parentheses.
function parse_group(str, i)
    members = EventType[]
    head = iterate(str, i)
    head === nothing && error("no events")

    if head[1] != '('
        # singleton group
        pos = skipws(str, i)
        event, pos = parse_event(str, pos)
        push!(members, event)
        return members, pos
    end

    # parenthesized group: continue after the '('
    pos = head[2]
    while true
        pos = skipws(str, pos)
        event, pos = parse_event(str, pos)
        push!(members, event)
        pos = skipws(str, pos)
        state = iterate(str, pos)
        state === nothing && error("unpaired '('")
        ch, pos = state
        ch == ')' && break
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return members, pos
end
475+
476+
# Parse a single event name starting at index `i` of `str`.
# syntax: event = [A-Za-z0-9-]+
#
# Returns `(event, next_index)` where `event` is the `NAME_TO_EVENT` entry for the
# name and `next_index` is the index of the first character after the name.
# Raises an error when no name character is found at `i` ("empty event name") or
# when the name is not a key of `NAME_TO_EVENT` ("unknown event name").
function parse_event(str, i)
    # ASCII letters, ASCII digits and '-' are the only characters allowed in a name
    isok(c) = 'A' <= c <= 'Z' || 'a' <= c <= 'z' || '0' <= c <= '9' || c == '-'
    start = i
    next = iterate(str, start)
    while next !== nothing && isok(next[1])
        i = next[2]
        next = iterate(str, i)
    end
    # `i` now points just past the name; step back to its last character
    stop = prevind(str, i)
    if start > stop
        error("empty event name")
    end
    name = str[start:stop]
    # single lookup instead of haskey + getindex
    event = get(NAME_TO_EVENT, name, nothing)
    if event === nothing
        error("unknown event name: $(name)")
    end
    return event, i
end
495+
496+
# Return the index of the first non-whitespace character of `str` at or after `i`.
# If only whitespace (or nothing) remains, the returned index is past the end of
# the string.
function skipws(str, i)
    state = iterate(str, i)
    while state !== nothing && isspace(state[1])
        i = state[2]
        state = iterate(str, i)
    end
    return i
end
506+
507+
# Counter readings collected from a benchmark: one inner vector per event group.
struct Stats
    groups::Vector{Vector{Counter}}
end

# Read the current counter values of every group in `b` and wrap them in a `Stats`.
#
# For each group, `length(g) + 3` UInt64 words are read from `g.leader_io`:
# word 2 is used as `enabled`, word 3 as `running`, and words 4 onward as the
# individual counter values.
function Stats(b::PerfBench)
    collected = Vector{Counter}[]
    for g in b.groups
        n = length(g)
        buf = Vector{UInt64}(undef, n + 1 + 2)
        read!(g.leader_io, buf)
        # NOTE(review): buf[1] is presumably the number of counters in the group;
        # the original carried a disabled check: #?Ref@assert(length(g) == values[1])
        enabled = buf[2]
        running = buf[3]
        counters = [Counter(g.event_types[k], buf[3+k], enabled, running) for k in 1:n]
        push!(collected, counters)
    end
    return Stats(collected)
end
522+
523+
# Return `true` if `stats` contains a counter for the event called `name`.
#
# Returns `false` both when `name` is a known event that was not measured and when
# `name` is not a key of `NAME_TO_EVENT` at all (instead of throwing `KeyError`),
# so the result is always a `Bool`.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
527+
528+
# Return the first counter in `stats` whose event matches the event called `name`.
#
# Throws `KeyError` when `name` is not a key of `NAME_TO_EVENT`, or when no counter
# in any group carries that event.
function Base.getindex(stats::Stats, name::AbstractString)
    target = NAME_TO_EVENT[name]
    for counter in Iterators.flatten(stats.groups)
        counter.event == target && return counter
    end
    throw(KeyError(name))
end
537+
538+
# Print `stats` as a table framed by two horizontal rules.
#
# Each counter takes one row: a grouping marker, the event name, then either
# "not enabled", "NA" (enabled but never run), or the scaled count followed by the
# percentage of enabled time the counter was running. For some events a trailing
# "# ..." comment relates the count to another measured event, when that event is
# present in `stats`.
function Base.show(io::IO, stats::Stats)
    # 2 (marker column) + 23 (name column) + 18 (value and percentage columns)
    w = 2 + 23 + 18
    rule = '━'^w
    println(io, rule)
    for group in stats.groups
        n = length(group)
        for (i, counter) in enumerate(group)
            # grouping character: marks where a multi-event group begins and ends
            if n == 1
                c = '╶'
            elseif i == 1
                c = '┌'
            elseif i == n
                c = '└'
            else
                c = '│'
            end
            event = counter.event
            name = EVENT_TO_NAME[event]
            @printf io "%-2s%-23s" c name
            if !isenabled(counter)
                @printf(io, "%18s", "not enabled")
            elseif !isrun(counter)
                @printf(io, "%10s%7.1f%%", "NA", 0.0)
            else
                @printf(io, "%10.2e%7.1f%%", scaledcount(counter), fillrate(counter) * 100)
            end
            if isrun(counter)
                # show a comment
                if name == "cpu-cycles"
                    @printf(io, " # %4.1f cycles per ns", counter.value / counter.running)
                elseif (name == "stalled-cycles-frontend" || name == "stalled-cycles-backend") && haskey(stats, "cpu-cycles")
                    @printf(io, " # %4.1f%% of cycles", scaledcount(counter) / scaledcount(stats["cpu-cycles"]) * 100)
                elseif name == "instructions" && haskey(stats, "cpu-cycles")
                    @printf(io, " # %4.1f insns per cycle", scaledcount(counter) / scaledcount(stats["cpu-cycles"]))
                elseif name == "branch-instructions" && haskey(stats, "instructions")
                    @printf(io, " # %4.1f%% of instructions", scaledcount(counter) / scaledcount(stats["instructions"]) * 100)
                elseif name == "branch-misses" && haskey(stats, "branch-instructions")
                    @printf(io, " # %4.1f%% of branch instructions", scaledcount(counter) / scaledcount(stats["branch-instructions"]) * 100)
                elseif name == "cache-misses" && haskey(stats, "cache-references")
                    @printf(io, " # %4.1f%% of cache references", scaledcount(counter) / scaledcount(stats["cache-references"]) * 100)
                elseif name == "L1-dcache-load-misses" && haskey(stats, "L1-dcache-loads")
                    @printf(io, " # %4.1f%% of loads", scaledcount(counter) / scaledcount(stats["L1-dcache-loads"]) * 100)
                end
            end
            println(io)
        end
    end
    # closing rule, deliberately without a trailing newline
    print(io, rule)
end
587+
588+
# true when the counter's `enabled` field is positive
function isenabled(counter::Counter)
    return counter.enabled > 0
end

# true when the counter's `running` field is positive
function isrun(counter::Counter)
    return counter.running > 0
end

# ratio of `running` to `enabled`
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end

# raw value scaled up by `enabled / running`
function scaledcount(counter::Counter)
    return counter.value * (counter.enabled / counter.running)
end
592+
593+
"""
594+
@pstats [options] expr
595+
596+
Run `expr` and gather its performance statistics.
597+
"""
598+
macro pstats(args...)
    isempty(args) && error("@pstats requires at least one argument")
    # the last argument is the expression to measure; everything before it is an option
    opts = parse_pstats_options(args[1:end-1])
    expr = args[end]
    return quote
        # run inside an anonymous function, called immediately
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            val = $(esc(expr))
            disable!(bench)
            # trick the compiler not to eliminate the code
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
614+
615+
end

0 commit comments

Comments
 (0)