Skip to content

Commit ac1e11f

Browse files
committed
:)
1 parent 95e1448 commit ac1e11f

File tree

2 files changed

+371
-3
lines changed

2 files changed

+371
-3
lines changed

README.md

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,74 @@
1-
# LinuxPerf
1+
Julia wrapper for Linux's `perf_event_open`.
2+
3+
The kernel multiplexes event counters that require limited hardware resources, so some counters are only active for a fraction of the running time (shown as the % on the right).
4+
5+
If you need to compare two quantities, you must put them in the same event group so that they are always scheduled at the same time (or not at all).
6+
7+
```julia
8+
julia> import LinuxPerf: make_bench, enable!, disable!, reset!, reasonable_defaults, counters
9+
julia> const bench = make_bench(reasonable_defaults);
10+
julia> @noinline function g(a)
11+
enable!(bench)
12+
c = 0
13+
for x in a
14+
if x > 0
15+
c += 1
16+
end
17+
end
18+
disable!(bench)
19+
c
20+
end
21+
g (generic function with 1 method)
22+
julia> g(zeros(10000))
23+
0
24+
julia> counters(bench)
25+
hw:cycles :
26+
52794 (100.0 %)
27+
hw:cache_access :
28+
881 (100.0 %)
29+
hw:cache_misses :
30+
579 (100.0 %)
31+
hw:branches :
32+
31367 (100.0 %)
33+
hw:branch_mispredicts :
34+
107 (100.0 %) # =)
35+
hw:instructions :
36+
96961 (100.0 %)
37+
sw:ctx_switches :
38+
0 (100.0 %)
39+
sw:page_faults :
40+
0 (100.0 %)
41+
sw:minor_page_faults :
42+
0 (100.0 %)
43+
sw:major_page_faults :
44+
0 (100.0 %)
45+
sw:cpu_migrations :
46+
0 (100.0 %)
47+
48+
julia> reset!(bench)
49+
julia> g(randn(10000))
50+
5023
51+
julia> counters(bench)
52+
hw:cycles :
53+
194454 (100.0 %)
54+
hw:cache_access :
55+
291 (100.0 %)
56+
hw:cache_misses :
57+
222 (100.0 %)
58+
hw:branches :
59+
38050 (100.0 %)
60+
hw:branch_mispredicts :
61+
5131 (100.0 %) # =(
62+
hw:instructions :
63+
129253 (100.0 %)
64+
sw:ctx_switches :
65+
0 (100.0 %)
66+
sw:page_faults :
67+
0 (100.0 %)
68+
sw:minor_page_faults :
69+
0 (100.0 %)
70+
sw:major_page_faults :
71+
0 (100.0 %)
72+
sw:cpu_migrations :
73+
0 (100.0 %)
74+
```

src/LinuxPerf.jl

Lines changed: 297 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,300 @@
11
module LinuxPerf
22

3-
# package code goes here
3+
# Syscall number passed to `syscall(2)` to invoke perf_event_open.
# NOTE(review): 298 is the x86-64 Linux number; other architectures use a
# different value — confirm before running elsewhere.
const SYS_perf_event_open = 298
44

5-
end # module
5+
# Attribute block handed (by pointer) to the perf_event_open syscall.
# The field order and widths define the binary layout the kernel reads, so
# they must not be reordered. Several fields are unions on the C side, hence
# the `_or_` names.
type perf_event_attr
    typ :: UInt32                         # event category (perf type id)
    size :: UInt32                        # size of this structure in bytes
    config :: UInt64                      # event id within the category
    sample_period_or_freq :: UInt64
    sample_type :: UInt64
    read_format :: UInt64                 # PERF_FORMAT_* bits
    flags :: UInt64                       # bit field (disabled, exclude_kernel, ...)
    wakeup_events_or_watermark :: UInt32
    bp_type :: UInt32
    bp_addr_or_config1 :: UInt64
    bp_len_or_config2 :: UInt64
    branch_sample_type :: UInt64

    sample_regs_user :: UInt64
    sample_stack_user :: UInt32
    clockid :: Int32
    sample_regs_intr :: UInt64
    aux_watermark :: UInt32
    __reserved_2 :: UInt32

    # Zero-initialise every field. A bare `new()` leaves the memory
    # uninitialised, and the kernel would then see garbage in every field the
    # caller does not explicitly assign.
    perf_event_attr() = new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
end
28+
29+
# Table of the plain (non-cache) events, grouped by category.
# Each entry is (category name, perf type id, [(event name, config id), ...]).
const EVENT_TYPES = [
    (:hw, 0, [                              # PERF_TYPE_HARDWARE
        (:cycles, 0),                       # PERF_COUNT_HW_CPU_CYCLES
        (:instructions, 1),                 # PERF_COUNT_HW_INSTRUCTIONS
        (:cache_access, 2),                 # PERF_COUNT_HW_CACHE_REFERENCES
        (:cache_misses, 3),                 # PERF_COUNT_HW_CACHE_MISSES
        (:branches, 4),                     # PERF_COUNT_HW_BRANCH_INSTRUCTIONS
        (:branch_mispredicts, 5),           # PERF_COUNT_HW_BRANCH_MISSES
        (:bus_cycles, 6),                   # PERF_COUNT_HW_BUS_CYCLES
        (:stalled_cycles_frontend, 7),      # PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
        (:stalled_cycles_backend, 8),       # PERF_COUNT_HW_STALLED_CYCLES_BACKEND
        (:scaled_cycles, 9)                 # PERF_COUNT_HW_REF_CPU_CYCLES
    ]),
    (:sw, 1, [                              # PERF_TYPE_SOFTWARE
        (:page_faults, 2),                  # PERF_COUNT_SW_PAGE_FAULTS
        (:ctx_switches, 3),                 # PERF_COUNT_SW_CONTEXT_SWITCHES
        (:cpu_migrations, 4),               # PERF_COUNT_SW_CPU_MIGRATIONS
        (:minor_page_faults, 5),            # PERF_COUNT_SW_PAGE_FAULTS_MIN
        (:major_page_faults, 6)             # PERF_COUNT_SW_PAGE_FAULTS_MAJ
    ])
]
51+
52+
# Cache events use a packed encoding: the config value combines a cache id,
# an operation id and a result id, each taken from the tables below.
const PERF_TYPE_HW_CACHE = 3

# (name, id) of each cache that can be monitored.
const CACHE_TYPES = [
    (:L1_data, 0),
    (:L1_insn, 1),
    (:LLC, 2),
    (:TLB_data, 3),
    (:TLB_insn, 4),
    (:BPU, 5)
]

# (name, id) of the operation performed on the cache.
const CACHE_OPS = [
    (:read, 0),
    (:write, 1),
    (:prefetch, 2)
]

# (name, id) of the outcome being counted.
const CACHE_EVENTS = [
    (:access, 0),
    (:miss, 1)
]
68+
69+
# Bits for `perf_event_attr.read_format`; they select what a read of the
# event file descriptor returns.
const PERF_FORMAT_TOTAL_TIME_ENABLED = 1    # bit 0
const PERF_FORMAT_TOTAL_TIME_RUNNING = 2    # bit 1
const PERF_FORMAT_GROUP = 8                 # bit 3
72+
73+
# Identifies one perf event: `category` is the perf type id and `event` the
# config value within that category (a packed value for cache events).
immutable EventType
    category::UInt32
    event::UInt64
end
77+
78+
# Return an `EventType` for every entry of `EVENT_TYPES`, in table order.
# Cache events are not included.
function all_events()
    result = EventType[]
    for category in EVENT_TYPES
        cat_id = category[2]
        for entry in category[3]
            push!(result, EventType(cat_id, entry[2]))
        end
    end
    return result
end
87+
88+
# Print an event as `category:name`.
#
# Cache events are printed as `cache:<cache>:<op>:<result>`, decoding the
# three bytes packed into `e.event`. Any id without a known name is printed
# numerically; an unknown category prints as `event(<category>:<event>)`.
function Base.show(io::IO, e::EventType)
    if e.category == PERF_TYPE_HW_CACHE
        print(io, "cache:")
        # byte 0: which cache
        cache = e.event & 0xff
        idx = findfirst(k -> k[2] == cache, CACHE_TYPES)
        print(io, idx == 0 ? cache : CACHE_TYPES[idx][1], ":")
        # byte 1: operation; fall back to the raw op id (not the cache id)
        cache_op = (e.event & 0xff00) >> 8
        idx = findfirst(k -> k[2] == cache_op, CACHE_OPS)
        print(io, idx == 0 ? cache_op : CACHE_OPS[idx][1], ":")
        # byte 2: result; fall back to the raw result id (not the cache id)
        cache_event = (e.event & 0xff0000) >> 16
        idx = findfirst(k -> k[2] == cache_event, CACHE_EVENTS)
        print(io, idx == 0 ? cache_event : CACHE_EVENTS[idx][1])
    else
        for (cat_name, cat_id, events) in EVENT_TYPES
            cat_id == e.category || continue
            print(io, cat_name, ":")
            for (type_name, type_id) in events
                type_id == e.event || continue
                print(io, type_name)
                return
            end
            # known category, unknown event id
            print(io, e.event)
            return
        end
        print(io, "event(", e.category, ":", e.event, ")")
    end
end
115+
116+
# Look up a plain event by category and event name, e.g.
# `EventType(:hw, :cycles)`. Raises an error for `:cache` (which needs the
# longer form), for an unknown category, or for an unknown event name.
function EventType(cat::Symbol, event::Symbol)
    if cat === :cache
        error("cache events needs 3 arguments")
    end
    for entry in EVENT_TYPES
        entry[1] === cat || continue
        for candidate in entry[3]
            if candidate[1] === event
                return EventType(entry[2], candidate[2])
            end
        end
        error("event $event not found in $cat")
    end
    error("category $cat not found")
end
128+
129+
# Build a cache event, e.g. `EventType(:cache, :L1_data, :read, :miss)`.
# `cat` must be `:cache`; `cache`, `op` and `evt` are looked up in
# `CACHE_TYPES`, `CACHE_OPS` and `CACHE_EVENTS`, and an error is raised for
# any unknown name. The three ids are packed into one config value.
function EventType(cat::Symbol, cache::Symbol, op::Symbol, evt::Symbol)
    if cat !== :cache
        error("only cache events takes 3 arguments")
    end

    cache_pos = findfirst(entry -> entry[1] === cache, CACHE_TYPES)
    cache_pos == 0 && error("cache not found $cache")

    op_pos = findfirst(entry -> entry[1] === op, CACHE_OPS)
    op_pos == 0 && error("op not found $op")

    evt_pos = findfirst(entry -> entry[1] === evt, CACHE_EVENTS)
    evt_pos == 0 && error("cache event not found $evt")

    cache_id = CACHE_TYPES[cache_pos][2]
    op_id = CACHE_OPS[op_pos][2]
    evt_id = CACHE_EVENTS[evt_pos][2]
    packed = cache_id | (op_id << 8) | (evt_id << 16)
    return EventType(PERF_TYPE_HW_CACHE, packed)
end
143+
144+
# A set of perf events opened as one group: the first event that opens
# successfully becomes the leader and every later event is attached to it.
#
# Fields:
#   leader_fd   - file descriptor of the group leader (-1 until one is opened)
#   fds         - descriptors of all successfully opened events
#   event_types - the events matching `fds`, in the same order
#   leader_io   - stream over `leader_fd`, used to read the counter values
#
# Constructor keywords:
#   warn_unsupported - warn when an event is skipped because the syscall
#                      reported EINVAL or ENOENT
#   userspace_only   - set the exclude-kernel flag on every event
#
# Any other failure of perf_event_open raises an error. The group is reset
# before being returned.
type EventGroup
    leader_fd :: Cint
    fds :: Vector{Cint}
    event_types :: Vector{EventType}
    leader_io :: IOStream

    function EventGroup(types :: Vector{EventType};
                        warn_unsupported = true,
                        userspace_only = false
                        )
        # leader_io stays undefined until the first event opens successfully
        group = new(-1, Array(Cint, 0), Array(EventType, 0))
        for evt_type in types
            attr = perf_event_attr()
            # Clear every field before use: the kernel reads the whole
            # structure, and `flags` below is updated with `|=`, so no field
            # may hold leftover memory contents.
            for name in fieldnames(perf_event_attr)
                setfield!(attr, name, zero(fieldtype(perf_event_attr, name)))
            end
            attr.typ = evt_type.category
            attr.size = sizeof(perf_event_attr)
            attr.config = evt_type.event
            attr.sample_period_or_freq = 0
            if userspace_only
                attr.flags |= (1 << 5) # exclude kernel
            end
            if group.leader_fd == -1
                attr.flags |= (1 << 0) # start disabled
            end
            attr.read_format =
                PERF_FORMAT_GROUP |
                PERF_FORMAT_TOTAL_TIME_ENABLED |
                PERF_FORMAT_TOTAL_TIME_RUNNING
            fd = ccall(:syscall, Cint, (Clong, Clong...), SYS_perf_event_open,
                       pointer_from_objref(attr),
                       0, -1, group.leader_fd, 0)
            if fd < 0
                # capture errno right away, before any other libc call
                errno = Libc.errno()
                if errno in (Libc.EINVAL, Libc.ENOENT)
                    if warn_unsupported
                        warn("$evt_type not supported, skipping")
                    end
                    continue
                end
                if errno == Libc.EACCES && !userspace_only
                    warn("try to adjust /proc/sys/kernel/perf_event_paranoid to a value <= 1 or use user-space only events")
                end
                # do not leak the descriptors opened so far
                for opened in group.fds
                    ccall(:close, Cint, (Cint,), opened)
                end
                error("perf_event_open error : $(Libc.strerror(errno))")
            end
            push!(group.event_types, evt_type)
            push!(group.fds, fd)
            if group.leader_fd == -1
                group.leader_fd = fd
                group.leader_io = fdio(fd)
            end
        end
        reset!(group)
        group
    end
end
200+
201+
# Number of events that were successfully opened in the group.
function Base.length(g::EventGroup)
    return length(g.event_types)
end
202+
203+
# Print the group as `EventGroup(` followed by one tab-indented event per
# line, separated by commas, and a closing `)`. A group without events prints
# just the opening line and `)` instead of raising a BoundsError.
function Base.show(io::IO, g::EventGroup)
    println(io, "EventGroup(")
    count = length(g.event_types)
    for (i, e) in enumerate(g.event_types)
        print(io, "\t", e)
        # every event but the last is followed by a comma and a newline
        i < count && println(io, ",")
    end
    print(io, ")")
end
210+
211+
# ioctl request codes understood by perf event file descriptors.
const PERF_EVENT_IOC_ENABLE = convert(UInt64, 0x2400)
const PERF_EVENT_IOC_DISABLE = convert(UInt64, 0x2401)
const PERF_EVENT_IOC_RESET = convert(UInt64, 0x2403)
214+
215+
# Issue ioctl request `x` on the leader descriptor of `group`, always passing
# 1 as the ioctl argument. Raises an error carrying the libc error string
# when the call fails; returns nothing otherwise.
function ioctl(group::EventGroup, x)
    status = ccall(:ioctl, Cint, (Cint, Clong, Clong), group.leader_fd, x, 1)
    status < 0 && error("ioctl error : $(Libc.strerror())")
    return nothing
end
221+
# Start counting for the group.
function enable!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_ENABLE)
end

# Stop counting for the group.
function disable!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_DISABLE)
end

# Reset the group's counters.
function reset!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_RESET)
end
224+
# Close every file descriptor owned by the group.
#
# The descriptor list is emptied and `leader_fd` is reset to -1 afterwards,
# so calling `close` twice (or issuing an ioctl after closing) can no longer
# act on descriptor numbers the process may have reused for something else.
function Base.close(g::EventGroup)
    for fd in g.fds
        ccall(:close, Cint, (Cint,), fd)
    end
    empty!(g.fds)
    g.leader_fd = -1
    return nothing
end
229+
230+
# A collection of event groups that are enabled, disabled, reset and read
# together.
type PerfBench
    groups::Vector{EventGroup}
end
233+
# One counter reading: the event, its raw value, and the enabled/running
# times reported for the group it was read from.
immutable Counter
    event::EventType
    value::UInt64
    enabled::UInt64
    running::UInt64
end
239+
# Wrapper around a list of readings, so that the whole set gets its own
# `show` method.
immutable Counters
    counters::Vector{Counter}
end
242+
# Print one entry per counter: the event name, then either its value with the
# percentage of the enabled time it was actually running, or a note saying
# that it was never enabled or never ran.
function Base.show(io::IO, c::Counters)
    for counter in c.counters
        print(io, counter.event, " : ")
        if counter.enabled == 0
            print(io, "never enabled")
        elseif counter.running == 0
            print(io, "did not run")
        else
            @printf(io, "\n\t%20d (%.1f %%)", Int64(counter.value),
                    100*(counter.running/counter.enabled))
        end
        # terminate the entry on `io`, not on stdout
        println(io)
    end
end
255+
# Apply the operation to every group of the bench, in order.
function enable!(b::PerfBench)
    return foreach(enable!, b.groups)
end

function disable!(b::PerfBench)
    return foreach(disable!, b.groups)
end

function reset!(b::PerfBench)
    return foreach(reset!, b.groups)
end
258+
# Read the current values of every group in `b` and return them as a
# `Counters`. Each group is read as a block of UInt64 words from its leader
# stream: the number of events, the enabled time, the running time, and then
# one value per event.
function counters(b::PerfBench)
    collected = Counter[]
    for g in b.groups
        # 1 word for the count + 2 words for the times + one per event
        values = read(g.leader_io, UInt64, length(g)+1+2)
        @assert(length(g) == values[1])
        enabled = values[2]
        running = values[3]
        for (offset, evt) in enumerate(g.event_types)
            push!(collected, Counter(evt, values[3+offset], enabled, running))
        end
    end
    return Counters(collected)
end
271+
# Build a `PerfBench` from a collection whose items are either a single
# `EventType` (which gets a group of its own) or a vector of events that
# should share one group.
function make_bench(x)
    groups = EventGroup[]
    for item in x
        members = isa(item, EventType) ? [item] : item
        push!(groups, EventGroup(members))
    end
    return PerfBench(groups)
end
282+
283+
# Default selection for `make_bench`: each inner vector becomes one event
# group, and a bare event gets a group of its own.
const reasonable_defaults = [
    EventType(:hw, :cycles),
    [EventType(:hw, :cache_access), EventType(:hw, :cache_misses)],
    [EventType(:hw, :branches),
     EventType(:hw, :branch_mispredicts),
     EventType(:hw, :instructions)],
    [EventType(:sw, :ctx_switches),
     EventType(:sw, :page_faults),
     EventType(:sw, :minor_page_faults),
     EventType(:sw, :major_page_faults),
     EventType(:sw, :cpu_migrations)],
    #= [EventType(:cache, :L1_data, :read, :access),
        EventType(:cache, :L1_data, :read, :miss)],
       [EventType(:cache, :L1_data, :write, :access),
        EventType(:cache, :L1_data, :write, :miss)] =#
]
299+
300+
end

0 commit comments

Comments
 (0)