|
1 | 1 | module LinuxPerf
|
2 | 2 |
|
3 |
| -# package code goes here |
# Syscall number used to invoke perf_event_open through `syscall`.
# The number is architecture specific (298 is the x86_64 value), so pick it
# from the architecture Julia was built for instead of assuming x86_64.
const SYS_perf_event_open =
    if Sys.ARCH === :x86_64
        298
    elseif Sys.ARCH === :aarch64
        241
    elseif Sys.ARCH in (:i386, :i486, :i586, :i686)
        336
    elseif startswith(string(Sys.ARCH), "arm")
        364
    elseif startswith(string(Sys.ARCH), "powerpc") || startswith(string(Sys.ARCH), "ppc")
        319
    else
        error("perf_event_open syscall number is not known for $(Sys.ARCH)")
    end
4 | 4 |
|
5 |
| -end # module |
# Attribute record whose address is passed to the perf_event_open syscall.
# The field widths below add up to 112 bytes with no padding.
# NOTE(review): the field order is presumably meant to match the kernel's
# `struct perf_event_attr`, with `a_or_b` names standing in for C unions --
# confirm against the kernel header before adding fields.
type perf_event_attr
    typ :: UInt32
    size :: UInt32
    config :: UInt64
    sample_period_or_freq :: UInt64
    sample_type :: UInt64
    read_format :: UInt64
    flags :: UInt64
    wakeup_events_or_watermark :: UInt32
    bp_type :: UInt32
    bp_addr_or_config1 :: UInt64
    bp_len_or_config2 :: UInt64
    branch_sample_type :: UInt64

    sample_regs_user :: UInt64
    sample_stack_user :: UInt32
    clockid :: Int32
    sample_regs_intr :: UInt64
    aux_watermark :: UInt32
    __reserved_2 :: UInt32

    # A bare `new()` leaves plain-data fields holding whatever bytes happened to
    # be in memory, and those bytes would be handed to the kernel. Start from an
    # all-zero record instead (one zero per field, converted to the field type).
    perf_event_attr() = new(0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0)
end
| 28 | + |
# Table of the named (non-cache) events.
# Each entry is (category name, category id, [(event name, event id), ...]);
# the trailing comments give the corresponding perf constant names.
const EVENT_TYPES = [
    # PERF_TYPE_HARDWARE
    (:hw, 0, [
        (:cycles, 0),                   # PERF_COUNT_HW_CPU_CYCLES
        (:instructions, 1),             # PERF_COUNT_HW_INSTRUCTIONS
        (:cache_access, 2),             # PERF_COUNT_HW_CACHE_REFERENCES
        (:cache_misses, 3),             # PERF_COUNT_HW_CACHE_MISSES
        (:branches, 4),                 # PERF_COUNT_HW_BRANCH_INSTRUCTIONS
        (:branch_mispredicts, 5),       # PERF_COUNT_HW_BRANCH_MISSES
        (:bus_cycles, 6),               # PERF_COUNT_HW_BUS_CYCLES
        (:stalled_cycles_frontend, 7),  # PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
        (:stalled_cycles_backend, 8),   # PERF_COUNT_HW_STALLED_CYCLES_BACKEND
        (:scaled_cycles, 9),            # PERF_COUNT_HW_REF_CPU_CYCLES
    ]),
    # PERF_TYPE_SOFTWARE
    (:sw, 1, [
        (:page_faults, 2),              # PERF_COUNT_SW_PAGE_FAULTS
        (:ctx_switches, 3),             # PERF_COUNT_SW_CONTEXT_SWITCHES
        (:cpu_migrations, 4),           # PERF_COUNT_SW_CPU_MIGRATIONS
        (:minor_page_faults, 5),        # PERF_COUNT_SW_PAGE_FAULTS_MIN
        (:major_page_faults, 6),        # PERF_COUNT_SW_PAGE_FAULTS_MAJ
    ]),
]
| 51 | + |
# Cache events are not listed by name in a flat table: their event id is
# assembled from three (name, id) lookups as
#   cache id | (op id << 8) | (result id << 16)
# under the category id PERF_TYPE_HW_CACHE.
const PERF_TYPE_HW_CACHE = 3

# Which cache the event refers to.
const CACHE_TYPES = [
    (:L1_data, 0), (:L1_insn, 1), (:LLC, 2),
    (:TLB_data, 3), (:TLB_insn, 4), (:BPU, 5),
]

# Which operation on that cache.
const CACHE_OPS = [(:read, 0), (:write, 1), (:prefetch, 2)]

# Which outcome of the operation is counted.
const CACHE_EVENTS = [(:access, 0), (:miss, 1)]
| 68 | + |
# Bit masks OR-ed together into perf_event_attr.read_format.
const PERF_FORMAT_TOTAL_TIME_ENABLED = 1    # bit 0
const PERF_FORMAT_TOTAL_TIME_RUNNING = 2    # bit 1
const PERF_FORMAT_GROUP = 8                 # bit 3
| 72 | + |
# Identifies one countable event by a (category id, event id) pair.
# When an event is opened, `category` is stored into perf_event_attr.typ and
# `event` into perf_event_attr.config.
immutable EventType
    category::UInt32
    event::UInt64
end
| 77 | + |
# Return one EventType for every entry of EVENT_TYPES, in table order
# (cache events are not part of that table and are therefore not included).
function all_events()
    result = EventType[]
    for category in EVENT_TYPES
        category_id = category[2]
        for entry in category[3]
            push!(result, EventType(category_id, entry[2]))
        end
    end
    return result
end
| 87 | + |
# Name registered for `id` in `table` (a vector of (name, id) pairs), or `id`
# itself when the table has no entry for it.
function cache_field_label(table, id)
    idx = findfirst(k -> k[2] == id, table)
    return idx == 0 ? id : table[idx][1]
end

# Print a readable name for `e`.
#
# * Cache events print as "cache:<cache>:<op>:<result>", decoding the three
#   byte-wide fields of `e.event`. A field with no known name prints as its own
#   numeric value (previously the op and result fields wrongly fell back to the
#   cache id).
# * Events of a category listed in EVENT_TYPES print as "<category>:<event>",
#   using the numeric event id when the event has no name.
# * Anything else prints as "event(<category>:<event>)".
function Base.show(io::IO, e::EventType)
    if e.category == PERF_TYPE_HW_CACHE
        print(io, "cache:")
        print(io, cache_field_label(CACHE_TYPES, e.event & 0xff), ":")
        print(io, cache_field_label(CACHE_OPS, (e.event & 0xff00) >> 8), ":")
        print(io, cache_field_label(CACHE_EVENTS, (e.event & 0xff0000) >> 16))
    else
        for (cat_name, cat_id, events) in EVENT_TYPES
            cat_id == e.category || continue
            print(io, cat_name, ":")
            for (type_name, type_id) in events
                type_id == e.event || continue
                print(io, type_name)
                return
            end
            # known category, unknown event id
            print(io, e.event)
            return
        end
        # unknown category
        print(io, "event(", e.category, ":", e.event, ")")
    end
end
| 115 | + |
# Look up a non-cache event by category name and event name in EVENT_TYPES.
# Raises an error for the :cache category (which needs the four-argument form),
# for an unknown category, and for an event name missing from the category.
function EventType(cat::Symbol, event::Symbol)
    if cat === :cache
        error("cache events needs 3 arguments")
    end
    cat_pos = findfirst(entry -> entry[1] === cat, EVENT_TYPES)
    if cat_pos == 0
        error("category $cat not found")
    end
    cat_id = EVENT_TYPES[cat_pos][2]
    known = EVENT_TYPES[cat_pos][3]
    evt_pos = findfirst(entry -> entry[1] === event, known)
    if evt_pos == 0
        error("event $event not found in $cat")
    end
    return EventType(cat_id, known[evt_pos][2])
end
| 128 | + |
# Build a cache event from the names of the cache, the operation and the
# counted outcome. `cat` must be :cache. The three ids are looked up in
# CACHE_TYPES, CACHE_OPS and CACHE_EVENTS (checked in that order) and packed as
# cache id | (op id << 8) | (outcome id << 16).
function EventType(cat::Symbol, cache::Symbol, op::Symbol, evt::Symbol)
    if cat !== :cache
        error("only cache events takes 3 arguments")
    end

    cache_pos = findfirst(entry -> entry[1] === cache, CACHE_TYPES)
    if cache_pos == 0
        error("cache not found $cache")
    end

    op_pos = findfirst(entry -> entry[1] === op, CACHE_OPS)
    if op_pos == 0
        error("op not found $op")
    end

    evt_pos = findfirst(entry -> entry[1] === evt, CACHE_EVENTS)
    if evt_pos == 0
        error("cache event not found $evt")
    end

    config = CACHE_TYPES[cache_pos][2] |
             (CACHE_OPS[op_pos][2] << 8) |
             (CACHE_EVENTS[evt_pos][2] << 16)
    return EventType(PERF_TYPE_HW_CACHE, config)
end
| 143 | + |
# A set of events opened together. The first event that opens successfully
# becomes the group leader; the others are opened with the leader's descriptor
# as their group, and the whole group is controlled and read through the leader.
#
#   leader_fd   - descriptor of the leader, -1 while no event has been opened
#   fds         - descriptors of all opened events (leader included)
#   event_types - the events that were actually opened, in the same order as fds
#   leader_io   - stream wrapping leader_fd, used to read the counter values
type EventGroup
    leader_fd :: Cint
    fds :: Vector{Cint}
    event_types :: Vector{EventType}
    leader_io :: IOStream

    # Open one perf event per entry of `types`.
    #
    # Keywords:
    #   warn_unsupported - warn about events the kernel rejects with EINVAL or
    #                      ENOENT (such events are skipped either way)
    #   userspace_only   - set the "exclude kernel" flag on every event
    #
    # Raises an error when perf_event_open fails for any other reason (the
    # descriptors opened so far are closed first) and when no event at all
    # could be opened.
    function EventGroup(types :: Vector{EventType};
                        warn_unsupported = true,
                        userspace_only = false
                        )
        group = new(-1, Array(Cint, 0), Array(EventType, 0))
        for evt_type in types
            attr = perf_event_attr()
            attr.typ = evt_type.category
            attr.size = sizeof(perf_event_attr)
            attr.config = evt_type.event
            attr.sample_period_or_freq = 0
            # Always assign the flags before OR-ing bits in, rather than relying
            # on whatever value the freshly constructed record holds.
            attr.flags = userspace_only ? (1 << 5) : 0 # bit 5: exclude kernel
            if group.leader_fd == -1
                attr.flags |= (1 << 0) # start disabled
            end
            attr.read_format =
                PERF_FORMAT_GROUP |
                PERF_FORMAT_TOTAL_TIME_ENABLED |
                PERF_FORMAT_TOTAL_TIME_RUNNING
            # Arguments after the attribute pointer are 0, -1, the leader's
            # descriptor (-1 for the leader itself) and 0.
            # NOTE(review): per perf_event_open(2) these are pid, cpu, group_fd
            # and flags -- confirm.
            fd = ccall(:syscall, Cint, (Clong, Clong...), SYS_perf_event_open,
                       pointer_from_objref(attr),
                       0, -1, group.leader_fd, 0)
            if fd < 0
                errno = Libc.errno()
                if errno in (Libc.EINVAL,Libc.ENOENT)
                    if warn_unsupported
                        warn("$evt_type not supported, skipping")
                    end
                    continue
                end
                # Fatal error: the half-built group is about to be dropped, so
                # release the descriptors that were already opened.
                for opened in group.fds
                    ccall(:close, Cint, (Cint,), opened)
                end
                if errno == Libc.EACCES && !userspace_only
                    warn("try to adjust /proc/sys/kernel/perf_event_paranoid to a value <= 1 or use user-space only events")
                end
                error("perf_event_open error : $(Libc.strerror(errno))")
            end
            push!(group.event_types, evt_type)
            push!(group.fds, fd)
            if group.leader_fd == -1
                group.leader_fd = fd
                group.leader_io = fdio(fd)
            end
        end
        # Without a leader there is nothing to reset or read; fail with a clear
        # message instead of an ioctl error on descriptor -1.
        if group.leader_fd == -1
            error("none of the requested events could be opened")
        end
        reset!(group)
        group
    end
end
| 200 | + |
# Number of events held by the group (the length of `event_types`).
function Base.length(g::EventGroup)
    return length(g.event_types)
end
| 202 | + |
# Print the group as "EventGroup(" followed by one tab-indented event per line,
# separated by commas, and a closing ")".
# A group without events prints as "EventGroup(" / ")" instead of raising a
# BoundsError on `event_types[end]`.
function Base.show(io::IO, g::EventGroup)
    println(io, "EventGroup(")
    n = length(g.event_types)
    for (i, e) in enumerate(g.event_types)
        print(io, "\t", e)
        # every event but the last is followed by a comma and a line break
        i < n && println(io, ",")
    end
    print(io, ")")
end
| 210 | + |
# ioctl request codes issued on a group's leader descriptor.
const PERF_EVENT_IOC_ENABLE = convert(UInt64, 0x2400)
const PERF_EVENT_IOC_DISABLE = convert(UInt64, 0x2401)
const PERF_EVENT_IOC_RESET = convert(UInt64, 0x2403)
| 214 | + |
# Issue ioctl request `x` on the leader descriptor of `group`, with 1 as the
# ioctl argument. Raises an error carrying the system error text when the call
# reports failure; returns nothing otherwise.
function ioctl(group::EventGroup, x)
    status = ccall(:ioctl, Cint, (Cint, Clong, Clong), group.leader_fd, x, 1)
    status >= 0 || error("ioctl error : $(Libc.strerror())")
    return nothing
end
# Send the enable request to the group's leader descriptor.
function enable!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_ENABLE)
end

# Send the disable request to the group's leader descriptor.
function disable!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_DISABLE)
end

# Send the reset request to the group's leader descriptor.
function reset!(g::EventGroup)
    return ioctl(g, PERF_EVENT_IOC_RESET)
end
# Close every descriptor owned by the group.
# The descriptors are forgotten afterwards, so calling close() again is a no-op
# rather than closing unrelated descriptors that may have reused the same
# numbers, and later ioctl calls fail on descriptor -1 instead of hitting a
# stranger's descriptor.
function Base.close(g::EventGroup)
    for fd in g.fds
        ccall(:close, Cint, (Cint,), fd)
    end
    empty!(g.fds)
    g.leader_fd = -1
    return nothing
end
| 229 | + |
# A collection of event groups that are enabled, disabled, reset and read
# together.
type PerfBench
    groups::Vector{EventGroup}
end
# One reading of a single event: the raw `value` plus the `enabled` and
# `running` figures read for the group the event belongs to.
immutable Counter
    event::EventType
    value::UInt64
    enabled::UInt64
    running::UInt64
end
# The readings of all events of a benchmark, wrapped so that they get their own
# `show` method.
immutable Counters
    counters::Vector{Counter}
end
# Print one entry per counter: the event name followed by either
# "never enabled" (enabled == 0), "did not run" (running == 0), or the counter
# value together with running/enabled as a percentage.
# All output, including the line break after each entry, goes to `io`
# (the line break used to be written to stdout).
function Base.show(io::IO, c::Counters)
    for counter in c.counters
        print(io, counter.event, " : ")
        if counter.enabled == 0
            print(io, "never enabled")
        elseif counter.running == 0
            print(io, "did not run")
        else
            @printf(io, "\n\t%20d (%.1f %%)", Int64(counter.value),
                    100*(counter.running/counter.enabled))
        end
        println(io)
    end
end
# Enable every group of the benchmark, in order.
function enable!(b::PerfBench)
    for g in b.groups
        enable!(g)
    end
end

# Disable every group of the benchmark, in order.
function disable!(b::PerfBench)
    for g in b.groups
        disable!(g)
    end
end

# Reset every group of the benchmark, in order.
function reset!(b::PerfBench)
    for g in b.groups
        reset!(g)
    end
end
# Read the current values of every group in `b` and return them as Counters.
#
# For a group of n events, n + 3 UInt64 words are read from the leader stream:
# word 1 is checked against n, words 2 and 3 are used as the enabled and
# running figures, and words 4 .. n + 3 are the per-event values in the order of
# `event_types`.
function counters(b::PerfBench)
    collected = Counter[]
    for g in b.groups
        values = read(g.leader_io, UInt64, length(g) + 3)
        @assert(length(g) == values[1])
        enabled = values[2]
        running = values[3]
        for (i, evt) in enumerate(g.event_types)
            push!(collected, Counter(evt, values[3+i], enabled, running))
        end
    end
    return Counters(collected)
end
# Build a PerfBench from the items of `x`. An item that is a single EventType
# gets a group of its own; any other item is passed to EventGroup as is.
function make_bench(x)
    groups = EventGroup[]
    for y in x
        events = isa(y, EventType) ? [y] : y
        push!(groups, EventGroup(events))
    end
    return PerfBench(groups)
end
| 282 | + |
# Default selection of events in the shape make_bench accepts: a lone EventType
# or a vector of EventTypes per group.
# The L1 data cache read/write access and miss groups are currently left out.
const reasonable_defaults = [
    # cycles on their own
    EventType(:hw, :cycles),
    # cache traffic
    [EventType(:hw, :cache_access), EventType(:hw, :cache_misses)],
    # branches and instructions
    [EventType(:hw, :branches),
     EventType(:hw, :branch_mispredicts),
     EventType(:hw, :instructions)],
    # software events
    [EventType(:sw, :ctx_switches),
     EventType(:sw, :page_faults),
     EventType(:sw, :minor_page_faults),
     EventType(:sw, :major_page_faults),
     EventType(:sw, :cpu_migrations)],
]
| 299 | + |
| 300 | +end |
0 commit comments