@@ -5,70 +5,49 @@ the kernel multiplexes event counter that requires limited hardware resources so
5
5
if you need to compare two quantities, you must put them in the same event group so that they are always scheduled at the same time (or not at all).
6
6
7
7
``` julia
8
- julia> import LinuxPerf: make_bench, enable!, disable!, reset!, reasonable_defaults, counters
9
- julia > const bench = make_bench (reasonable_defaults);
8
+ julia> using LinuxPerf
9
+
10
10
julia> @noinline function g (a)
11
- enable! (bench)
12
11
c = 0
13
12
for x in a
14
13
if x > 0
15
14
c += 1
16
15
end
17
16
end
18
- disable! (bench)
19
17
c
20
18
end
21
19
g (generic function with 1 method)
20
+
22
21
julia> g (zeros (10000 ))
23
22
0
24
- julia> counters (bench)
25
- hw: cycles :
26
- 52794 (100.0 % )
27
- hw: cache_access :
28
- 881 (100.0 % )
29
- hw: cache_misses :
30
- 579 (100.0 % )
31
- hw: branches :
32
- 31367 (100.0 % )
33
- hw: branch_mispredicts :
34
- 107 (100.0 % ) # =)
35
- hw: instructions :
36
- 96961 (100.0 % )
37
- sw: ctx_switches :
38
- 0 (100.0 % )
39
- sw: page_faults :
40
- 0 (100.0 % )
41
- sw: minor_page_faults :
42
- 0 (100.0 % )
43
- sw: major_page_faults :
44
- 0 (100.0 % )
45
- sw: cpu_migrations :
46
- 0 (100.0 % )
47
23
48
- julia> reset! (bench )
49
- julia > g ( randn ( 10000 ))
50
- 5023
51
- julia > counters (bench)
52
- hw: cycles :
53
- 194454 ( 100.0 % )
54
- hw: cache_access :
55
- 291 ( 100.0 % )
56
- hw: cache_misses :
57
- 222 ( 100.0 % )
58
- hw: branches :
59
- 38050 ( 100.0 % )
60
- hw: branch_mispredicts :
61
- 5131 ( 100.0 % ) # =(
62
- hw: instructions :
63
- 129253 ( 100.0 % )
64
- sw: ctx_switches :
65
- 0 ( 100.0 % )
66
- sw: page_faults :
67
- 0 ( 100.0 % )
68
- sw: minor_page_faults :
69
- 0 ( 100.0 % )
70
- sw: major_page_faults :
71
- 0 ( 100.0 % )
72
- sw: cpu_migrations :
73
- 0 ( 100.0 % )
24
+ julia> data = zeros(10000); @measure g(data)
25
+ ┌───────────────────────┬────────────┬─────────────┐
26
+ │ │ Events │ Active Time │
27
+ ├───────────────────────┼────────────┼─────────────┤
28
+ │ hw: cycles │ 25 , 583 , 165 │ 100.0 % │
29
+ ├───────────────────────┼────────────┼─────────────┤
30
+ │ hw: cache_access │ 1 , 640 , 429 │ 100.0 % │
31
+ ├───────────────────────┼────────────┼─────────────┤
32
+ │ hw: cache_misses │ 328 , 561 │ 100.0 % │
33
+ ├───────────────────────┼────────────┼─────────────┤
34
+ │ hw: branches │ 6 , 164 , 138 │ 100.0 % │
35
+ ├───────────────────────┼────────────┼─────────────┤
36
+ │ hw: branch_mispredicts │ 223 , 272 │ 100.0 % │
37
+ ├───────────────────────┼────────────┼─────────────┤
38
+ │ hw: instructions │ 28 , 115 , 285 │ 100.0 % │
39
+ ├───────────────────────┼────────────┼─────────────┤
40
+ │ sw: ctx_switches │ 0 │ 100.0 % │
41
+ ├───────────────────────┼────────────┼─────────────┤
42
+ │ sw: page_faults │ 41 │ 100.0 % │
43
+ ├───────────────────────┼────────────┼─────────────┤
44
+ │ sw: minor_page_faults │ 41 │ 100.0 % │
45
+ ├───────────────────────┼────────────┼─────────────┤
46
+ │ sw: major_page_faults │ 0 │ 100.0 % │
47
+ ├───────────────────────┼────────────┼─────────────┤
48
+ │ sw: cpu_migrations │ 0 │ 100.0 % │
49
+ └───────────────────────┴────────────┴─────────────┘
74
50
```
51
+
52
+ For more fine-tuned performance profiling examples, please check out the `test`
53
+ directory.
0 commit comments