Skip to content

Commit fe6cd63

Browse files
committed
adds the lifting benchmark
1 parent 1c3be48 commit fe6cd63

File tree

4 files changed

+186
-0
lines changed

4 files changed

+186
-0
lines changed

lifting-benchmark/Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Builds the lifting benchmark with ocamlbuild.
# Neither target creates a file named after itself, so both are declared
# phony; otherwise a stray file called `clean` (or `all`) in this directory
# would make the corresponding target a silent no-op.
.PHONY: all clean

# Build the native benchmark executable.
all:
	ocamlbuild -package bap -package findlib.dynload bench.native

# Remove everything ocamlbuild produced.
clean:
	ocamlbuild -clean

lifting-benchmark/README.md

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
Lifter Benchmark
2+
================
3+
4+
This simple tool will take a file and disassemble it from the very
5+
beginning to the last byte (even if it is not code at all), and try
6+
to lift every possible instruction. It won't try to parse file headers,
7+
find code sections, or anything like this. At the end, if everything
8+
went fine, it will print the descriptive statistics of the lifter.
9+
10+
The benchmark also disables the optimization pipeline; however, there is still
11+
a small overhead over the lifter, because the typechecking is enforced.
12+
In fact, at the end, we typecheck at least thrice :)
13+
14+
Building and using
15+
==================
16+
17+
```
18+
make
19+
./bench.native <filename>
20+
```
21+
22+
You can also specify an architecture:
23+
```
24+
./bench.native <arch> <filename>
25+
```
26+
27+
28+
Examples
29+
========
30+
31+
On Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz, bap 1.5 built with OCaml 4.05.0+flambda,
32+
and llvm-3.8, yields the following results,
33+
```
34+
$ ./bench.native /lib/x86_64-linux-gnu/libc-2.23.so
35+
Statistics for the x86_64 lifter
36+
Total time: 6.89342 s
37+
Total throughtput: 265 kB/s
38+
Insn throughtput: 74532 I/s
39+
Insn latency: 13.42 mks/I
40+
Bytes processed: 1868831
41+
Data bytes: 24636
42+
Code bytes: 1844195
43+
Code density: 98.68%
44+
Total number of instructions: 513777
45+
Lifted instructions: 497837
46+
Lifting coverage: 96.90%
47+
```
48+
49+
and for Google Chrome
50+
51+
```
52+
$ ./bench.native /opt/google/chrome/chrome
53+
Statistics for the x86_64 lifter
54+
Total time: 571.788 s
55+
Total throughtput: 240 kB/s
56+
Insn throughtput: 75191 I/s
57+
Insn latency: 13.30 mks/I
58+
Bytes processed: 140517351
59+
Data bytes: 1285721
60+
Code bytes: 139231630
61+
Code density: 99.09%
62+
Total number of instructions: 42993264
63+
Lifted instructions: 41657415
64+
Lifting coverage: 96.89%
65+
```
66+
67+
Normalized to the CPU speed (i.e., CPU speed / throughput ), it means
68+
that currently the lifter on average makes 9,000 ops per disassembled
69+
byte and 30,000 operations per disassembled instruction.
70+
71+
72+
Caveats
73+
=======
74+
75+
For some reason the arm backend in LLVM just stops instead of erroring,
76+
on malformed data. So for arm we need to feed it with something that
77+
looks like code.

lifting-benchmark/bench.ml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
open Core_kernel
2+
open Bap.Std
3+
open Bap_plugins.Std
4+
5+
(** [usage ()] writes a one-line description of the tool and its
    command-line synopsis to the standard error, then terminates the
    process with exit status 1. It never returns. *)
let usage () =
  let describe () =
    eprintf "Performs linear sweep disassembly and lifting of raw bytes\n" in
  let synopsis () =
    eprintf "Usage: ./bench [<arch>] <file>\n" in
  describe ();
  synopsis ();
  exit 1
9+
10+
(* Short alias for [Disasm_expert.Basic]; the [disasm] function below
   drives the disassembler through it. *)
module Dis = Disasm_expert.Basic
11+
12+
(** Mutable counters and timestamps that are filled in during a benchmark
    run, together with the printer of the final report. *)
module Stats = struct
  (* Timestamps in seconds, taken with [Unix.gettimeofday] by the
     disassembly driver right before the sweep and when it stops. *)
  let start = ref 0.
  let finish = ref 0.

  (* Decoded instructions for which lifting failed / succeeded. *)
  let fails = ref 0
  let insns = ref 0

  (* Bytes that were decoded as instructions / bytes that were rejected
     by the disassembler. *)
  let code = ref 0
  let data = ref 0

  (** [update stat n] adds [n] to the integer counter [stat]. *)
  let update stat n = stat := !stat + n

  (** [print arch] prints the collected statistics for the [arch] lifter
      on the standard output.

      Rates are derived from the elapsed time [!finish -. !start]; when
      that time or the instruction count is zero the corresponding figures
      are printed as non-finite floats rather than raising. *)
  let print arch =
    let total = !finish -. !start in
    let total_bytes = !code + !data in
    let total_insns = !insns + !fails in
    (* Seconds per instruction scaled to microseconds. *)
    let latency = (total /. float total_insns) *. 1e6 in
    let speed p = float p /. total in
    let ratio m n = (float m /. float n) *. 100. in
    (* The conversion to kilobytes is done in floating point: an integer
       division by 1024 would drop the remainder before the rate is
       computed and report 0 kB/s for any input smaller than 1 KiB. *)
    let kbytes_per_sec = float total_bytes /. 1024. /. total in
    printf "Statistics for the %a lifter\n" Arch.ppo arch;
    printf "Total time: %g s\n" total;
    printf "Total throughtput: %.0f kB/s\n" kbytes_per_sec;
    printf "Insn throughtput: %.0f I/s\n" @@ speed total_insns;
    printf "Insn latency: %.2f mks/I\n" latency;
    printf "Bytes processed: %d\n" total_bytes;
    printf "Data bytes: %d\n" !data;
    printf "Code bytes: %d\n" !code;
    printf "Code density: %.2f%%\n" @@ ratio !code total_bytes;
    printf "Total number of instructions: %d\n" total_insns;
    printf "Lifted instructions: %d\n" !insns;
    printf "Lifting coverage: %.2f%%\n" @@ ratio !insns total_insns
end
42+
43+
(** [disasm arch mem] runs the llvm-backed disassembler for [arch] over
    [mem] and tries to lift every instruction it decodes, recording the
    outcome in [Stats].

    Returns [Ok ()] once the run is over, or the error reported by
    [Dis.with_disasm]. *)
let disasm arch mem =
  let module Target = (val target_of_arch arch) in
  (* Called when the disassembler stops: record the end timestamp. *)
  let on_stopped _ () =
    Stats.finish := Unix.gettimeofday () in
  (* Bytes that do not decode are accounted as data; then keep going. *)
  let on_invalid state chunk () =
    Stats.update Stats.data (Memory.length chunk);
    Dis.step state () in
  (* A decoded instruction: account its bytes as code, try to lift it,
     bump the matching counter, and keep going. *)
  let on_hit state chunk insn () =
    Stats.update Stats.code (Memory.length chunk);
    let counter = match Target.lift chunk insn with
      | Ok _ -> Stats.insns
      | Error _ -> Stats.fails in
    incr counter;
    Dis.step state () in
  Dis.with_disasm ~backend:"llvm" (Arch.to_string arch) ~f:(fun dis ->
      (* The clock starts only after the disassembler has been created. *)
      Stats.start := Unix.gettimeofday ();
      Result.return @@ Dis.run dis mem ~init:() ~return:ident
        ~stop_on:[`Valid]
        ~stopped:on_stopped
        ~invalid:on_invalid
        ~hit:on_hit)
63+
64+
65+
(** [main arch file] loads [file] as one memory region based at address
    zero (with the address width and endianness of [arch]), runs the
    benchmark over it and prints the statistics.

    Both failure paths report the error on the standard error and
    terminate the process with exit status 1, so that callers and scripts
    can detect a failed run. *)
let main arch file =
  let width = Size.in_bits (Arch.addr_size arch) in
  let base = Word.zero width in
  match Memory.of_file (Arch.endian arch) base file with
  | Error err ->
    eprintf "Error: file is not readable or regular - %s\n"
      (Error.to_string_hum err);
    exit 1
  | Ok mem ->
    begin match disasm arch mem with
      | Error err ->
        eprintf "Error: failed to initialize the disassembler - %s\n"
          (Error.to_string_hum err);
        (* Without this exit the failure was reported with status 0. *)
        exit 1
      | Ok () ->
        Stats.print arch
    end
79+
80+
(** [read_arch s] parses [s] as an architecture name.

    When [s] is not recognised by [Arch.of_string], the known
    architectures are listed on the standard error and the process is
    terminated with exit status 1. *)
let read_arch s =
  let unknown () =
    eprintf "Error: unknown architecture %s\n" s;
    eprintf "The list of known architectures:\n";
    List.iter Arch.all ~f:(eprintf "\t%a\n" Arch.ppo);
    exit 1 in
  match Arch.of_string s with
  | None -> unknown ()
  | Some arch -> arch
87+
88+
(** [read_file s] returns [s] unchanged when it names an existing path
    that is not a directory; otherwise it reports the problem on the
    standard error and leaves through [usage], which does not return. *)
let read_file s =
  (* The directory check is only reached for paths that exist. *)
  if not (Sys.file_exists s) || Sys.is_directory s
  then begin
    eprintf "Error: `%s' is not a regular file\n" s;
    usage ()
  end
  else s
95+
96+
97+
(* Program entry point: load the plugins (all but bil), then dispatch on
   the command-line arguments. With a single argument the architecture
   defaults to x86_64; any other argument count prints the usage text. *)
let () =
  Plugins.run ~exclude:["bil"] ();
  match Array.to_list Sys.argv with
  | [_; file] -> main `x86_64 (read_file file)
  | [_; arch; file] -> main (read_arch arch) (read_file file)
  | _ -> usage ()

lifting-benchmark/bench.mli

Whitespace-only changes.

0 commit comments

Comments
 (0)