Skip to content

Commit 07e4fa9

Browse files
authored
tweaks symbolization and function start identification facilities (#1355)
We remove the dependency on the Bap.Std.Image and instead use the image specification directly. These gives us strictly more symbols, as image imposes extra constraints, which my hide functions starts and their names. More information is not always better, as we now have more chances to get the conflicting knowledge. To ensure that we're able to preserve as much information as possible without compromising correctness we leverage our agent-based conflict resolution system. We push all names in which we're not completely sure into possible aliases and use a new agent, `bap:gossiper` to propse names from that set. To make everything work fine, we pushed down the reliability of the objdump symbolizer (as we want bap to have the final word). The improved symbolization facility uncovered a small bug in the way how the x86 lock intrinsic was implemented, it was named just `"lock"`, which obviously may conflict with a normal function with the same name (which was uncovered by our testsuite). This commit adds the `x86` prefix to the intrinsic, e.g., `x86:lock` as well as properly delimits the locked code with the corresponding `x86:unlock` intrinsic.
1 parent 51b4988 commit 07e4fa9

File tree

7 files changed

+194
-20
lines changed

7 files changed

+194
-20
lines changed

lib/bap/bap_project.ml

Lines changed: 176 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,182 @@ let memory_slot = KB.Class.property Theory.Unit.cls "unit-memory"
3939
~desc:"annotated memory regions of the unit"
4040
Memmap.domain
4141

42+
module Symbols = struct
43+
open KB.Let
44+
open KB.Syntax
45+
46+
module Addr = struct
47+
include Bitvec
48+
include Bitvec_order
49+
include Bitvec_binprot.Functions
50+
include Bitvec_sexp.Functions
51+
end
52+
53+
type table = {
54+
roots : Set.M(Addr).t;
55+
names : string Map.M(Addr).t;
56+
aliases : Set.M(String).t Map.M(Addr).t;
57+
} [@@deriving compare, equal, bin_io, sexp]
58+
59+
let empty = {
60+
roots = Set.empty (module Addr);
61+
names = Map.empty (module Addr);
62+
aliases = Map.empty (module Addr);
63+
}
64+
65+
let slot = KB.Class.property Theory.Unit.cls
66+
~package:"bap" "symbol-table"
67+
~persistent:(KB.Persistent.of_binable (module struct
68+
type t = table [@@deriving bin_io]
69+
end)) @@
70+
KB.Domain.flat ~empty ~equal:equal_table "symbols"
71+
72+
let is_ident s =
73+
String.length s > 0 &&
74+
(Char.is_alpha s.[0] || Char.equal s.[0] '_') &&
75+
String.for_all s ~f:(fun c -> Char.is_alphanum c ||
76+
Char.equal c '_')
77+
78+
let from_spec t =
79+
let collect fld = Ogre.collect Ogre.Query.(select @@ from fld) in
80+
let open Ogre.Let in
81+
let to_addr =
82+
let m = Bitvec.modulus (Theory.Target.code_addr_size t) in
83+
let n = Theory.Target.code_alignment t / Theory.Target.byte t in
84+
let mask = Int64.(lnot (of_int n - 1L)) in
85+
fun x ->
86+
let x = Int64.(x land mask) in
87+
Bitvec.(int64 x mod m) in
88+
let add_alias aliases addr alias = Map.update aliases addr ~f:(function
89+
| None -> Set.singleton (module String) alias
90+
| Some names -> Set.add names alias) in
91+
let pp_comma ppf () = Format.pp_print_string ppf ", " in
92+
let pp_addrs =
93+
Format.pp_print_list ~pp_sep:pp_comma Bitvec.pp
94+
and pp_names =
95+
Format.pp_print_list ~pp_sep:pp_comma Format.pp_print_string in
96+
let* roots =
97+
let+ roots =
98+
let* starts = collect Image.Scheme.code_start in
99+
let* values = collect Image.Scheme.symbol_value in
100+
let+ entry = Ogre.request Image.Scheme.entry_point in
101+
let roots = Seq.append starts (Seq.map ~f:fst values) in
102+
match entry with
103+
| None -> roots
104+
| Some entry -> Seq.cons entry roots in
105+
Seq.fold roots ~init:(Set.empty (module Bitvec_order))
106+
~f:(fun xs x -> Set.add xs (to_addr x)) in
107+
let+ named_symbols = collect Image.Scheme.named_symbol in
108+
let init = Bap_relation.empty Bitvec.compare String.compare in
109+
Seq.fold named_symbols ~init ~f:(fun rel (data,name) ->
110+
let addr = to_addr data in
111+
if Set.mem roots addr && is_ident name
112+
then Bap_relation.add rel (to_addr data) name
113+
else rel) |> fun rel ->
114+
Bap_relation.matching rel {empty with roots}
115+
~saturated:(fun k v t -> {
116+
t with names = Map.add_exn t.names k v
117+
})
118+
~unmatched:(fun reason t -> {
119+
t with aliases = match reason with
120+
| Non_injective_fwd (addrs,name) ->
121+
info "the symbol %s has ambiguous addresses: %a@\n"
122+
name pp_addrs addrs;
123+
List.fold addrs ~init:t.aliases ~f:(fun aliases addr ->
124+
add_alias aliases addr name)
125+
| Non_injective_bwd (names,addr) ->
126+
info "the symbol at %a has ambiguous names: %a@\n"
127+
Bitvec.pp addr pp_names names;
128+
List.fold names ~init:t.aliases ~f:(fun aliases name ->
129+
add_alias aliases addr name)
130+
})
131+
132+
let build_table t spec = match Ogre.eval (from_spec t) spec with
133+
| Ok x -> x
134+
| Error err ->
135+
invalid_argf "Malformed ogre specification: %s"
136+
(Error.to_string_hum err) ()
137+
138+
let collect_inputs from obj f =
139+
KB.collect Theory.Label.unit obj >>=? fun unit ->
140+
KB.collect Theory.Label.addr obj >>=? fun addr ->
141+
let+ data = KB.collect from unit in
142+
f data addr
143+
144+
let promised_table : unit =
145+
KB.promise slot @@ fun unit ->
146+
let* t = KB.collect Theory.Unit.target unit in
147+
let+ s = KB.collect Image.Spec.slot unit in
148+
build_table t s
149+
150+
let promised_roots : unit =
151+
KB.Rule.(begin
152+
declare "provides roots" |>
153+
require Image.Spec.slot |>
154+
provide Theory.Label.is_subroutine |>
155+
comment "computes roots from spec";
156+
end);
157+
KB.promise Theory.Label.is_subroutine @@ fun obj ->
158+
collect_inputs slot obj @@ fun {roots} addr ->
159+
Option.some_if (Set.mem roots addr) true
160+
161+
162+
let names_agent = KB.Agent.register
163+
~package:"bap" "specification-provider"
164+
~desc:"provides names obtained from the image specification."
165+
166+
let promised_names : unit =
167+
KB.Rule.(begin
168+
declare "provides names" |>
169+
require Image.Spec.slot |>
170+
provide Theory.Label.possible_name |>
171+
comment "computes symbol names from spec";
172+
end);
173+
KB.propose names_agent Theory.Label.possible_name @@ fun obj ->
174+
collect_inputs slot obj @@ fun {names} addr ->
175+
Map.find names addr
176+
177+
178+
let promised_aliases : unit =
179+
KB.Rule.(begin
180+
declare "provides aliases" |>
181+
require Image.Spec.slot |>
182+
provide Theory.Label.possible_name |>
183+
comment "computes symbol aliases (names) from spec";
184+
end);
185+
KB.promise Theory.Label.aliases @@ fun obj ->
186+
let* unit = KB.collect Theory.Label.unit obj in
187+
let* addr = KB.collect Theory.Label.addr obj in
188+
match unit,addr with
189+
| None,_|_,None -> KB.return (Set.empty (module String))
190+
| Some unit, Some addr ->
191+
let+ {aliases} = KB.collect slot unit in
192+
match Map.find aliases addr with
193+
| None -> Set.empty (module String)
194+
| Some aliases -> aliases
195+
196+
let gossiper = KB.Agent.register
197+
~package:"bap" "symbols-gossiper"
198+
~desc:"propses an alias as a possible name"
199+
~reliability:KB.Agent.unreliable
200+
201+
let gossiped_aliases : unit =
202+
KB.Rule.(begin
203+
declare "provides aliases as names" |>
204+
require Image.Spec.slot |>
205+
provide Theory.Label.possible_name |>
206+
comment "uses aliases as an unreliable source of symbol names";
207+
end);
208+
KB.propose gossiper Theory.Label.possible_name @@ fun obj ->
209+
let+ aliases = KB.collect Theory.Label.aliases obj in
210+
match Set.find aliases ~f:is_ident with
211+
| None -> Set.max_elt aliases
212+
| ident -> ident
213+
end
214+
215+
216+
217+
42218
let with_filename spec target _code memory path f =
43219
let open KB.Syntax in
44220
let width = Theory.Target.code_addr_size target in
@@ -237,27 +413,11 @@ module Input = struct
237413
target = compute_target ?target (Image.spec img);
238414
}
239415

240-
let symtab_agent =
241-
let reliability = KB.Agent.authorative in
242-
KB.Agent.register "symtab"
243-
~reliability
244-
~desc:"extracts symbols from symbol tables"
245-
~package:"bap"
246-
247-
let provide_image file image =
248-
let image_symbols = Symbolizer.(set_path (of_image image) file) in
249-
let image_roots = Rooter.(set_path (of_image image) file) in
250-
info "providing rooter and symbolizer from image of %a"
251-
Sexp.pp_hum ([%sexp_of : string option] (Image.filename image));
252-
Symbolizer.provide symtab_agent image_symbols;
253-
Rooter.provide image_roots
254-
255416
let of_image ?target ?loader filename =
256417
Image.create ?backend:loader filename >>| fun (img,warns) ->
257418
List.iter warns ~f:(fun e -> warning "%a" Error.pp e);
258419
let spec = Image.spec img in
259420
Signal.send Info.got_img img;
260-
provide_image filename img;
261421
let finish proj = {
262422
proj with
263423
storage = Dict.set proj.storage Image.specification spec;

lib/bap_relation/bap_relation.mli

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
sets and computes their matching that defines bijections between
77
the sets.
88
9-
{2 Format Definition and Notation}
9+
{2 Formal Definition and Notation}
1010
1111
Given two sets [K] and [S], with meta-variables [x,y,z] ranging
1212
over [K] and meta-variables [r,s,t] ranging over [S] we will
@@ -77,7 +77,7 @@ val iter : ('k,'s) t -> f:('k -> 's -> unit) -> unit
7777
[Non_injective_bwd ([r;s],x);
7878
*)
7979

80-
(** the reason why was the pair left unmatched *)
80+
(** the reason why the pair was left unmatched *)
8181
type ('k,'s) non_injective =
8282
| Non_injective_fwd of 'k list * 's (** Non-injective forward mapping. *)
8383
| Non_injective_bwd of 's list * 'k (** Non-injective backward mapping. *)

lib/bap_types/bap_stmt.ml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ module Special = struct
8080

8181
let pp_default ppf s = fprintf ppf "special@ @[<1>(%s)@]" s
8282

83+
let pp_cons ppf s =
84+
pp_print_string ppf @@ match String.chop_prefix s ~prefix:"bap:" with
85+
| None -> s
86+
| Some s -> s
87+
8388
let pp ppf s = match String.chop_prefix ~prefix s with
8489
| None -> pp_default ppf s
8590
| Some data -> match Sexp.of_string data with
@@ -88,7 +93,7 @@ module Special = struct
8893
let pp_vals ppf xs = pp_print_list
8994
~pp_sep:(fun ppf () -> pp_print_string ppf ", ")
9095
Sexp.pp_hum ppf xs in
91-
fprintf ppf "%s(@[<hv2>%a@])" cons pp_vals vals
96+
fprintf ppf "%a(@[<hv2>%a@])" pp_cons cons pp_vals vals
9297
| _ -> pp_default ppf s
9398
end
9499

oasis/bap-std

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,16 @@ Library bap
1717
cmdliner,
1818
bap-knowledge,
1919
bap-core-theory,
20+
bap-relation,
2021
graphlib,
2122
regular,
2223
core_kernel,
2324
ppx_bap,
2425
monads,
2526
bitvec,
27+
bitvec-binprot,
28+
bitvec-order,
29+
bitvec-sexp,
2630
ogre,
2731
fileutils,
2832
core_kernel.caml_unix

plugins/objdump/objdump_main.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ let with_objdump_output demangler ~file ~f ~init =
101101
let agent =
102102
KB.Agent.register ~package:"bap" "objdump-symbolizer"
103103
~desc:"extracts symbols objdump"
104+
~reliability:KB.Agent.doubtful
104105

105106
module Repository : sig
106107
type t

plugins/relocatable/rel_symbolizer.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ end
125125
let plt_agent = Knowledge.Agent.register
126126
~package:"bap" "plt-symbolizer"
127127
~desc:"extracts symbols from external relocations"
128+
~reliability:Knowledge.Agent.reliable
128129

129130
open KB.Syntax
130131

plugins/x86/x86_tools_prefix.ml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ open X86_tools_types
55
module Make (RR : RR) (FR : FR) (IV : IV) : PR = struct
66
type t = X86_prefix.t [@@deriving sexp, compare]
77

8-
let lock bil = Bil.(encode intrinsic "lock") :: bil
8+
let lock bil =
9+
Bil.(encode intrinsic "x86:lock") :: bil @ [
10+
Bil.(encode intrinsic "x86:unlock")
11+
]
912

1013
module Rep = struct
1114
let rcx =

0 commit comments

Comments
 (0)