Skip to content

Commit c8250d3

Browse files
authored
improves symbolization facilities (#1209)
Implements support for various relocations and improves existing that enables us to pass all tests without relying on external symbols or tools such as objdump or radare2. This branch support PLT-like relocations, as well as direct calls with GLOB_DAT relocations (fixes #1135). The PLT entries are constant folded and memory references are then analyzed. We also extended the analysis that detects stub functions to support various ABI and file formats. For PowerPC MachO, that stores stubs directly in the text section, we implemented a signature matching procedure to reliably detect the stubs. We also significantly improved support of mips, which was sufferening from missing function starts that correspond to the stubbed functions as byteweigh is unable to detect these stubs. In addition, this PR brings a new library called Bap_relation that is a bidirectional mapping useful for storing addr <-> name mapping and ensure their bijection. This library is now used explicitly or implicitly (via the old symbolizer interface) by all our providers of symbolic information. This change prevents symbolizers from providing conflicting information, which may later lead to the knowledge base conflicts. We also removed so far the name to address translation service that we recently introduced #1119. We are not ready for this service yet (our knowledge base is not having enough rules stored in it) and without this rule we can disassemble 25% faster. There are also a couple of minor fixes and quality of life improvements: - fixes Insn.dests domain functions - a better default for the KB.Domain.Powerset inspect parameter - makes glibc-runtime heuristic more aggressive
1 parent 7019520 commit c8250d3

34 files changed

+923
-213
lines changed

.merlin

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ B _build/lib/knowledge
4242
B lib/bap
4343
B lib/bap_main
4444
B lib/bap_recipe
45+
B lib/bap_relation
4546
B lib/bap_abi
4647
B lib/bap_api
4748
B lib/bitvec

lib/bap_disasm/bap_disasm_insn.ml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ module Slot = struct
138138
let dests =
139139
let empty = Some (Set.empty (module Theory.Label)) in
140140
let order x y : KB.Order.partial = match x,y with
141+
| Some x,_ when Set.is_empty x -> LT
142+
| _,Some x when Set.is_empty x -> GT
141143
| None,None -> EQ
142144
| None,_ | _,None -> NC
143145
| Some x, Some y ->
@@ -146,7 +148,9 @@ module Slot = struct
146148
if Set.is_subset y x then GT else NC in
147149
let join x y = match x,y with
148150
| None,None -> Ok None
149-
| None,_ |Some _,None -> Error Jump_vs_Move
151+
| None,Some x |Some x,None ->
152+
if Set.is_empty x then Ok None
153+
else Error Jump_vs_Move
150154
| Some x, Some y -> Ok (Some (Set.union x y)) in
151155
let module IO = struct
152156
module Set = Set.Make_binable_using_comparator(Theory.Label)

lib/bap_disasm/bap_disasm_symbolizer.ml

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ open Bap_image_std
55
open Bap_disasm_source
66
open KB.Syntax
77

8+
include Bap_main.Loggers()
89

910
type t = {
1011
path : string option;
@@ -48,28 +49,47 @@ let empty = create (fun _ -> None)
4849
let chain ss =
4950
create (fun addr -> List.find_map ss ~f:(fun s -> run s addr))
5051

52+
let string_of_addr addrs =
53+
List.map addrs ~f:Addr.to_string |>
54+
String.concat ~sep:", "
55+
56+
let report_broken = function
57+
| Bap_relation.Non_injective_fwd (addrs,name) ->
58+
info "skipping (%s) as they all have the same name %s"
59+
(string_of_addr addrs) name
60+
| Bap_relation.Non_injective_bwd (names,addr) ->
61+
info "skipping (%s) as they all have the same address %a"
62+
(String.concat names ~sep:", ") Addr.pp addr
63+
5164
let of_image img =
5265
let symtab = Image.symbols img in
5366
let names = Addr.Table.create () in
54-
Table.iteri symtab ~f:(fun mem sym ->
67+
let init = Bap_relation.empty Addr.compare String.compare in
68+
Table.foldi symtab ~init ~f:(fun mem sym rels ->
5569
let name = Image.Symbol.name sym
5670
and addr = Memory.min_addr mem in
5771
if not (Name.is_empty name)
58-
then Hashtbl.set names ~key:addr ~data:name);
72+
then Bap_relation.add rels addr name
73+
else rels) |> fun rels ->
74+
Bap_relation.matching rels ()
75+
~saturated:(fun addr name () -> Hashtbl.add_exn names addr name)
76+
~unmatched:(fun reason () -> report_broken reason);
5977
{find = Hashtbl.find names; path=Image.filename img; biased=true}
6078

6179
let of_blocks seq =
6280
let names =
81+
let empty_rel = Bap_relation.empty Addr.compare String.compare in
6382
Seq.fold seq ~init:String.Map.empty ~f:(fun addrs (name,addr,_) ->
6483
Map.update addrs name (function
6584
| Some addr' -> Addr.min addr addr'
6685
| None -> addr)) |>
6786
Map.to_sequence |>
68-
Seq.fold ~init:Addr.Map.empty ~f:(fun names (name,entry) ->
69-
Map.add_multi names entry name) in
70-
create @@ fun addr -> match Map.find names addr with
71-
| Some [name] -> Some name
72-
| _ -> None
87+
Seq.fold ~init:empty_rel ~f:(fun rels (name,entry) ->
88+
Bap_relation.add rels entry name) |> fun rels ->
89+
Bap_relation.matching rels Addr.Map.empty
90+
~saturated:(fun addr name names -> Map.add_exn names addr name)
91+
~unmatched:(fun reason names -> report_broken reason; names) in
92+
create @@ Map.find names
7393

7494
module Factory = Factory.Make(struct type nonrec t = t end)
7595

@@ -102,9 +122,7 @@ let providing agent s =
102122
~propose:(provide_symbolizer s)
103123

104124
let get_name addr =
105-
let data = Some (Word.to_bitvec addr) in
106-
KB.Object.scoped Theory.Program.cls @@ fun label ->
107-
KB.provide Theory.Label.addr label data >>= fun () ->
125+
Theory.Label.for_addr (Word.to_bitvec addr) >>= fun label ->
108126
KB.collect Theory.Label.name label >>| function
109127
| None -> name_of_addr addr
110128
| Some name -> name

lib/bap_image/bap_memory.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ let last_byte mem : t =
170170
let contains mem =
171171
Addr.between ~low:(min_addr mem) ~high:(max_addr mem)
172172

173+
173174
let compare_with mem addr =
174175
let low = min_addr mem and high = max_addr mem in
175176
if Addr.between ~low ~high addr then `addr_is_inside else

lib/bap_image/bap_memory.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ val create
1414
-> addr
1515
-> Bigstring.t -> t Or_error.t
1616

17+
1718
val rebase : t -> addr -> t
1819

1920
val slot : (Theory.program, t option) KB.slot

lib/bap_llvm/llvm_elf_loader.hpp

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -166,16 +166,19 @@ void emit_symbol_entry(const ELFObjectFile<T> &obj, const SymbolRef &sym, ogre_d
166166
auto name = prim::symbol_name(sym);
167167
auto addr = prim::symbol_address(sym);
168168
auto off = symbol_file_offset(obj, sym);
169-
if (name && addr && off && !name->empty() &&
170-
!is_external(*addr, *off, sym_elf->st_size)) {
171-
s.entry("llvm:symbol-entry") << *name
172-
<< *addr
173-
<< sym_elf->st_size
174-
<< *off
175-
<< sym_elf->st_value;
176-
177-
if (sym_elf->getType() == ELF::STT_FUNC)
178-
s.entry("llvm:code-entry") << *name << *off << sym_elf->st_size ;
169+
if (name && addr && off && !name->empty()) {
170+
if (is_external(*addr, *off, sym_elf->st_size) && sym_elf->st_value) {
171+
s.entry("llvm:name-reference") << sym_elf->st_value << *name;
172+
} else {
173+
s.entry("llvm:symbol-entry") << *name
174+
<< *addr
175+
<< sym_elf->st_size
176+
<< *off
177+
<< sym_elf->st_value;
178+
179+
if (sym_elf->getType() == ELF::STT_FUNC)
180+
s.entry("llvm:code-entry") << *name << *off << sym_elf->st_size ;
181+
}
179182
}
180183
}
181184

@@ -196,23 +199,22 @@ void emit_symbol_entries(const ELFObjectFile<T> &obj, ogre_doc &s) {
196199
template <typename T>
197200
void emit_relocations(const ELFObjectFile<T> &obj, ogre_doc &s) {
198201
for (auto sec : obj.sections()) {
199-
if (auto rel_sec = prim::relocated_section(sec)) {
200-
for (auto rel : sec.relocations()) {
201-
auto sym = rel.getSymbol();
202-
if (sym != prim::end_symbols(obj)) {
203-
uint64_t base = section_address(obj, *rel_sec);
204-
auto raddr = prim::relocation_offset(rel) + base;
205-
if (auto addr = prim::symbol_address(*sym))
206-
if (*addr) s.entry("llvm:relocation") << raddr << *addr;
207-
if (auto name = prim::symbol_name(*sym))
202+
for (auto rel : sec.relocations()) {
203+
auto sym = rel.getSymbol();
204+
if (sym != prim::end_symbols(obj)) {
205+
uint64_t raddr = prim::relocation_offset(rel);
206+
if (auto addr = prim::symbol_address(*sym))
207+
if (*addr) s.entry("llvm:relocation") << raddr << *addr;
208+
if (auto name = prim::symbol_name(*sym))
209+
if (!name->empty())
208210
s.entry("llvm:name-reference") << raddr << *name;
209-
}
210211
}
211212
}
212213
}
213214
}
214215

215216

217+
216218
} // namespace elf_loader
217219

218220
template <typename T>

lib/bap_llvm/llvm_macho_loader.hpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -409,20 +409,13 @@ void emit_indirect_symbols(const macho &obj, const MachO::dysymtab_command &dlc,
409409
uint64_t sym_numb = section_size(obj, sec) / stride;
410410
uint32_t tab_indx = section_reserved1(obj, sec);
411411
uint64_t sec_addr = section_addr(obj, sec);
412-
uint32_t sec_offs = section_offset(obj, sec);
413412

414413
for (uint32_t j = 0; j < sym_numb && tab_indx + j < dlc.nindirectsyms; j++) {
415414
auto sym = get_indirect_symbol(obj, dlc, tab_indx + j);
416415
if (sym != prim::end_symbols(obj)) {
417416
if (auto name = prim::symbol_name(*sym)) {
418417
auto sym_addr = sec_addr + j * stride;
419-
auto sym_offs = sec_offs + j * stride;
420-
s.entry("llvm:symbol-entry") << *name
421-
<< sym_addr
422-
<< stride
423-
<< sym_offs
424-
<< sym->getValue();
425-
s.entry("llvm:code-entry") << *name << sym_offs << stride;
418+
s.entry("llvm:name-reference") << sym_addr << *name;
426419
}
427420
}
428421
}

lib/bap_relation/bap_relation.ml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
open Base
2+
3+
type ('k,'d) t = Rel : {
4+
vals : ('k, ('d,'vo) Set.t, 'ko) Map.t;
5+
keys : ('d, ('k,'ko) Set.t, 'vo ) Map.t;
6+
} -> ('k,'d) t
7+
8+
let empty (type key) (type data) compare_key compare_data =
9+
let module D = struct
10+
type t = data
11+
include Comparator.Make(struct
12+
type t = data
13+
let compare = compare_data
14+
let sexp_of_t _ = Sexp.List []
15+
end)
16+
end in
17+
let module K = struct
18+
type t = key
19+
include Base.Comparator.Make(struct
20+
type t = key
21+
let compare = compare_key
22+
let sexp_of_t _ = Sexp.List []
23+
end)
24+
end in
25+
Rel {
26+
vals = Map.empty (module K);
27+
keys = Map.empty (module D);
28+
}
29+
30+
let add (Rel {vals; keys}) key value = Rel {
31+
vals = Map.update vals key ~f:(function
32+
| Some vals -> Set.add vals value
33+
| None -> Set.singleton (Map.comparator_s keys) value);
34+
keys = Map.update keys value ~f:(function
35+
| Some keys -> Set.add keys key
36+
| None -> Set.singleton (Map.comparator_s vals) key);
37+
}
38+
39+
type ('k,'s) non_injective =
40+
| Non_injective_fwd of 'k list * 's
41+
| Non_injective_bwd of 's list * 'k
42+
43+
let skips _ _ x = x
44+
let skipu _ x = x
45+
46+
47+
let find_multi xs x = match Map.find xs x with
48+
| None -> []
49+
| Some xs -> Set.to_list xs
50+
51+
let matching (Rel {vals; keys}) ?(saturated=skips) ?(unmatched=skipu) init =
52+
Map.fold ~init vals ~f:(fun ~key ~data:vals init ->
53+
match Set.to_list vals with
54+
| [] -> assert false
55+
| _ :: _ :: _ as vals->
56+
unmatched (Non_injective_bwd (vals,key)) init
57+
| [s] -> match find_multi keys s with
58+
| [_] -> saturated key s init
59+
| xs -> unmatched (Non_injective_fwd (xs,s)) init)
60+
61+
let fold (Rel {vals}) ~init ~f =
62+
Map.fold vals ~init ~f:(fun ~key:left ~data:rights init ->
63+
Set.fold ~init rights ~f:(fun init right -> f left right init))
64+
65+
let iter rels ~f = fold rels ~init:() ~f:(fun x s () -> f x s)
66+
67+
let is_empty (Rel {vals}) = Map.is_empty vals
68+
let findl (Rel {vals}) = find_multi vals
69+
let findr (Rel {keys}) = find_multi keys
70+
let mem (Rel {vals; keys}) x s = Map.mem vals x && Map.mem keys s

lib/bap_relation/bap_relation.mli

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
(** A representation of relations between two sets.
2+
3+
A relation between two sets is a set of pairs made from the
4+
elements of these sets. The precise mathematical defition is given
5+
below. This module implements a bidirectional mapping between two
6+
sets and computes their matching that defines bijections between
7+
the sets.
8+
9+
{2 Format Definition and Notation}
10+
11+
Given two sets [K] and [S], with meta-variables [x,y,z] ranging
12+
over [K] and meta-variables [r,s,t] ranging over [S] we will
13+
denote a finitary relation [R] as a subset of the cartesian
14+
product [K x S], which is a set of pairs [(x,r), ..., (z,t)],
15+
which we represent as a bipartite graph [G = (K,S,R)].
16+
*)
17+
18+
(** the type for relation between ['k] and ['s]. *)
19+
type ('k,'s) t
20+
21+
(** [empty compare_k compare_s] the empty relation between two sets.
22+
23+
- [compare_k] is the function that defines order of the elements
24+
of the set [K].
25+
26+
- [compare_s] is the function that defines order of the elements
27+
of the set [S].
28+
29+
{3 Example}
30+
{[
31+
let empty = Bap_relation.empty
32+
Int.compare
33+
String.compare
34+
]}
35+
*)
36+
val empty : ('k -> 'k -> int) -> ('s -> 's -> int) -> ('k,'s) t
37+
38+
(** [is_empty rel] is true if the relation [rel] is an empty set. *)
39+
val is_empty : (_,_) t -> bool
40+
41+
(** [add relation x s] establishes a relation between [x] and [s]. *)
42+
val add : ('k,'s) t -> 'k -> 's -> ('k,'s) t
43+
44+
(** [mem rel x s] is [true] if [(k,s)] is in the relation [rel]. *)
45+
val mem : ('k,'s) t -> 'k -> 's -> bool
46+
47+
(** [findl rel x] finds all pairs in [rel] that have [x] on the left. *)
48+
val findl : ('k,'s) t -> 'k -> 's list
49+
50+
(** [findr rel s] finds all pairs in [rel] that have [s] on the right. *)
51+
val findr : ('k,'s) t -> 's -> 'k list
52+
53+
(** [fold rel init f] folds over all pairs in the relation [rel]. *)
54+
val fold : ('k,'s) t -> init:'a -> f:('k -> 's -> 'a -> 'a) -> 'a
55+
56+
(** [iter rel f] iterates over all pairs in the relation [rel]. *)
57+
val iter : ('k,'s) t -> f:('k -> 's -> unit) -> unit
58+
59+
60+
(** {2 Bijections and matching}
61+
62+
The set of independent edges [M] (the matching) of the graph [G]
63+
forms a finite bijection between [K] and [S]. It is guaranteed
64+
that for each pair [(x,s)] in [M] there is no other pair in [M],
65+
that will include [x] or [s].
66+
67+
Edges [R] that are not in the matching [M] represent a subset of
68+
[R] that do not match because of one the two anomalies:
69+
- A non-injective forward mapping occurs when the same value from
70+
the set [S] is in relation with more than one value from the set
71+
[K], e.g., [(x,s), (y,s)] is encoded as
72+
[Non_injective_fwd ([x,y],s)];
73+
74+
- A non-injective backward mapping occurs when the same value from
75+
the set [K] is in relation with more than one value from the set
76+
[S], e.g., [(x,r), (x,s)] is encoded as
77+
[Non_injective_bwd ([r;s],x);
78+
*)
79+
80+
(** the reason why was the pair left unmatched *)
81+
type ('k,'s) non_injective =
82+
| Non_injective_fwd of 'k list * 's (** Non-injective forward mapping. *)
83+
| Non_injective_bwd of 's list * 'k (** Non-injective backward mapping. *)
84+
85+
(** [matching relation data] computes the matching for the given [relation].
86+
87+
Calls [saturated x s data] for each [(x,s)] in the matching
88+
[M] (see the module description) and [unmatched z reason d] for
89+
each [(z,t)] in the relation that are not matched, the reason
90+
is one of the:
91+
92+
- [Non_injective_fwd (xs,s)] if the mapping [K -> S] that is
93+
induced by the [relation] is non-injective, because the set of
94+
values [xs] from [K] are mapped to the same value [s] in [S].
95+
96+
- [Non_injective_bwd (ss,x)] if the mapping [S -> K] that is
97+
induced by the [relation] is non-injective, because the set of
98+
values [ss] from [S] are mapped to the same value [x] in [K].
99+
*)
100+
val matching : ('k,'s) t ->
101+
?saturated : ('k -> 's -> 'a -> 'a) ->
102+
?unmatched : (('k,'s) non_injective -> 'a -> 'a) -> 'a -> 'a

lib/bap_types/bap_value.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ let register (type a) ?public ?desc ?package ~name ~uuid
100100
type t = S.t option [@@deriving bin_io]
101101
end) in
102102
let equal x y = S.compare x y = 0 in
103-
let domain = KB.Domain.optional ~equal name in
103+
let domain = KB.Domain.optional ~equal name ~inspect:S.sexp_of_t in
104104
let slot = KB.Class.property ?public ?desc ~persistent ?package
105105
Theory.Program.cls name domain in
106106
register_slot ~uuid slot (module S)

0 commit comments

Comments
 (0)