Skip to content

Commit cd5aeb4

Browse files
authored
moves knowledge base rules from the library to the plugin (#1403)
They all fit nicely into the disassemble plugin and it also opens an opportunity to introduce command-line parameters for the rules. None are added so far, but its good to have this option.
1 parent fb4ed64 commit cd5aeb4

File tree

10 files changed

+292
-260
lines changed

10 files changed

+292
-260
lines changed

lib/bap/bap.mli

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6773,6 +6773,14 @@ module Std : sig
67736773
(** [ops insn] gives an access to [insn]'s operands. *)
67746774
val ops : ('a,'k) t -> op array
67756775

6776+
(** [subs insn] an array of subinstructions.
6777+
6778+
An instruction can contain subinstructions, which could be
6779+
accessed with this function. Returns an empty array if
6780+
there are no subinstructions.
6781+
6782+
@since 2.5.0 *)
6783+
val subs : ('a,'k) t -> ('a,'k) t array
67766784
end
67776785

67786786
(** Trie maps over instructions *)
@@ -7029,6 +7037,12 @@ module Std : sig
70297037

70307038
(** [slot] for accessing the sequence number of a subinstruction. *)
70317039
val slot : (Theory.program, t option) KB.slot
7040+
7041+
7042+
(** [fresh] evaluates to a freshly generated sequence number.
7043+
7044+
@since 2.5.0 *)
7045+
val fresh : tid knowledge
70327046
end
70337047

70347048

lib/bap/bap_project.ml

Lines changed: 0 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -39,163 +39,6 @@ let memory_slot = KB.Class.property Theory.Unit.cls "unit-memory"
3939
~desc:"annotated memory regions of the unit"
4040
Memmap.domain
4141

42-
module Symbols = struct
43-
open KB.Let
44-
open KB.Syntax
45-
46-
module Addr = struct
47-
include Bitvec
48-
include Bitvec_order
49-
include Bitvec_binprot.Functions
50-
include Bitvec_sexp.Functions
51-
end
52-
53-
type table = {
54-
roots : Set.M(Addr).t;
55-
names : string Map.M(Addr).t;
56-
aliases : Set.M(String).t Map.M(Addr).t;
57-
} [@@deriving compare, equal, bin_io, sexp]
58-
59-
let empty = {
60-
roots = Set.empty (module Addr);
61-
names = Map.empty (module Addr);
62-
aliases = Map.empty (module Addr);
63-
}
64-
65-
let slot = KB.Class.property Theory.Unit.cls
66-
~package:"bap" "symbol-table"
67-
~persistent:(KB.Persistent.of_binable (module struct
68-
type t = table [@@deriving bin_io]
69-
end)) @@
70-
KB.Domain.flat ~empty ~equal:equal_table "symbols"
71-
72-
let is_ident s =
73-
String.length s > 0 &&
74-
(Char.is_alpha s.[0] || Char.equal s.[0] '_') &&
75-
String.for_all s ~f:(fun c -> Char.is_alphanum c ||
76-
Char.equal c '_')
77-
78-
let from_spec t =
79-
let collect fld = Ogre.collect Ogre.Query.(select @@ from fld) in
80-
let open Ogre.Let in
81-
let to_addr =
82-
let m = Bitvec.modulus (Theory.Target.code_addr_size t) in
83-
let n = Theory.Target.code_alignment t / Theory.Target.byte t in
84-
let mask = Int64.(lnot (of_int n - 1L)) in
85-
fun x ->
86-
let x = Int64.(x land mask) in
87-
Bitvec.(int64 x mod m) in
88-
let add_alias tab addr alias = {
89-
tab with aliases = Map.update tab.aliases addr ~f:(function
90-
| None -> Set.singleton (module String) alias
91-
| Some names -> Set.add names alias)
92-
} in
93-
let pp_comma ppf () = Format.pp_print_string ppf ", " in
94-
let pp_addrs =
95-
Format.pp_print_list ~pp_sep:pp_comma Bitvec.pp
96-
and pp_names =
97-
Format.pp_print_list ~pp_sep:pp_comma Format.pp_print_string in
98-
let* roots =
99-
let+ roots =
100-
let* starts = collect Image.Scheme.code_start in
101-
let* values = collect Image.Scheme.symbol_value in
102-
let+ entry = Ogre.request Image.Scheme.entry_point in
103-
let roots = Seq.append starts (Seq.map ~f:fst values) in
104-
match entry with
105-
| None -> roots
106-
| Some entry -> Seq.cons entry roots in
107-
Seq.fold roots ~init:(Set.empty (module Bitvec_order))
108-
~f:(fun xs x -> Set.add xs (to_addr x)) in
109-
let+ named_symbols = collect Image.Scheme.named_symbol in
110-
let init = {empty with roots},
111-
Bap_relation.empty Bitvec.compare String.compare in
112-
Seq.fold named_symbols ~init ~f:(fun (tab,rel) (data,name) ->
113-
let addr = to_addr data in
114-
if Set.mem roots addr && is_ident name
115-
then tab,Bap_relation.add rel (to_addr data) name
116-
else add_alias tab addr name, rel) |> fun (table,rel) ->
117-
Bap_relation.matching rel table
118-
~saturated:(fun k v t -> {
119-
t with names = Map.add_exn t.names k v
120-
})
121-
~unmatched:(fun reason t -> match reason with
122-
| Non_injective_fwd (addrs,name) ->
123-
info "the symbol %s has ambiguous addresses: %a@\n"
124-
name pp_addrs addrs;
125-
t
126-
| Non_injective_bwd (names,addr) ->
127-
info "the symbol at %a has ambiguous names: %a@\n"
128-
Bitvec.pp addr pp_names names;
129-
t)
130-
131-
let build_table t spec = match Ogre.eval (from_spec t) spec with
132-
| Ok x -> x
133-
| Error err ->
134-
invalid_argf "Malformed ogre specification: %s"
135-
(Error.to_string_hum err) ()
136-
137-
let collect_inputs from obj f =
138-
KB.collect Theory.Label.unit obj >>=? fun unit ->
139-
KB.collect Theory.Label.addr obj >>=? fun addr ->
140-
let+ data = KB.collect from unit in
141-
f data addr
142-
143-
let promised_table : unit =
144-
KB.promise slot @@ fun unit ->
145-
let* t = KB.collect Theory.Unit.target unit in
146-
let+ s = KB.collect Image.Spec.slot unit in
147-
build_table t s
148-
149-
let promised_roots : unit =
150-
KB.Rule.(begin
151-
declare "provides roots" |>
152-
require Image.Spec.slot |>
153-
provide Theory.Label.is_subroutine |>
154-
comment "computes roots from spec";
155-
end);
156-
KB.promise Theory.Label.is_subroutine @@ fun obj ->
157-
collect_inputs slot obj @@ fun {roots} addr ->
158-
Option.some_if (Set.mem roots addr) true
159-
160-
161-
let names_agent = KB.Agent.register
162-
~package:"bap" "specification-provider"
163-
~desc:"provides names obtained from the image specification."
164-
165-
let promised_names : unit =
166-
KB.Rule.(begin
167-
declare "provides names" |>
168-
require Image.Spec.slot |>
169-
provide Theory.Label.possible_name |>
170-
comment "computes symbol names from spec";
171-
end);
172-
KB.propose names_agent Theory.Label.possible_name @@ fun obj ->
173-
collect_inputs slot obj @@ fun {names} addr ->
174-
Map.find names addr
175-
176-
177-
let promised_aliases : unit =
178-
KB.Rule.(begin
179-
declare "provides aliases" |>
180-
require Image.Spec.slot |>
181-
provide Theory.Label.possible_name |>
182-
comment "computes symbol aliases (names) from spec";
183-
end);
184-
KB.promise Theory.Label.aliases @@ fun obj ->
185-
let* unit = KB.collect Theory.Label.unit obj in
186-
let* addr = KB.collect Theory.Label.addr obj in
187-
match unit,addr with
188-
| None,_|_,None -> KB.return (Set.empty (module String))
189-
| Some unit, Some addr ->
190-
let+ {aliases} = KB.collect slot unit in
191-
match Map.find aliases addr with
192-
| None -> Set.empty (module String)
193-
| Some aliases -> aliases
194-
end
195-
196-
197-
198-
19942
let with_filename spec target _code memory path f =
20043
let open KB.Syntax in
20144
let width = Theory.Target.code_addr_size target in

lib/bap_disasm/bap_disasm_basic.ml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ module Insn = struct
319319
encoding : string;
320320
code : int;
321321
name : string;
322-
asm : bytes;
322+
asm : string;
323323
kinds: kind list;
324324
opers: Op.t array;
325325
subs : ins_info array;
@@ -342,14 +342,18 @@ module Insn = struct
342342

343343
let name {name = x} = x
344344
let code op = op.code
345-
let asm x = Bytes.to_string x.asm
345+
let asm x = x.asm
346346
let ops x = x.opers
347347
let kinds x = x.kinds
348348
let subs x = x.subs
349349
let is op x =
350350
let equal x y = Kind.compare x y = 0 in
351351
List.mem ~equal op.kinds x
352352

353+
let normalize_asm asm =
354+
String.substr_replace_all asm ~pattern:"\t"
355+
~with_:" " |> String.strip
356+
353357
let rec create ?parent ~asm ~kinds dis ~insn =
354358
let subs = Hashtbl.create (module Int) in
355359
let opers =
@@ -379,8 +383,8 @@ module Insn = struct
379383
if asm then
380384
let data = Bytes.create (C.insn_asm_size !!dis ~insn) in
381385
C.insn_asm_copy !!dis ~insn data;
382-
data
383-
else Bytes.empty in
386+
normalize_asm @@ Bytes.to_string data
387+
else "" in
384388
let kinds =
385389
if kinds then
386390
List.fold Kind.all ~init:[] ~f:(fun ks k ->
@@ -416,7 +420,7 @@ let sexp_of_full_insn = sexp_of_insn
416420
let compare_full_insn i1 i2 =
417421
let open Insn in
418422
let r1 = Int.compare i1.code i2.code in
419-
if r1 <> 0 then Bytes.compare i1.asm i2.asm
423+
if r1 <> 0 then String.compare i1.asm i2.asm
420424
else r1
421425

422426

lib/bap_disasm/bap_disasm_insn.ml

Lines changed: 2 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ end
8282
type t = Theory.Semantics.t
8383
type op = Op.t [@@deriving bin_io, compare, sexp]
8484

85-
let normalize_asm asm =
86-
String.substr_replace_all asm ~pattern:"\t"
87-
~with_:" " |> String.strip
88-
89-
9085
module Slot = struct
9186
type 'a t = (Theory.Effect.cls, 'a) KB.slot
9287
let empty = "#undefined"
@@ -111,19 +106,6 @@ module Slot = struct
111106
~public:true
112107
~desc:"an assembly string"
113108

114-
let provide_asm : unit =
115-
KB.Rule.(begin
116-
declare ~package:"bap" "asm-of-basic" |>
117-
require Insn.slot |>
118-
provide asm |>
119-
comment "provides the assembly string";
120-
end);
121-
let open KB.Syntax in
122-
KB.promise Theory.Semantics.slot @@ fun label ->
123-
let+ insn = label-->?Insn.slot in
124-
KB.Value.put asm Theory.Semantics.empty @@
125-
normalize_asm @@ Insn.asm insn
126-
127109
let sexp_of_op = function
128110
| Op.Reg r -> Sexp.Atom (Reg.name r)
129111
| Op.Imm w -> sexp_of_int64 (Imm.to_int64 w)
@@ -307,7 +289,7 @@ let write init ops =
307289
let set_basic effect insn : t =
308290
write effect Slot.[
309291
name <-- Insn.name insn;
310-
asm <-- normalize_asm (Insn.asm insn);
292+
asm <-- Insn.asm insn;
311293
ops <-- Some (Insn.ops insn);
312294
]
313295

@@ -337,7 +319,7 @@ let should = must
337319
let shouldn't = mustn't
338320

339321
let name = KB.Value.get Slot.name
340-
let asm = KB.Value.get Slot.asm
322+
let asm = KB.Value.get Slot.asm
341323
let bil insn = KB.Value.get Bil.slot insn
342324
let ops s = match KB.Value.get Slot.ops s with
343325
| None -> [||]
@@ -447,41 +429,6 @@ module Seqnum = struct
447429
let fresh = KB.Syntax.(freshnum >>= label)
448430
end
449431

450-
451-
452-
let provide_sequence_semantics () =
453-
let open KB.Syntax in
454-
KB.promise Theory.Semantics.slot @@ fun obj ->
455-
KB.collect Insn.slot obj >>= function
456-
| None -> !!Theory.Semantics.empty
457-
| Some insn when not (String.equal (Insn.name insn) "seq") ->
458-
!!Theory.Semantics.empty
459-
| Some insn -> match Insn.subs insn with
460-
| [||] -> !!Theory.Semantics.empty
461-
| subs ->
462-
Theory.instance () >>= Theory.require >>= fun (module CT) ->
463-
let subs = Array.to_list subs |>
464-
List.map ~f:(fun sub ->
465-
Seqnum.fresh >>| fun lbl ->
466-
lbl,sub) in
467-
KB.all subs >>=
468-
KB.List.map ~f:(fun (obj,sub) ->
469-
KB.provide Insn.slot obj (Some sub) >>= fun () ->
470-
KB.collect Theory.Semantics.slot obj >>= fun sema ->
471-
let nil = Theory.Effect.empty Theory.Effect.Sort.bot in
472-
CT.seq (CT.blk obj !!nil !!nil) !!sema) >>=
473-
KB.List.reduce ~f:(fun s1 s2 -> CT.seq !!s1 !!s2) >>| function
474-
| None -> empty
475-
| Some sema -> with_basic sema insn
476-
477-
let () =
478-
let open KB.Rule in
479-
declare "sequential-instruction" |>
480-
require Insn.slot |>
481-
provide Theory.Semantics.slot |>
482-
comment "computes sequential instructions semantics";
483-
provide_sequence_semantics ()
484-
485432
let () =
486433
Data.Write.create ~pp:Adt.pp () |>
487434
add_writer ~desc:"Abstract Data Type pretty printing format"

lib/bap_disasm/bap_disasm_insn.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ module Seqnum : sig
4646
type t = int
4747
val label : ?package:string -> t -> Theory.Label.t KB.t
4848
val slot : (Theory.program, t option) KB.slot
49+
val fresh : tid KB.t
4950
end
5051

5152
module Slot : sig

lib/bap_image/bap_memory.ml

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,6 @@ include Printable.Make(struct
446446

447447
let hexdump t = Format.asprintf "%a" pp_hex t
448448

449-
450449
let domain = KB.Domain.optional "mem"
451450
~equal:(fun x y ->
452451
Addr.equal x.addr y.addr &&
@@ -457,30 +456,3 @@ let slot = KB.Class.property ~package:"bap"
457456
Theory.Program.cls "mem" domain
458457
~public:true
459458
~desc:"a memory region occupied by the program"
460-
461-
let () =
462-
let open KB.Syntax in
463-
KB.promise Theory.Label.addr @@ fun label ->
464-
KB.collect slot label >>|? fun mem ->
465-
Some (Addr.to_bitvec (min_addr mem))
466-
467-
let () =
468-
KB.Rule.(begin
469-
declare ~package:"bap" "code-of-mem" |>
470-
require slot |>
471-
provide Theory.Semantics.code |>
472-
comment "extracts the memory contents"
473-
end);
474-
let open KB.Syntax in
475-
KB.promise Theory.Semantics.slot @@ fun label ->
476-
let+ {data; off; size} = label-->?slot in
477-
let empty = KB.Value.empty Theory.Semantics.cls in
478-
KB.Value.put Theory.Semantics.code empty @@
479-
Some (Bigstring.to_string ~pos:off ~len:size data)
480-
481-
let () =
482-
let open KB.Rule in
483-
declare ~package:"bap" "addr-of-mem" |>
484-
require slot |>
485-
provide Theory.Label.addr |>
486-
comment "addr of the first byte"

lib/bap_types/bap_arch.ml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,6 @@ module T = struct
8484
Theory.Unit.cls "unit-arch" domain
8585
~persistent
8686

87-
let _arch_of_unit_ : unit =
88-
KB.Rule.(declare ~package:"bap" "arch-of-unit" |>
89-
require Theory.Label.unit |>
90-
require unit_slot |>
91-
provide slot |>
92-
comment "propagates arch from the unit");
93-
let open KB.Syntax in
94-
KB.promise slot @@ fun obj ->
95-
KB.collect Theory.Label.unit obj >>= function
96-
| None -> KB.return `unknown
97-
| Some unit -> KB.collect unit_slot unit
9887
end
9988

10089
include T

0 commit comments

Comments
 (0)