Skip to content

Commit 2faaa42

Browse files
authored
overhauls the target/architecture abstraction (3/n) (#1227)
In this episode, we liberate `bap mc` and `bap objdump` from the bonds of the `Arch.t` representation. We also add the systemz lifter for demonstration purposes. Of course, the lifter is minimal and far from being usable, but that serves well its didactic purposes. The interface of the `bap mc` command is preserved but is extended with a few more command-line options that provide a great deal of flexibility. Not only it is now possible to specify the target and encoding, but it is now possible to pass options directly to the backend, which is useful for disassembling targets that are not yet known to BAP. Below is an excerpt from the bap-mc man page (see bap mc --help) ``` SETTING ARCHITECHTURE The target architecture is controlled by several groups of options that can not be used together: - arch; - target and encoding; - triple, backend, cpu, bits, and order. The arch option provides the least control but is easiest to use. It relies on the dependency-injection mechanism and lets the target support packages (plugins that implement support for the given architecture) do their best to guess the target and encoding that matches the provided name. Use the common names for the architecture and it should work. You can use the bits and order options to give more hints to the target support packages. They default to 32 and little correspondingly. The target and encoding provides precise control over the selection of the target and the encoding that is used to represent machine instructions. The encoding field can be omitted and will be deduced from the target. Use bap list targets and bap list encodings to get the list of supported targets and encodings respectivly. Finally, the triple, backend, cpu,... group of options provides the full control over the disassembler backend and bypasses the dependency-injection mechanism to pass the specified options directly to the corresponding backends. This enables disassembling of targets and encodings that are not yet supported by BAP. The meanings of the options totally depend on the selected backend and they are passed as is to the corresponding arguments of the Disasm_expert.Basic.create function. The bits and order defaults to 32 and little corresondingly and are used to specify the number of bits in the target's addresses and the order of bytes in the word. This group of options is useful during the implementation and debugging of new targets and thus is reserved for experts. Note, when this group is used the semantics of the instructions will not be provided as it commonly requires the target specification. ```
1 parent aae5c10 commit 2faaa42

23 files changed

+676
-61
lines changed

lib/arm/arm_target.ml

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ let flags64 = [
5757

5858
let vars64 = gp64 @< fp64 @< sp64 @< lr64 @< flags64 @< [data64]
5959

60-
let parent = CT.Target.declare "arm"
60+
let parent = CT.Target.declare ~package "arm"
6161

6262
module type v4 = sig
6363
end
@@ -81,6 +81,7 @@ module type ARM = sig
8181
val v7fp : CT.Target.t
8282
val v7a : CT.Target.t
8383
val v7afp : CT.Target.t
84+
val v7m : CT.Target.t
8485
val v8a : CT.Target.t
8586
val v81a : CT.Target.t
8687
val v82a : CT.Target.t
@@ -131,16 +132,18 @@ module Family (Order : Endianness) = struct
131132
let v6m = v6 <: "armv6-m"
132133

133134
let v7 = if not is_bi_endian then v6t2 <: "armv7"
134-
else CT.Target.declare ~package (ordered "armv4")
135+
else CT.Target.declare ~package (ordered "armv7")
135136
~parent
136-
~nicknames:["armv4"]
137+
~nicknames:["armv7"]
137138
~bits:32
138139
~byte:8
139140
~endianness
140141
~code:data
141142
~data:data
142143
~vars:vars32
143144

145+
let v7m = v7 <: "armv7-m"
146+
144147
let v7fp = CT.Target.declare ~package (ordered "armv7+fp") ~parent:v7
145148
~nicknames:["armv7+fp"]
146149
~vars:vars32_fp
@@ -250,8 +253,7 @@ let enable_loader () =
250253
| "arm","v84a" -> Family.v84a
251254
| "arm","v85a" -> Family.v85a
252255
| "arm","v86a" -> Family.v86a
253-
| "thumb", "v4" -> Family.v4t
254-
| "thumb", "v5" -> Family.v5t
256+
| "thumb",_ -> Family.v7m
255257
| "aarch64",_ -> Family.v86a
256258
| _ -> Family.v7
257259

@@ -367,43 +369,39 @@ module Encodings = struct
367369
symbols_encoding
368370
end
369371

370-
let (>>=?) x f = x >>= function
371-
| None -> !!CT.Language.unknown
372-
| Some x -> f x
373372

374373
let compute_encoding_from_symbol_table default label =
374+
let (>>=?) x f = x >>= function
375+
| None -> !!default
376+
| Some x -> f x in
375377
KB.collect CT.Label.unit label >>=? fun unit ->
376378
KB.collect CT.Label.addr label >>=? fun addr ->
377379
KB.collect Encodings.slot unit >>= fun encodings ->
378380
KB.return @@ match Map.find encodings addr with
379381
| Some x -> x
380382
| None -> default
381383

382-
(* here less than means: was introduced before *)
383-
let (<) t p = not (CT.Target.belongs p t)
384-
let (<=) t p = CT.Target.equal t p || t < p
385-
let (>=) = CT.Target.belongs
386-
384+
(* here t < p means that t was introduced before p *)
385+
let (>=) t p = CT.Target.belongs t p
386+
let (<) t p = t >= p && not (p >= t)
387+
let (<=) t p = t = p || t < p
387388
let is_arm = CT.Target.belongs parent
388389

389-
let before_thumb2 t =
390-
t < LE.v6t2 ||
391-
t < EB.v6t2
392-
393-
let is_64bit t =
394-
t >= LE.v8a ||
395-
t >= EB.v8a ||
396-
t >= Bi.v8a
390+
let before_thumb2 t = t < LE.v6t2 || t < EB.v6t2
391+
let is_64bit t = LE.v8a <= t || EB.v8a <= t || Bi.v8a <= t
392+
let is_thumb_only t = LE.v7m <= t || EB.v7m <= t || Bi.v7m <= t
397393

398394
let guess_encoding label target =
399395
if is_arm target then
400396
if before_thumb2 target
401397
then compute_encoding_from_symbol_table llvm_a32 label
402398
else KB.return @@
403-
if is_64bit target then llvm_a64 else llvm_a32
399+
if is_64bit target then llvm_a64 else
400+
if is_thumb_only target
401+
then llvm_t32
402+
else llvm_a32
404403
else KB.return CT.Language.unknown
405404

406-
407405
let enable_decoder () =
408406
let open KB.Syntax in
409407
register llvm_a32 "armv7";
@@ -412,6 +410,7 @@ let enable_decoder () =
412410
KB.promise CT.Label.encoding @@ fun label ->
413411
CT.Label.target label >>= guess_encoding label
414412

413+
415414
let load () =
416415
enable_loader ();
417416
enable_arch ();

lib/bap_core_theory/bap_core_theory.mli

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,12 @@ module Theory : sig
12551255
*)
12561256
val get : ?package:string -> string -> t
12571257

1258+
(** [read ?package name] is a synonym for [get ?package name].
1259+
1260+
Introduces for the consistency with the [Enum.S] interface.
1261+
*)
1262+
val read : ?package:string -> string -> t
1263+
12581264
(** [declared ()] is the list of declared targets.
12591265
The order is unspecified, see also {!families}. The list
12601266
doesn't include the [unknown] target. *)
@@ -1694,6 +1700,9 @@ module Theory : sig
16941700

16951701
(** the hash value of the enum *)
16961702
val hash : t -> int
1703+
1704+
(** [members ()] the list of all members of the enumeration type. *)
1705+
val members : unit -> t list
16971706
end
16981707

16991708
(** Creates a new enumerated type. *)

lib/bap_core_theory/bap_core_theory_target.ml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ module Enum = struct
3535
val domain : t KB.domain
3636
val persistent : t KB.persistent
3737
val hash : t -> int
38+
val members : unit -> t list
3839
end
3940

4041
module Make() = struct
@@ -61,6 +62,7 @@ module Enum = struct
6162
let unknown = Name.of_string ":unknown"
6263
let is_unknown = Name.equal unknown
6364
let hash = Name.hash
65+
let members () = Hash_set.to_list elements
6466
include Base.Comparable.Make(Name)
6567
include (Name : Stringable.S with type t := t)
6668
include (Name : Pretty_printer.S with type t := t)
@@ -203,6 +205,8 @@ let get ?package name =
203205
then invalid_argf "Unknown target %s" (Name.to_string name) ();
204206
name
205207

208+
let read = get
209+
206210
let info name = match Hashtbl.find targets name with
207211
| None -> unknown
208212
| Some t -> t

lib/bap_core_theory/bap_core_theory_target.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ val declare :
3131
string -> t
3232

3333
val get : ?package:string -> string -> t
34+
val read : ?package:string -> string -> t
3435
val lookup : ?package:string -> string -> t option
3536
val unknown : t
3637
val is_unknown : t -> bool
@@ -76,6 +77,7 @@ module Enum : sig
7677
val domain : t KB.domain
7778
val persistent : t KB.persistent
7879
val hash : t -> int
80+
val members : unit -> t list
7981
end
8082

8183
module Make() : S

lib/bap_image/bap_memory.ml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,3 +461,16 @@ let slot = KB.Class.property ~package:"bap"
461461
Theory.Program.cls "mem" domain
462462
~public:true
463463
~desc:"a memory region occupied by the program"
464+
465+
let () =
466+
let open KB.Syntax in
467+
KB.promise Theory.Label.addr @@ fun label ->
468+
KB.collect slot label >>|? fun mem ->
469+
Some (Addr.to_bitvec (min_addr mem))
470+
471+
let () =
472+
let open KB.Rule in
473+
declare ~package:"bap" "addr-of-mem" |>
474+
require slot |>
475+
provide Theory.Label.addr |>
476+
comment "addr of the first byte"

lib/bap_systemz/bap_systemz_target.ml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
open Core_kernel
2+
open Bap_core_theory
3+
4+
let package = "bap"
5+
6+
type r64 and r32 and r16 and r8
7+
8+
type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort
9+
10+
let r64 : r64 bitv = Theory.Bitv.define 64
11+
let r32 : r32 bitv = Theory.Bitv.define 32
12+
let r16 : r16 bitv = Theory.Bitv.define 16
13+
let r8 : r8 bitv = Theory.Bitv.define 8
14+
let bool = Theory.Bool.t
15+
16+
let reg t n = Theory.Var.define t n
17+
18+
let array ?(index=string_of_int) t pref size =
19+
List.init size ~f:(fun i -> reg t (pref ^ index i))
20+
21+
let untyped = List.map ~f:Theory.Var.forget
22+
let (@<) xs ys = untyped xs @ untyped ys
23+
24+
let mems = Theory.Mem.define r64 r8
25+
26+
let gpr = array r64 "R" 16
27+
let fpr = array r64 "F" 16
28+
let mem = reg mems "mem"
29+
30+
let vars = gpr @< fpr @< [mem]
31+
32+
let parent = Theory.Target.declare ~package "systemz"
33+
34+
let z9 = Theory.Target.declare ~package "systemz9" ~parent
35+
~bits:64
36+
~code:mem
37+
~data:mem
38+
~vars
39+
40+
let llvm_encoding = Theory.Language.declare ~package "llvm-systemz"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
open Bap_core_theory
2+
3+
4+
type r64 and r32 and r16 and r8
5+
6+
type 'a bitv = 'a Theory.Bitv.t Theory.Value.sort
7+
8+
val r64 : r64 bitv
9+
val r32 : r32 bitv
10+
val r16 : r16 bitv
11+
val r8 : r8 bitv
12+
13+
val mem : (r64, r8) Theory.Mem.t Theory.var
14+
val gpr : r64 Theory.Bitv.t Theory.var list
15+
val fpr : r64 Theory.Bitv.t Theory.var list
16+
17+
val parent : Theory.Target.t
18+
19+
val z9 : Theory.Target.t
20+
21+
val llvm_encoding : Theory.Language.t

lib/bap_types/bap_arch.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ module T = struct
7575
let persistent = KB.Persistent.of_binable (module struct
7676
type t = arch [@@deriving bin_io]
7777
end)
78+
7879
let slot = KB.Class.property ~package:"bap"
7980
Theory.Program.cls "arch" domain
8081
~persistent

lib/bap_types/bap_ir.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ module Tid = struct
115115
then intern str
116116
else match str.[0] with
117117
| '%' -> parse @@ sprintf "#<%s 0x%s>"
118-
(KB.Name.show (KB.Class.name Theory.Program.Semantics.cls))
118+
(KB.Name.show (KB.Class.name Theory.Semantics.cls))
119119
(String.subo ~pos:1 str)
120120
| '@' -> intern (String.subo ~pos:1 str)
121121
| _ -> intern str
@@ -1251,7 +1251,7 @@ module Term = struct
12511251
end)
12521252

12531253
let slot = Knowledge.Class.property
1254-
Theory.Program.Semantics.cls "bir" domain
1254+
Theory.Semantics.cls "bir" domain
12551255
~package ~persistent
12561256
~public:true
12571257
~desc:"BIL semantics in a graphical IR"

lib/bap_types/bap_ir.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ end
6969
module Term : sig
7070
type 'a t = 'a term
7171

72-
val slot : (Theory.Program.Semantics.cls, blk term list) KB.slot
72+
val slot : (Theory.Semantics.cls, blk term list) KB.slot
7373

7474
val clone : 'a t -> 'a t
7575
val same : 'a t -> 'a t -> bool

0 commit comments

Comments
 (0)