Skip to content

Commit e91bd87

Browse files
authored
improves arm-family support (#1353)
* tweaks symbolization and function start identification facilities We remove the dependency on the Bap.Std.Image and instead use the image specification directly. This gives us strictly more symbols, as the image imposes extra constraints, which may hide function starts and their names. More information is not always better, as we now have more chances to get conflicting knowledge. To ensure that we're able to preserve as much information as possible without compromising correctness, we leverage our agent-based conflict resolution system. We push all names of which we're not completely sure into possible aliases and use a new agent, `bap:gossiper`, to propose names from that set. To make everything work fine, we pushed down the reliability of the objdump symbolizer (as we want bap to have the final word). The improved symbolization facility uncovered a small bug in the way the x86 lock intrinsic was implemented: it was named just `"lock"`, which obviously may conflict with a normal function with the same name (which was uncovered by our testsuite). This commit adds the `x86` prefix to the intrinsic, e.g., `x86:lock`, as well as properly delimits the locked code with the corresponding `x86:unlock` intrinsic. * adds 32-bit variants of armv8 and armv9, specifies alignments * fixes `blx pc` semantics It should be `call arm:unpredictable` instead of an interworking branch (which essentially breaks the disassembler) * assumes that all non-word-aligned addresses have the T32 encoding * fixes the test case with a non-word-aligned base
1 parent 07e4fa9 commit e91bd87

File tree

6 files changed

+111
-13
lines changed

6 files changed

+111
-13
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ testsuite:
5555
git clone https://github.com/BinaryAnalysisPlatform/bap-testsuite.git testsuite
5656

5757
check: testsuite
58-
make REVISION=f8af868e4f61 -C testsuite
58+
make REVISION=81d9159 -C testsuite
5959

6060
.PHONY: indent check-style status-clean
6161

lib/arm/arm_target.ml

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -188,19 +188,23 @@ module Family (Order : Endianness) = struct
188188
let order = CT.Endianness.name endianness in
189189
name ^ "+" ^ KB.Name.unqualified order
190190

191-
let (<:) parent name =
191+
let def ?code_alignment ~parent name =
192192
if CT.Target.is_unknown parent
193193
then CT.Target.unknown
194194
else CT.Target.declare ~package (ordered name) ~parent
195+
?code_alignment
195196
~nicknames:[name]
196197

198+
let (<:) parent name = def ~parent name
199+
197200
let is_bi_endian = CT.Endianness.(equal bi) endianness
198201

199202
let v4 =
200203
if is_bi_endian
201204
then CT.Target.unknown
202205
else CT.Target.declare ~package (ordered "armv4")
203206
~parent
207+
~code_alignment:32
204208
~nicknames:["armv4"]
205209
~bits:32
206210
~byte:8
@@ -210,7 +214,7 @@ module Family (Order : Endianness) = struct
210214
~vars:vars32
211215
~regs:regs32
212216

213-
let v4t = v4 <: "armv4t"
217+
(* armv4t: child of v4 declared with a 16-bit code alignment.
   The name keeps the "arm" prefix used by every sibling target
   ("armv4", "armv5", "armv5t", ...), so that both the ordered target
   name and the nickname stay "armv4t" as before the switch to [def]. *)
let v4t = def "armv4t" ~parent:v4 ~code_alignment:16
214218
let v5 = v4 <: "armv5"
215219
let v5t = v5 <: "armv5t"
216220
let v5te = v5t <: "armv5te"
@@ -225,6 +229,7 @@ module Family (Order : Endianness) = struct
225229
else CT.Target.declare ~package (ordered "armv7")
226230
~parent
227231
~nicknames:["armv7"]
232+
~code_alignment:16
228233
~bits:32
229234
~byte:8
230235
~endianness
@@ -247,24 +252,44 @@ module Family (Order : Endianness) = struct
247252
~vars:vars32_fp
248253
~regs:(regs32@vfp3regs)
249254

255+
250256
let v8a =
251257
CT.Target.declare ~package (ordered "armv8-a") ~parent:v7
252-
~nicknames:["armv8-a"]
258+
~nicknames:["armv8-a"; "aarch64"]
253259
~aliasing
260+
~code_alignment:32
254261
~bits:64
255262
~code:datav8
256263
~data:datav8
257264
~vars:varsv8
258265
~regs:regsv8
259266

267+
let v8a32 =
268+
Theory.Target.declare ~package (ordered "armv8-a+aarch32")
269+
~nicknames:["armv8-a+aarch32"]
270+
~parent:v7
271+
272+
let v8m32 =
273+
Theory.Target.declare ~package (ordered "armv8-m+aarch32")
274+
~nicknames:["armv8-m+aarch32"]
275+
~parent:v7m
276+
277+
let v8r32 =
278+
Theory.Target.declare ~package (ordered "armv8-r+aarch32")
279+
~nicknames:["armv8-r+aarch32"]
280+
~parent:v7
281+
260282
let v81a = v8a <: "armv8.1-a"
261283
let v82a = v81a <: "armv8.2-a"
262284
let v83a = v82a <: "armv8.3-a"
263285
let v84a = v83a <: "armv8.4-a"
264286
let v85a = v84a <: "armv8.5-a"
265287
let v86a = v85a <: "armv8.6-a"
266288

289+
let v9a = v86a <: "armv9-a"
290+
267291
let parent = if is_bi_endian then v7 else v4
292+
268293
end
269294

270295
module LE = Family(struct let endianness = CT.Endianness.le end)
@@ -376,7 +401,12 @@ let arms : arms Map.M(CT.Target).t =
376401
LE.v6t2, `armv6;
377402
LE.v7, `armv7;
378403
LE.v7a, `armv7;
404+
LE.v7m, `thumbv7;
379405
LE.v7afp, `armv7;
406+
Bi.v7, `armv7;
407+
Bi.v7a, `armv7;
408+
Bi.v7m, `thumbv7;
409+
Bi.v7afp, `armv7;
380410
LE.v8a, `aarch64;
381411
LE.v81a, `aarch64;
382412
LE.v82a, `aarch64;
@@ -397,6 +427,7 @@ let arms : arms Map.M(CT.Target).t =
397427
EB.v6t2,`armv6eb;
398428
EB.v7, `armv7eb;
399429
EB.v7a, `armv7eb;
430+
EB.v7m, `thumbv7eb;
400431
EB.v7afp, `armv7eb;
401432
EB.v8a, `aarch64_be;
402433
EB.v81a, `aarch64_be;
@@ -435,8 +466,18 @@ let register ?attrs encoding triple =
435466
Dis.create ?attrs ~backend:"llvm" triple
436467

437468
let symbol_values doc =
438-
let field = Ogre.Query.(select (from Image.Scheme.symbol_value)) in
439-
match Ogre.eval (Ogre.collect field) doc with
469+
let open Ogre.Let in
470+
let open Image.Scheme in
471+
let symbols =
472+
let* symtab =
473+
Ogre.(collect Query.(select (from symbol_value))) in
474+
let+ entry = Ogre.request entry_point in
475+
match entry with
476+
| None -> symtab
477+
| Some entry ->
478+
let mask = Int64.(-1L lsl 1) in
479+
Seq.cons Int64.(entry land mask, entry) symtab in
480+
match Ogre.eval symbols doc with
440481
| Ok syms -> syms
441482
| Error err ->
442483
failwithf "Arm_target: broken file specification: %s"
@@ -448,6 +489,7 @@ module Encodings = struct
448489
let lsb x = Int64.(x land 1L)
449490
let is_thumb x = Int64.equal (lsb x) 1L
450491

492+
451493
let symbols_encoding spec =
452494
symbol_values spec |>
453495
Seq.fold ~init:empty ~f:(fun symbols (addr,value) ->
@@ -479,26 +521,35 @@ let has_t32 label =
479521
Map.exists ~f:(Theory.Language.equal llvm_t32)
480522

481523

524+
let is_word_aligned x = Bitvec.(M32.(int 3 land x) = zero)
525+
482526
let compute_encoding_from_symbol_table label =
483527
let (>>=?) x f = x >>= function
484528
| None -> !!Theory.Language.unknown
485529
| Some x -> f x in
486530
KB.collect CT.Label.unit label >>=? fun unit ->
487531
KB.collect CT.Label.addr label >>=? fun addr ->
488-
KB.collect Encodings.slot unit >>= fun encodings ->
489-
KB.return @@ match Map.find encodings addr with
490-
| Some x -> x
491-
| None -> CT.Language.unknown
532+
KB.collect Encodings.slot unit >>| fun encodings ->
533+
if not (is_word_aligned addr) then llvm_t32
534+
else match Map.find encodings addr with
535+
| Some x -> x
536+
| None -> CT.Language.unknown
492537

493538
(* here t < p means that t was introduced before p *)
494539
let (>=) t p = CT.Target.belongs t p
495540
let (<) t p = t >= p && not (p >= t)
496541
let (<=) t p = t = p || t < p
497542
let is_arm = CT.Target.belongs parent
498543

499-
let before_thumb2 t = t < LE.v6t2 || t < EB.v6t2
500544
let is_64bit t = LE.v8a <= t || EB.v8a <= t || Bi.v8a <= t
501-
let is_thumb_only t = LE.v7m <= t || EB.v7m <= t || Bi.v7m <= t
545+
546+
let m_profiles = [
547+
LE.v7m; EB.v7m; Bi.v7m;
548+
LE.v8m32; EB.v8m32; Bi.v8m32;
549+
]
550+
let is_thumb_only t =
551+
List.exists m_profiles ~f:(fun p -> p <= t)
552+
502553

503554
let is_big t = Theory.Target.endianness t = Theory.Endianness.eb
504555
let is_little t = Theory.Target.endianness t = Theory.Endianness.le

lib/arm/arm_target.mli

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,16 @@ module LE : sig
3838
val v7a : Theory.Target.t
3939
val v7afp : Theory.Target.t
4040
val v8a : Theory.Target.t
41+
val v8a32 : Theory.Target.t
42+
val v8m32 : Theory.Target.t
43+
val v8r32 : Theory.Target.t
4144
val v81a : Theory.Target.t
4245
val v82a : Theory.Target.t
4346
val v83a : Theory.Target.t
4447
val v84a : Theory.Target.t
4548
val v85a : Theory.Target.t
4649
val v86a : Theory.Target.t
50+
val v9a : Theory.Target.t
4751
end
4852

4953

@@ -69,12 +73,16 @@ module EB : sig
6973
val v7a : Theory.Target.t
7074
val v7afp : Theory.Target.t
7175
val v8a : Theory.Target.t
76+
val v8a32 : Theory.Target.t
77+
val v8m32 : Theory.Target.t
78+
val v8r32 : Theory.Target.t
7279
val v81a : Theory.Target.t
7380
val v82a : Theory.Target.t
7481
val v83a : Theory.Target.t
7582
val v84a : Theory.Target.t
7683
val v85a : Theory.Target.t
7784
val v86a : Theory.Target.t
85+
val v9a : Theory.Target.t
7886
end
7987

8088

@@ -90,12 +98,16 @@ module Bi : sig
9098
val v7a : Theory.Target.t
9199
val v7afp : Theory.Target.t
92100
val v8a : Theory.Target.t
101+
val v8a32 : Theory.Target.t
102+
val v8m32 : Theory.Target.t
103+
val v8r32 : Theory.Target.t
93104
val v81a : Theory.Target.t
94105
val v82a : Theory.Target.t
95106
val v83a : Theory.Target.t
96107
val v84a : Theory.Target.t
97108
val v85a : Theory.Target.t
98109
val v86a : Theory.Target.t
110+
val v9a : Theory.Target.t
99111
end
100112

101113
val llvm_a32 : Theory.language

plugins/arm/arm_main.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ let backend =
2525
type arms = [
2626
| Arch.arm
2727
| Arch.armeb
28+
| Arch.thumb
29+
| Arch.thumbeb
2830
] [@@deriving enumerate]
2931

3032
let () = Bap_main.Extension.declare ~doc @@ fun ctxt ->

plugins/arm/semantics/thumb.lisp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
(require bits)
22
(require arm-bits)
33

4+
5+
;; Note: page references are from ARM DDI 0403E.b
6+
47
(declare (context (target arm)))
58

69
(defpackage thumb (:use core target arm))
@@ -63,3 +66,25 @@
6366
(set$ rd (* rn rm))
6467
(set ZF (is-zero rd))
6568
(set NF (msb rd)))
69+
70+
71+
(defun t2STRDi8 (rt1 rt2 rn imm pre _)
72+
"strd rt1, rt2, [rn, off]"
73+
(when (condition-holds pre)
74+
(store-word (+ rn imm) rt1)
75+
(store-word (+ rn imm (sizeof word-width)) rt2)))
76+
77+
(defun t2ADDri12 (rd rn imm pre _)
78+
"addw rd, rn, imm; A7-189, T4 "
79+
(when (condition-holds pre)
80+
(set$ rd (+ rn imm))))
81+
82+
(defun t2STRHi12 (rt rn imm pre _)
83+
"strh.w rt, [rn, imm]; A7-442; T2"
84+
(when (condition-holds pre)
85+
(store-word (+ rn imm) (cast-low 16 rt))))
86+
87+
(defun t2B (off pre _)
88+
"b.w imm; A7-207, T3"
89+
(when (condition-holds pre)
90+
(exec-addr (+ (get-program-counter) off 4))))

plugins/thumb/thumb_main.ml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ let (>>=?) x f = x >>= function
3535

3636
module Thumb(CT : Theory.Core) = struct
3737

38+
module Thumb = Thumb_core.Make(CT)
39+
3840
let reg r = Theory.Var.define s32 (Reg.name r)
3941
let imm x = Option.value_exn (Imm.to_int x)
4042
let regs rs = List.map rs ~f:(function
@@ -182,6 +184,10 @@ module Thumb(CT : Theory.Core) = struct
182184
info "unhandled bit-wise instruction: %a" pp_insn insn;
183185
!!Insn.empty
184186

187+
let unpredictable =
188+
Theory.Label.for_name "arm:unpredictable" >>= CT.goto
189+
190+
185191

186192
(* these are not entirely complete *)
187193
let lift_branch pc opcode insn =
@@ -192,7 +198,9 @@ module Thumb(CT : Theory.Core) = struct
192198
| `tBcc, [|Imm dst; Imm c; _|] -> bcc pc (cnd c) (imm dst)
193199
| `tBL, [|_; _; Imm dst; _|]
194200
| `tBLXi, [|_; _; Imm dst|] -> bli pc (imm dst)
195-
| `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) -> blxi pc 0
201+
| `tBLXr, [|_; _; Reg dst|]when is_pc (reg dst) ->
202+
(* blx pc is unpredictable in all versions of ARM *)
203+
Thumb.ctrl unpredictable
196204
| `tBLXr, [|_; _; Reg dst|]-> blxr pc (reg dst)
197205
| `tBX, [|Reg dst; _; _|]when is_pc (reg dst) -> bxi pc 0
198206
| `tBX, [|Reg dst;_;_|] -> bxr (reg dst)

0 commit comments

Comments
 (0)