Skip to content

Commit 784ebe2

Browse files
authored
fixes and improves label handling in the IR theory (#1333)
For the context, since the Ghidra PR the semantics of the [blk] operation in Core Theory was refined and corresponding theories were updated. Before #1326 all theories were just ignoring the labels passed to the [blk] operator. The reason for that was that the [blk] operation serves two purposes. It enables concatenation of data and control flow effects. And it attaches a label to that sequence so that it could be referenced elsewhere. Before #1326 it was only used for the first purpose, i.e., to merge data and control effects into the single effect. But for Ghidra we needed an ability to create labels (as ghidra is relying on Branch/Cbranch) instructions everywhere, even to express the intra-instruction logic, not real control flow. Now, the theories have to take into account the label passed to the blk operation, when they produce their denotations, unless the label is `Label.null`. If the label is `Label.null` then the operation is denotes just a sequence of data and control flow effects. Moreover, denotations are allowed to coalesce several blocks together. But if the label is non-null then the denotation has to preserve it. Before this PR the BIR theory wasn't fully respecting the passed labels and was sometimes optimizing them away, for example, when the label was attached to an empty denotation. This PR takes care of keeping the passed labels and at the same time preserving the minimal form of the generated IR. Of course assuming that lifters are using the `blk` operation correctly, i.e., that they are not passing non-null labels to blocks that they do not plan to invoke later. In other words, if you have a lifter that uses the `blk` operation you need to update it and pass `Theory.Label.null` there instead of the fresh label. This will have the same semantics as it had before. Passing non-null label now has a different semantics.
1 parent c0f0f96 commit 784ebe2

File tree

2 files changed

+71
-32
lines changed

2 files changed

+71
-32
lines changed

lib/bap_core_theory/bap_core_theory.mli

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2373,7 +2373,18 @@ module Theory : sig
23732373
(** [seq x y] performs effect [x], after that perform effect [y]. *)
23742374
val seq : 'a eff -> 'a eff -> 'a eff
23752375

2376-
(** [blk lbl data ctrl] a labeled sequence of effects. *)
2376+
(** [blk lbl data ctrl] an optionally labeled sequence of effects.
2377+
2378+
If [lbl] is [Label.null] then the block is unlabeled. If it is
2379+
not [Label.null] then the denotations will preserve the label
2380+
and assume that this [blk] is referenced from some other
2381+
blocks.
2382+
2383+
@since 2.4.0 the [blk] operator accepts (and welcomes)
2384+
[Label.null] as the label in cases when the block is not
2385+
really expected to be called from anywhere else.
2386+
2387+
*)
23772388
val blk : label -> data eff -> ctrl eff -> unit eff
23782389

23792390

plugins/bil/bil_ir.ml

Lines changed: 59 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ open Bap_knowledge
66
open Bap_core_theory
77

88
open Knowledge.Syntax
9+
open KB.Let
910

1011
include Self()
1112

1213
type blk = {
1314
name : Theory.Label.t;
15+
keep : bool;
1416
defs : def term list;
1517
jmps : jmp term list;
1618
} [@@deriving bin_io]
@@ -74,18 +76,6 @@ let pp_cfg ppf ir =
7476

7577
let inspect cfg = Sexp.Atom (asprintf "%a" pp_cfg cfg)
7678

77-
let relink label {entry; blks} =
78-
if is_null entry then {
79-
entry = label;
80-
blks = [{name=label; defs=[]; jmps=[]}]
81-
} else {
82-
entry = label;
83-
blks = List.map blks ~f:(fun blk ->
84-
if Theory.Label.equal blk.name entry then {
85-
blk with name = entry;
86-
} else blk)
87-
}
88-
8979
let domain = KB.Domain.flat ~inspect "graph"
9080
~empty:{entry=null; blks=[]}
9181
~equal:(fun x y -> Theory.Label.equal x.entry y.entry)
@@ -102,10 +92,21 @@ let graph =
10292

10393
let slot = graph
10494

95+
96+
(*
97+
Invariants:
98+
1. all non-empty denotations have a non-empty list of blocks
99+
1.1 is_null entry <=> (blks = [])
100+
2. if the denotation is non-empty then the list of blocks
101+
has a block with the name equal to entry.
102+
3. explicitly labeled blocks keep their names.
103+
104+
105+
*)
105106
module IR = struct
106107
include Theory.Empty
107108
let ret = Knowledge.return
108-
let blk tid = {name=tid; defs=[]; jmps=[]}
109+
let blk ?(keep=true) tid = {name=tid; defs=[]; jmps=[]; keep}
109110

110111
let def = (fun x -> x.defs), (fun x d -> {x with defs = d})
111112
let jmp = (fun x -> x.jmps), (fun x d -> match x.jmps with
@@ -135,9 +136,36 @@ module IR = struct
135136
let data cfg = ret cfg
136137
let ctrl cfg = ret cfg
137138

138-
let set v x =
139+
let target =
139140
KB.Object.scoped Theory.Program.cls @@ fun lbl ->
140-
Theory.Label.target lbl >>= fun t ->
141+
Theory.Label.target lbl
142+
143+
144+
let goto ?(is_call=false) ?cnd ~tid dst =
145+
if is_call
146+
then Jmp.reify ?cnd ~tid ~alt:(Jmp.resolved dst) ()
147+
else Jmp.reify ?cnd ~tid ~dst:(Jmp.resolved dst) ()
148+
149+
let relink label {entry; blks} =
150+
if is_null entry then KB.return {
151+
entry = label;
152+
blks = [{name=label; keep=true; defs=[]; jmps=[]}]
153+
} else
154+
let+ blks = List.fold_map blks ~init:`Unbound ~f:(fun r blk ->
155+
if Theory.Label.equal blk.name entry
156+
then if blk.keep then `Relink blk.name, blk
157+
else `Relinked, {blk with name = label; keep=true}
158+
else r,blk) |> function
159+
| `Relinked,blks -> KB.return blks
160+
| `Relink dst, blks ->
161+
let+ tid = fresh in
162+
blks @ [blk label ++ goto ~tid dst]
163+
| `Unbound,[] -> assert false
164+
| `Unbound,_ -> assert false in
165+
{entry = label; blks}
166+
167+
let set v x =
168+
target >>= fun t ->
141169
if Theory.Target.has_roles t [Theory.Role.Register.constant] v
142170
then !!empty
143171
else
@@ -146,14 +174,14 @@ module IR = struct
146174
fresh >>= fun tid ->
147175
data {
148176
entry;
149-
blks = [{name=entry; jmps=[]; defs=[Def.reify ~tid v x]}]
177+
blks = [{
178+
name=entry;
179+
keep=false;
180+
jmps=[];
181+
defs=[Def.reify ~tid v x]
182+
}]
150183
}
151184

152-
let goto ?(is_call=false) ?cnd ~tid dst =
153-
if is_call
154-
then Jmp.reify ?cnd ~tid ~alt:(Jmp.resolved dst) ()
155-
else Jmp.reify ?cnd ~tid ~dst:(Jmp.resolved dst) ()
156-
157185
(** reifies a [while (<cnd>) <body>] loop to
158186
159187
{v
@@ -177,13 +205,14 @@ module IR = struct
177205
let repeat cnd body =
178206
cnd >>= fun cnd ->
179207
body >>| reify >>= function
180-
| {blks=[]} ->
208+
| {blks=[]} -> (* empty denotation *)
181209
fresh >>= fun head ->
182210
fresh >>= fun tid ->
183211
data {
184212
entry = head;
185213
blks = [{
186214
name = head;
215+
keep = true;
187216
defs = [];
188217
jmps = [goto ~cnd ~tid head]}]}
189218
| {entry=loop; blks=b::blks} ->
@@ -273,28 +302,28 @@ module IR = struct
273302
fresh >>= fun tid ->
274303
ctrl {
275304
entry;
276-
blks = [blk entry ++ Jmp.reify ~tid ~dst:(Jmp.indirect dst) ()]
305+
blks = [blk ~keep:false entry ++ Jmp.reify ~tid ~dst:(Jmp.indirect dst) ()]
277306
}
278307

279308
let appgraphs fst snd =
280309
if is_empty fst then ret snd else
281310
if is_empty snd then ret fst
282311
else match fst, snd with
283312
| _,{blks=[]} | {blks=[]}, _ -> assert false
284-
| {entry; blks={jmps=[]} as x :: xs},{blks=[y]} -> ret {
313+
| {entry; blks={jmps=[]} as x :: xs},{blks=[{keep=false} as y]} ->
314+
ret {
285315
entry;
286316
blks = {x with defs = y.defs @ x.defs; jmps = y.jmps} :: xs
287317
}
288-
| {entry; blks=x::xs}, {entry=snd; blks=y::ys} ->
318+
| {entry; blks=x::xs}, {entry=esnd; blks=y::ys} ->
289319
fresh >>= fun tid -> ret {
290320
entry;
291321
blks =
292322
y ::
293-
x ++ goto ~tid snd ::
323+
x ++ goto ~tid esnd ::
294324
List.rev_append xs ys
295325
}
296326

297-
298327
let seq fst snd =
299328
fst >>-> fun fst ->
300329
snd >>-> fun snd ->
@@ -306,16 +335,15 @@ module IR = struct
306335
KB.collect Theory.Label.is_subroutine dst >>= fun is_call ->
307336
ctrl {
308337
entry;
309-
blks = [blk entry ++ goto ?is_call ~tid dst]
338+
blks = [blk ~keep:false entry ++ goto ?is_call ~tid dst]
310339
}
311340

312341
let blk label defs jmps =
313342
defs >>-> fun defs ->
314343
jmps >>-> fun jmps ->
315344
appgraphs defs jmps >>-> fun res ->
316-
ret @@
317-
if is_null label then res
318-
else relink label res
345+
if is_null label then ret res
346+
else relink label res >>= ret
319347

320348
let goto = do_goto
321349
end

0 commit comments

Comments
 (0)