Skip to content

Commit a503e97

Browse files
authored
adds subinstruction contraction to improve the ghidra lifter output (#1443)
The code produced by ghidra lifter produces several basic blocks per instruction. This simple optimization contracts (collapeses into single block) basic blocks that originate from different subinstructions of the same instruction (when it preserves semantics, of course)
1 parent fb0414c commit a503e97

File tree

2 files changed

+75
-6
lines changed

2 files changed

+75
-6
lines changed

plugins/bil/bil_ir.ml

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ include Self()
1313
type blk = {
1414
name : Theory.Label.t;
1515
keep : bool;
16+
weak : bool;
1617
defs : def term list;
1718
jmps : jmp term list;
1819
} [@@deriving bin_io]
@@ -54,13 +55,72 @@ module BIR = struct
5455
Blk.Builder.result b :: blks |>
5556
List.rev
5657

58+
let resolve jmp = Option.(Jmp.(dst jmp >>| resolve))
59+
60+
let references blks =
61+
List.fold ~init:Tid.Map.empty ~f:(fun refs {jmps} ->
62+
List.fold jmps ~init:refs ~f:(fun refs jmp ->
63+
match resolve jmp with
64+
| Some (First tid) when Set.mem blks tid ->
65+
Map.update refs tid ~f:(function
66+
| None -> 1
67+
| Some refs -> refs+1)
68+
| _ -> refs))
69+
70+
let names =
71+
List.fold ~init:Tid.Set.empty ~f:(fun blks {name} ->
72+
Set.add blks name)
73+
74+
let single_dst = function
75+
| [] | _ :: _ :: _ -> None
76+
| [x] -> match resolve x with
77+
| Some First tid -> Some tid
78+
| _ -> None
79+
80+
let is_sub {weak; keep} = keep && weak
81+
82+
let can_contract refs b1 b2 =
83+
is_sub b1 && is_sub b2 && match single_dst b1.jmps with
84+
| None -> false
85+
| Some dst ->
86+
Tid.equal dst b2.name &&
87+
match Map.find refs dst with
88+
| Some 1 -> true
89+
| _ -> false
90+
91+
(* pre: can_contract b1 b2 /\
92+
can_contract b2 b3 .. *)
93+
let contract blks = match List.hd blks, List.last blks with
94+
| Some first,Some last -> {
95+
first with
96+
defs = List.(rev@@concat_map blks ~f:(fun {defs} -> List.rev defs));
97+
jmps = last.jmps;
98+
}
99+
| _ -> assert false
100+
101+
let normalize blks =
102+
let names = names blks in
103+
let refs = references names blks in
104+
List.sort blks ~compare:(fun b1 b2 ->
105+
Tid.compare b1.name b2.name) |>
106+
List.group ~break:(fun b1 b2 ->
107+
not @@ can_contract refs b1 b2) |>
108+
List.map ~f:contract
109+
110+
let has_subs = List.exists ~f:is_sub
111+
112+
let normalize = function
113+
| [] | [_] as xs -> xs
114+
| xs -> if has_subs xs then normalize xs else xs
115+
57116
(* postconditions:
58117
- the first block is the entry block
59118
- the last block is the exit block
60119
*)
61120
let reify {entry; blks} =
62121
if is_null entry then [] else
63-
List.fold blks ~init:(None,[]) ~f:(fun (s,blks) b ->
122+
normalize blks |>
123+
List.fold ~init:(None,[]) ~f:(fun (s,blks) b ->
64124
match make_blk b with
65125
| [] -> assert false
66126
| blk::blks' ->
@@ -108,7 +168,8 @@ let slot = graph
108168
module IR = struct
109169
include Theory.Empty
110170
let ret = Knowledge.return
111-
let blk ?(keep=true) tid = {name=tid; defs=[]; jmps=[]; keep}
171+
let blk ?(keep=true) tid =
172+
{name=tid; defs=[]; jmps=[]; keep; weak=false}
112173

113174
let def = (fun x -> x.defs), (fun x d -> {x with defs = d})
114175
let jmp = (fun x -> x.jmps), (fun x d -> match x.jmps with
@@ -148,15 +209,20 @@ module IR = struct
148209
then Jmp.reify ?cnd ~tid ~alt:(Jmp.resolved dst) ()
149210
else Jmp.reify ?cnd ~tid ~dst:(Jmp.resolved dst) ()
150211

212+
let is_subinstruction label =
213+
KB.collect Insn.Seqnum.slot label >>|
214+
Option.is_some
215+
151216
let relink label {entry; blks} =
217+
let* weak = is_subinstruction label in
152218
if is_null entry then KB.return {
153219
entry = label;
154-
blks = [{name=label; keep=true; defs=[]; jmps=[]}]
220+
blks = [{name=label; keep=true; weak; defs=[]; jmps=[]}]
155221
} else
156222
let+ blks = List.fold_map blks ~init:`Unbound ~f:(fun r blk ->
157223
if Theory.Label.equal blk.name entry
158224
then if blk.keep then `Relink blk.name, blk
159-
else `Relinked, {blk with name = label; keep=true}
225+
else `Relinked, {blk with name = label; keep=true; weak}
160226
else r,blk) |> function
161227
| `Relinked,blks -> KB.return blks
162228
| `Relink dst, blks ->
@@ -175,6 +241,7 @@ module IR = struct
175241
blks = [{
176242
name=entry;
177243
keep=false;
244+
weak=false;
178245
jmps=[];
179246
defs=[Def.reify ~tid v x]
180247
}]
@@ -211,6 +278,7 @@ module IR = struct
211278
blks = [{
212279
name = head;
213280
keep = true;
281+
weak = false;
214282
defs = [];
215283
jmps = [goto ~cnd ~tid head]}]}
216284
| {entry=loop; blks=b::blks} ->

plugins/ghidra/semantics/pcode.lisp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@
3232
(set# td dst (load-bits td ptr)))
3333

3434
(defun branch (typ dst)
35-
(if (is-symbol typ) (exec-addr dst)
36-
(goto-subinstruction dst)))
35+
(if (is-symbol typ)
36+
(exec-addr dst)
37+
(goto-subinstruction dst)))
3738

3839
(defun BRANCH (typ dst)
3940
(branch typ dst))

0 commit comments

Comments
 (0)