Skip to content

Commit b80212b

Browse files
authored
adds the --print-missing option to print unlifed instructions (#1409)
1 parent c96cb70 commit b80212b

File tree

1 file changed

+125
-8
lines changed

1 file changed

+125
-8
lines changed

plugins/disassemble/disassemble_main.ml

Lines changed: 125 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,107 @@ open Err.Syntax
128128
let pass_error = Result.map_error ~f:(fun err -> Fail (Pass err))
129129
let proj_error = Result.map_error ~f:(fun err -> Fail (Project err))
130130

131+
module Missing : sig
132+
val enable : unit -> unit
133+
val print : unit -> unit
134+
end = struct
135+
open KB.Syntax
136+
module Basic = Disasm_expert.Basic.Insn
137+
138+
let bool = KB.Domain.flat ~equal:Bool.equal "flat-bool"
139+
~inspect:sexp_of_bool ~empty:false
140+
let has_semantics = KB.Class.property ~public:true ~package:"bap"
141+
Theory.Semantics.cls "has-semantics" bool
142+
143+
let present eff =
144+
eff >>| fun x -> KB.Value.put has_semantics x true
145+
146+
module Present : Theory.Core = struct
147+
include Theory.Empty
148+
let blk lbl data ctrl = present @@ blk lbl data ctrl
149+
let perform s = present @@ perform s
150+
let seq x y = present @@ seq x y
151+
let branch cnd yes nay = present @@ branch cnd yes nay
152+
end
153+
154+
let pp_code ppf code =
155+
let dom = KB.Slot.domain Theory.Semantics.code in
156+
match KB.Domain.inspect dom code with
157+
| Sexp.List [Atom s] -> Format.fprintf ppf "%s" s
158+
| _ -> Format.fprintf ppf "not disassembled"
159+
160+
let pp_ops ppf ops =
161+
if Array.is_empty ops then Format.fprintf ppf ""
162+
else Format.fprintf ppf " %s"
163+
(String.concat_array ~sep:" " @@
164+
Array.map ~f:Op.to_string ops)
165+
166+
let pp_basic ppf insn =
167+
Format.fprintf ppf "(%s:%s%a)"
168+
(Basic.encoding insn)
169+
(Basic.name insn)
170+
pp_ops (Basic.ops insn)
171+
172+
let update_missing insn histo =
173+
Map.update histo (Basic.name insn) ~f:(function
174+
| None -> 1
175+
| Some c -> c + 1)
176+
177+
let build_histo =
178+
Map.fold ~init:Int.Map.empty ~f:(fun ~key ~data ->
179+
Map.add_multi ~key:data ~data:key)
180+
181+
let pp_histo ppf stats =
182+
build_histo stats |>
183+
Map.to_sequence ~order:`Increasing_key |>
184+
Seq.iter ~f:(fun (count,codes) ->
185+
List.iter codes ~f:(Format.fprintf ppf "%-4d %s@\n" count))
186+
187+
let print_missing () =
188+
let lifted = ref 0 and missed = ref 0 and failed = ref 0 in
189+
KB.objects Theory.Program.cls >>=
190+
KB.Seq.fold ~init:String.Map.empty ~f:(fun stats insn ->
191+
let* sema = KB.collect Theory.Semantics.slot insn in
192+
let code = KB.Value.get Theory.Semantics.code sema in
193+
if Option.is_none code then KB.return stats
194+
else KB.collect Theory.Label.addr insn >>= function
195+
| None -> KB.return stats
196+
| Some addr ->
197+
KB.collect Basic.slot insn >>| function
198+
| None ->
199+
Format.printf "%a: %a ; not disassembled@\n" Bitvec.pp
200+
addr pp_code code;
201+
incr failed;
202+
stats
203+
| Some _ when KB.Value.get has_semantics sema ->
204+
incr lifted;
205+
stats
206+
| Some basic ->
207+
incr missed;
208+
Format.printf "%a: %a ; %a ; %a@\n" Bitvec.pp addr
209+
pp_code code Insn.pp sema
210+
pp_basic basic;
211+
update_missing basic stats) >>| fun stats ->
212+
Format.printf "@\nHistogram:@\n%a@\n\
213+
%-8s %d@\n\
214+
%-8s %d@\n\
215+
%-8s %d@\n"
216+
pp_histo stats "Lifted:" !lifted "Failed:" !failed "Missed:" !missed
217+
218+
let print () =
219+
Toplevel.exec @@ print_missing ()
220+
221+
let declare_theory () =
222+
Theory.declare
223+
~package:"bap"
224+
~name:"present"
225+
~desc:"tracks the presence of semantics"
226+
(KB.return (module Present : Theory.Core))
227+
228+
let enable () =
229+
declare_theory ()
230+
end
231+
131232
let run_passes base proj =
132233
Err.List.fold ~init:(0,proj) ~f:(fun (step,proj) pass ->
133234
report_progress
@@ -237,6 +338,17 @@ let knowledge =
237338
~aliases:["k"; "knowledge-base";]
238339
(Extension.Type.some rw_file) "project"
239340

341+
let print_missing =
342+
Extension.Command.flag
343+
~doc:"Print missing instructions. \
344+
This option disables cache and redisassembles the binary \
345+
from scratch. It then prints the list of all instructions \
346+
that do not have a representable semantics, followed by \
347+
the histogram of all missed opcodes, and finally prints \
348+
the number of lifted opcodes, the number of addresses \
349+
that wasn't disassembled at all, and the number of opcodes \
350+
that do not have semantics." "print-missing"
351+
240352
let input = Extension.Command.argument
241353
~doc:"The input file" Extension.Type.("FILE" %: string =? "a.out" )
242354

@@ -355,7 +467,8 @@ let setup_gc_unless_overriden () =
355467
then setup_gc ()
356468
else info "GC parameters are overriden by a user"
357469

358-
let load_knowledge digest = function
470+
let load_knowledge disable digest = function
471+
| _ when disable -> false
359472
| None -> import_knowledge_from_cache digest
360473
| Some path when not (Sys.file_exists path) ->
361474
import_knowledge_from_cache digest
@@ -372,8 +485,8 @@ let save_knowledge ~had_knowledge ~update digest = function
372485
Knowledge.save (Toplevel.current ()) path
373486
| Some _ -> ()
374487

375-
376-
let create_and_process input outputs passes loader target update kb ctxt =
488+
let create_and_process input outputs passes loader target update
489+
kb print_missing ctxt =
377490
let uses_file_loader = Sys.file_exists loader &&
378491
Fn.non Filename.is_implicit loader in
379492
let package = input in
@@ -382,29 +495,33 @@ let create_and_process input outputs passes loader target update kb ctxt =
382495
Caml.Digest.file input;
383496
if uses_file_loader then Caml.Digest.file loader else loader;
384497
] in
385-
let had_knowledge = load_knowledge digest kb in
498+
let had_knowledge = load_knowledge print_missing digest kb in
386499
let input = Project.Input.load ~target ~loader input in
500+
if print_missing then Missing.enable ();
387501
Project.create ~package
388502
input |> proj_error >>= fun proj ->
389503
process passes outputs proj >>| fun proj ->
504+
if print_missing then Missing.print ();
390505
save_knowledge ~had_knowledge ~update digest kb;
391506
proj
392507

393508
let _disassemble_command_registered : unit =
394509
let args =
395510
let open Extension.Command in
396511
args $input $outputs $old_style_passes $passes $loader $target
397-
$update $knowledge in
512+
$update $knowledge $print_missing in
398513
Extension.Command.declare ~doc:man "disassemble"
399514
~requires:features_used args @@
400-
fun input outputs old_style_passes passes loader target update kb ctxt ->
515+
fun input outputs old_style_passes passes loader target update
516+
kb print_missing ctxt ->
401517
setup_gc_unless_overriden ();
402518
validate_knowledge update kb >>= fun () ->
403519
validate_input input >>= fun () ->
404520
validate_passes_style old_style_passes (List.concat passes) >>=
405521
validate_passes >>= fun passes ->
406522
Dump_formats.parse outputs >>= fun outputs ->
407-
create_and_process input outputs passes loader target update kb ctxt >>= fun _ ->
523+
create_and_process input outputs passes loader target update kb
524+
print_missing ctxt >>= fun _ ->
408525
Ok ()
409526

410527
let _compare_command_registered : unit =
@@ -460,7 +577,7 @@ let _compare_command_registered : unit =
460577
let projs =
461578
Seq.map (Seq.of_list (input::inputs)) ~f:(fun input ->
462579
create_and_process input outputs passes loader target
463-
update kb ctxt) in
580+
update kb false ctxt) in
464581
let exception Escape of Extension.Error.t in
465582
try
466583
let projs = Seq.map projs ~f:(function

0 commit comments

Comments
 (0)