@@ -128,6 +128,107 @@ open Err.Syntax
128
128
let pass_error = Result. map_error ~f: (fun err -> Fail (Pass err))
129
129
let proj_error = Result. map_error ~f: (fun err -> Fail (Project err))
130
130
131
+ module Missing : sig
132
+ val enable : unit -> unit
133
+ val print : unit -> unit
134
+ end = struct
135
+ open KB.Syntax
136
+ module Basic = Disasm_expert.Basic. Insn
137
+
138
+ let bool = KB.Domain. flat ~equal: Bool. equal " flat-bool"
139
+ ~inspect: sexp_of_bool ~empty: false
140
+ let has_semantics = KB.Class. property ~public: true ~package: " bap"
141
+ Theory.Semantics. cls " has-semantics" bool
142
+
143
+ let present eff =
144
+ eff >> | fun x -> KB.Value. put has_semantics x true
145
+
146
+ module Present : Theory .Core = struct
147
+ include Theory. Empty
148
+ let blk lbl data ctrl = present @@ blk lbl data ctrl
149
+ let perform s = present @@ perform s
150
+ let seq x y = present @@ seq x y
151
+ let branch cnd yes nay = present @@ branch cnd yes nay
152
+ end
153
+
154
+ let pp_code ppf code =
155
+ let dom = KB.Slot. domain Theory.Semantics. code in
156
+ match KB.Domain. inspect dom code with
157
+ | Sexp. List [Atom s] -> Format. fprintf ppf " %s" s
158
+ | _ -> Format. fprintf ppf " not disassembled"
159
+
160
+ let pp_ops ppf ops =
161
+ if Array. is_empty ops then Format. fprintf ppf " "
162
+ else Format. fprintf ppf " %s"
163
+ (String. concat_array ~sep: " " @@
164
+ Array. map ~f: Op. to_string ops)
165
+
166
+ let pp_basic ppf insn =
167
+ Format. fprintf ppf " (%s:%s%a)"
168
+ (Basic. encoding insn)
169
+ (Basic. name insn)
170
+ pp_ops (Basic. ops insn)
171
+
172
+ let update_missing insn histo =
173
+ Map. update histo (Basic. name insn) ~f: (function
174
+ | None -> 1
175
+ | Some c -> c + 1 )
176
+
177
+ let build_histo =
178
+ Map. fold ~init: Int.Map. empty ~f: (fun ~key ~data ->
179
+ Map. add_multi ~key: data ~data: key)
180
+
181
+ let pp_histo ppf stats =
182
+ build_histo stats |>
183
+ Map. to_sequence ~order: `Increasing_key |>
184
+ Seq. iter ~f: (fun (count ,codes ) ->
185
+ List. iter codes ~f: (Format. fprintf ppf " %-4d %s@\n " count))
186
+
187
+ let print_missing () =
188
+ let lifted = ref 0 and missed = ref 0 and failed = ref 0 in
189
+ KB. objects Theory.Program. cls >> =
190
+ KB.Seq. fold ~init: String.Map. empty ~f: (fun stats insn ->
191
+ let * sema = KB. collect Theory.Semantics. slot insn in
192
+ let code = KB.Value. get Theory.Semantics. code sema in
193
+ if Option. is_none code then KB. return stats
194
+ else KB. collect Theory.Label. addr insn >> = function
195
+ | None -> KB. return stats
196
+ | Some addr ->
197
+ KB. collect Basic. slot insn >> | function
198
+ | None ->
199
+ Format. printf " %a: %a ; not disassembled@\n " Bitvec. pp
200
+ addr pp_code code;
201
+ incr failed;
202
+ stats
203
+ | Some _ when KB.Value. get has_semantics sema ->
204
+ incr lifted;
205
+ stats
206
+ | Some basic ->
207
+ incr missed;
208
+ Format. printf " %a: %a ; %a ; %a@\n " Bitvec. pp addr
209
+ pp_code code Insn. pp sema
210
+ pp_basic basic;
211
+ update_missing basic stats) >> | fun stats ->
212
+ Format. printf " @\n Histogram:@\n %a@\n \
213
+ %-8s %d@\n \
214
+ %-8s %d@\n \
215
+ %-8s %d@\n "
216
+ pp_histo stats " Lifted:" ! lifted " Failed:" ! failed " Missed:" ! missed
217
+
218
+ let print () =
219
+ Toplevel. exec @@ print_missing ()
220
+
221
+ let declare_theory () =
222
+ Theory. declare
223
+ ~package: " bap"
224
+ ~name: " present"
225
+ ~desc: " tracks the presence of semantics"
226
+ (KB. return (module Present : Theory.Core ))
227
+
228
+ let enable () =
229
+ declare_theory ()
230
+ end
231
+
131
232
let run_passes base proj =
132
233
Err.List. fold ~init: (0 ,proj) ~f: (fun (step ,proj ) pass ->
133
234
report_progress
@@ -237,6 +338,17 @@ let knowledge =
237
338
~aliases: [" k" ; " knowledge-base" ;]
238
339
(Extension.Type. some rw_file) " project"
239
340
341
+ let print_missing =
342
+ Extension.Command. flag
343
+ ~doc: " Print missing instructions. \
344
+ This option disables cache and redisassembles the binary \
345
+ from scratch. It then prints the list of all instructions \
346
+ that do not have a representable semantics, followed by \
347
+ the histogram of all missed opcodes, and finally prints \
348
+ the number of lifted opcodes, the number of addresses \
349
+ that wasn't disassembled at all, and the number of opcodes \
350
+ that do not have semantics." " print-missing"
351
+
240
352
let input = Extension.Command. argument
241
353
~doc: " The input file" Extension.Type. (" FILE" %: string =? " a.out" )
242
354
@@ -355,7 +467,8 @@ let setup_gc_unless_overriden () =
355
467
then setup_gc ()
356
468
else info " GC parameters are overriden by a user"
357
469
358
- let load_knowledge digest = function
470
+ let load_knowledge disable digest = function
471
+ | _ when disable -> false
359
472
| None -> import_knowledge_from_cache digest
360
473
| Some path when not (Sys. file_exists path) ->
361
474
import_knowledge_from_cache digest
@@ -372,8 +485,8 @@ let save_knowledge ~had_knowledge ~update digest = function
372
485
Knowledge. save (Toplevel. current () ) path
373
486
| Some _ -> ()
374
487
375
-
376
- let create_and_process input outputs passes loader target update kb ctxt =
488
+ let create_and_process input outputs passes loader target update
489
+ kb print_missing ctxt =
377
490
let uses_file_loader = Sys. file_exists loader &&
378
491
Fn. non Filename. is_implicit loader in
379
492
let package = input in
@@ -382,29 +495,33 @@ let create_and_process input outputs passes loader target update kb ctxt =
382
495
Caml.Digest. file input;
383
496
if uses_file_loader then Caml.Digest. file loader else loader;
384
497
] in
385
- let had_knowledge = load_knowledge digest kb in
498
+ let had_knowledge = load_knowledge print_missing digest kb in
386
499
let input = Project.Input. load ~target ~loader input in
500
+ if print_missing then Missing. enable () ;
387
501
Project. create ~package
388
502
input |> proj_error >> = fun proj ->
389
503
process passes outputs proj >> | fun proj ->
504
+ if print_missing then Missing. print () ;
390
505
save_knowledge ~had_knowledge ~update digest kb;
391
506
proj
392
507
393
508
let _disassemble_command_registered : unit =
394
509
let args =
395
510
let open Extension.Command in
396
511
args $ input $ outputs $ old_style_passes $ passes $ loader $ target
397
- $ update $ knowledge in
512
+ $ update $ knowledge $ print_missing in
398
513
Extension.Command. declare ~doc: man " disassemble"
399
514
~requires: features_used args @@
400
- fun input outputs old_style_passes passes loader target update kb ctxt ->
515
+ fun input outputs old_style_passes passes loader target update
516
+ kb print_missing ctxt ->
401
517
setup_gc_unless_overriden () ;
402
518
validate_knowledge update kb >> = fun () ->
403
519
validate_input input >> = fun () ->
404
520
validate_passes_style old_style_passes (List. concat passes) >> =
405
521
validate_passes >> = fun passes ->
406
522
Dump_formats. parse outputs >> = fun outputs ->
407
- create_and_process input outputs passes loader target update kb ctxt >> = fun _ ->
523
+ create_and_process input outputs passes loader target update kb
524
+ print_missing ctxt >> = fun _ ->
408
525
Ok ()
409
526
410
527
let _compare_command_registered : unit =
@@ -460,7 +577,7 @@ let _compare_command_registered : unit =
460
577
let projs =
461
578
Seq. map (Seq. of_list (input::inputs)) ~f: (fun input ->
462
579
create_and_process input outputs passes loader target
463
- update kb ctxt) in
580
+ update kb false ctxt) in
464
581
let exception Escape of Extension.Error. t in
465
582
try
466
583
let projs = Seq. map projs ~f: (function
0 commit comments