Skip to content

Commit 00484df

Browse files
committed
[Seedpool] refactor
1 parent d495b7f commit 00484df

File tree

2 files changed

+125
-158
lines changed

2 files changed

+125
-158
lines changed

src/seedpool.ml

Lines changed: 116 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@ module type QUEUE = sig
1010

1111
val empty : t
1212

13-
val register : elt -> t -> t
14-
(** push a seed to the queue for the first time *)
15-
1613
val push : elt -> t -> t
1714
(** push a seed to the queue -- not the first time *)
1815

@@ -32,16 +29,11 @@ module CfgSeed = struct
3229
importants : string;
3330
}
3431

35-
let make llm (trace : Coverage.BlockTrace.t) importants node_tbl distmap =
32+
let make llm trace importants node_tbl distmap =
3633
let score = Distance.distance_score trace node_tbl distmap in
3734
let covers = Distance.get_cover trace node_tbl distmap in
38-
{
39-
llm;
40-
score;
41-
covers;
42-
edge_cov = Coverage.EdgeCoverage.of_trace trace;
43-
importants;
44-
}
35+
let edge_cov = Coverage.EdgeCoverage.of_trace trace in
36+
{ llm; score; covers; edge_cov; importants }
4537

4638
let llmodule seed = seed.llm
4739
let covers seed = seed.covers
@@ -54,7 +46,7 @@ module CfgSeed = struct
5446
let int_score = Float.to_int seed.score in
5547
if seed.covers then 12 else if int_score >= 10 then 3 else 12 - int_score
5648

57-
let name ?(parent : int option) seed =
49+
let name ?parent seed =
5850
let hash = ALlvm.hash_llm seed.llm in
5951
match parent with
6052
| None ->
@@ -74,6 +66,15 @@ end
7466
module Seed = CfgSeed
7567
include Queue
7668

69+
let has_substr substr llv =
70+
try
71+
ignore
72+
(Str.search_forward (Str.regexp_string substr)
73+
(ALlvm.string_of_llvalue llv)
74+
0);
75+
true
76+
with Not_found -> false
77+
7778
let check_value_type_for_mutation llv =
7879
let ty = ALlvm.type_of llv in
7980
if String.starts_with ~prefix:"target" (ALlvm.string_of_lltype ty) then false
@@ -82,28 +83,11 @@ let check_value_type_for_mutation llv =
8283
| Void | Half | Float | Double | Fp128 | Label | Integer | Function | Array
8384
| Metadata ->
8485
true
85-
| Vector when ty |> ALlvm.element_type |> ALlvm.classify_type = Integer ->
86-
true
87-
| Vector -> false
88-
| Pointer -> (
89-
try
90-
ignore
91-
(Str.search_forward
92-
(Str.regexp_string "inttoptr")
93-
(ALlvm.string_of_llvalue llv)
94-
0);
95-
false
96-
with Not_found -> (
97-
try
98-
ignore
99-
(Str.search_forward
100-
(Str.regexp_string "no_cfi")
101-
(ALlvm.string_of_llvalue llv)
102-
0);
103-
false
104-
with Not_found ->
105-
llv |> ALlvm.type_of |> ALlvm.address_space = 0
106-
&& not (ALlvm.is_null llv)))
86+
| Vector -> ty |> ALlvm.element_type |> ALlvm.classify_type = Integer
87+
| Pointer ->
88+
if has_substr "inttoptr" llv then false
89+
else if has_substr "no_cfi" llv then false
90+
else ALlvm.address_space ty = 0 && not (ALlvm.is_null llv)
10791
| _ -> false
10892

10993
let check_opc_for_mutation opc =
@@ -117,33 +101,26 @@ let rec check_opd_for_mutation i llv res =
117101
|> check_value_type_for_mutation
118102
|> check_opd_for_mutation (i + 1) llv
119103

120-
let check_llv_for_mutation res llv =
121-
if not res then res
122-
else
123-
match ALlvm.classify_value llv with
124-
| Instruction opc ->
125-
opc |> check_opc_for_mutation |> check_opd_for_mutation 0 llv
126-
| _ -> check_value_type_for_mutation llv
104+
let check_llv_for_mutation llv =
105+
match ALlvm.classify_value llv with
106+
| Instruction opc ->
107+
opc |> check_opc_for_mutation |> check_opd_for_mutation 0 llv
108+
| _ -> check_value_type_for_mutation llv
127109

128110
let check_func_for_mutation f =
129-
let res = ALlvm.fold_left_params check_llv_for_mutation true f in
130-
let res = ALlvm.fold_left_all_instr check_llv_for_mutation res f in
131-
res
111+
ALlvm.for_all_params check_llv_for_mutation f
112+
&& ALlvm.for_all_instr check_llv_for_mutation f
132113

133114
let check_llm_for_mutation llm =
134-
let res = ALlvm.fold_left_globals check_llv_for_mutation true llm in
135-
if not res then res
136-
else
137-
ALlvm.fold_left_functions
138-
(fun res f -> if not res then res else check_func_for_mutation f)
139-
res llm
115+
ALlvm.for_all_globals check_llv_for_mutation llm
116+
&& ALlvm.for_all_functions check_func_for_mutation llm
140117

141-
let check_exist_ret func =
142-
let is_ret instr = ALlvm.instr_opcode instr = Ret in
143-
ALlvm.any_all_instr is_ret func
118+
let check_exist_ret f =
119+
ALlvm.any_all_instr (fun instr -> ALlvm.instr_opcode instr = Ret) f
144120

145121
let rec redef_fn llctx f_old wide instr =
146122
let extra_param = [| ALlvm.i32_type llctx; ALlvm.pointer_type llctx |] in
123+
let fmodule = ALlvm.global_parent f_old in
147124
let params_old = ALlvm.params f_old in
148125
let param_tys =
149126
Array.append (Array.map ALlvm.type_of params_old) extra_param
@@ -155,11 +132,8 @@ let rec redef_fn llctx f_old wide instr =
155132
if i = instr then true
156133
else if ALlvm.ChangeRetVal.check_target i then (
157134
let new_ret_ty = ALlvm.type_of i in
158-
let f_new =
159-
ALlvm.define_function ""
160-
(ALlvm.function_type new_ret_ty param_tys)
161-
(ALlvm.global_parent f_old)
162-
in
135+
let new_fty = ALlvm.function_type new_ret_ty param_tys in
136+
let f_new = ALlvm.define_function "" new_fty fmodule in
163137

164138
(* copy function with new return value (target).*)
165139
ALlvm.ChangeRetVal.copy_blocks llctx f_old f_new;
@@ -259,11 +233,19 @@ let rec clean_llm llctx wide llm =
259233
else None
260234
with _ -> if wide then clean_llm llctx false llm else None
261235

236+
let is_in_slice node_tbl distmap line =
237+
match !Config.coverage with
238+
| Config.FuzzingMode.Sliced_cfg ->
239+
int_of_string line
240+
|> Coverage.node_of_addr node_tbl distmap
241+
|> Option.is_some
242+
| _ -> true
243+
262244
let can_optimize seedfile node_tbl distmap =
263245
match
264246
Opt.run ~passes:!Config.optimizer_passes ~mtriple:!Config.mtriple seedfile
265247
with
266-
| Error Non_zero_exit | Error Hang ->
248+
| Error Non_zero_exit | Error Hang | Assert _ ->
267249
L.debug "%s cannot be optimized" seedfile;
268250
AUtil.name_opted_ver seedfile |> AUtil.clean;
269251
None
@@ -272,23 +254,10 @@ let can_optimize seedfile node_tbl distmap =
272254
AUtil.name_opted_ver seedfile |> AUtil.clean;
273255
if !Config.coverage = Config.FuzzingMode.All_edges then Some (seedfile, [])
274256
else None
275-
| Assert _ ->
276-
L.debug "Opt %s failed by Assertion Error" seedfile;
277-
AUtil.name_opted_ver seedfile |> AUtil.clean;
278-
None
279257
| Ok lines ->
280258
L.debug "%s can be optimized" seedfile;
281259
AUtil.name_opted_ver seedfile |> AUtil.clean;
282-
let filter_func =
283-
match !Config.coverage with
284-
| Config.FuzzingMode.Sliced_cfg ->
285-
fun line ->
286-
int_of_string line
287-
|> Coverage.node_of_addr node_tbl distmap
288-
|> Option.is_some
289-
| _ -> fun _ -> true
290-
in
291-
let lines = lines |> List.filter filter_func in
260+
let lines = lines |> List.filter (is_in_slice node_tbl distmap) in
292261
Some (seedfile, lines)
293262

294263
let push (seed : Seed.t) pool =
@@ -314,91 +283,83 @@ let save ?(parent : int option) (seed : Seed.t) =
314283
Printf.fprintf line "%s" seed.importants);
315284
ALlvm.save_ll !Config.corpus_dir seed_name llm |> ignore
316285

317-
let evaluate_seeds_and_construct_seedpool seeds node_tbl distmap =
318-
let open AUtil in
286+
let is_optimuzz_base () =
287+
(* Optimuzz_Base *)
288+
!Config.score = Config.FuzzingMode.Constant
289+
&& !Config.coverage = Config.FuzzingMode.All_edges
290+
291+
let is_nondirected () = !Config.coverage = Config.FuzzingMode.All_edges
292+
293+
let log_seed seed =
294+
L.debug "evaluate seed: \n%s\n" (ALlvm.string_of_llmodule seed.Seed.llm);
295+
L.debug "score: %f" (Seed.score seed)
296+
297+
let classify_seeds node_tbl distmap raw_seeds =
298+
if is_nondirected () then
299+
List.fold_left
300+
(fun (cover_seeds, noncover_seeds) (_, llm, traces) ->
301+
let seed = Seed.make llm traces "" node_tbl distmap in
302+
log_seed seed;
303+
(cover_seeds, seed :: noncover_seeds))
304+
([], []) raw_seeds
305+
else
306+
List.fold_left
307+
(fun (cover_seeds, noncover_seeds) (_, llm, traces) ->
308+
let seed = Seed.make llm traces "" node_tbl distmap in
309+
log_seed seed;
310+
if Seed.covers seed then (seed :: cover_seeds, noncover_seeds)
311+
else (cover_seeds, seed :: noncover_seeds))
312+
([], []) raw_seeds
313+
314+
let hash_seed seed = ALlvm.hash_llm seed.Seed.llm
315+
let compare_hash a b = compare (hash_seed a) (hash_seed b)
316+
let compare_score a b = compare a.Seed.score b.Seed.score
317+
318+
let closest_seeds seeds =
319+
let unique_seeds = List.sort_uniq compare_hash seeds in
320+
let sorted_seeds =
321+
if !Config.score = Config.FuzzingMode.Constant then unique_seeds
322+
else
323+
unique_seeds
324+
|> List.sort (fun a b -> compare (Seed.score a) (Seed.score b))
325+
in
326+
let first_seeds =
327+
if !Config.coverage = Config.FuzzingMode.All_edges then sorted_seeds
328+
else List.take !Config.max_initial_seed sorted_seeds
329+
in
330+
first_seeds
331+
332+
let compute_init_cov seeds =
333+
let open Coverage in
334+
List.fold_left
335+
(fun accu seed -> EdgeCoverage.union accu seed.Seed.edge_cov)
336+
EdgeCoverage.empty seeds
337+
338+
let make_pool_and_cov seeds =
339+
let open Coverage in
319340
let pool = create () in
320-
if
321-
(* Optimuzz_Base *)
322-
!Config.score = Config.FuzzingMode.Constant
323-
&& !Config.coverage = Config.FuzzingMode.All_edges
324-
then (
325-
let pools =
326-
List.fold_left
327-
(fun pools (_, llm, traces) ->
328-
let seed = Seed.make llm traces "" node_tbl distmap in
329-
L.debug "evaluate seed: \n%s\n" (ALlvm.string_of_llmodule llm);
330-
L.debug "score: %s" (string_of_float (Seed.score seed));
331-
seed :: pools)
332-
[] seeds
333-
in
334-
let init_cov =
335-
List.fold_left
336-
(fun accu seed -> Coverage.EdgeCoverage.union accu (Seed.edge_cov seed))
337-
Coverage.EdgeCoverage.empty pools
338-
in
339-
pools |> List.to_seq |> add_seq pool;
340-
(pool, init_cov))
341+
seeds |> List.to_seq |> add_seq pool;
342+
let init_cov = compute_init_cov seeds in
343+
(pool, init_cov)
344+
345+
let evaluate_seeds_and_construct_seedpool raw_seeds node_tbl distmap =
346+
let open AUtil in
347+
let open Coverage in
348+
if is_optimuzz_base () then
349+
let _, seeds = classify_seeds node_tbl distmap raw_seeds in
350+
make_pool_and_cov seeds
341351
else
342-
let pool_covers, pool_noncovers =
343-
List.fold_left
344-
(fun (pool_covers, pool_noncovers) (_, llm, traces) ->
345-
let seed = Seed.make llm traces "" node_tbl distmap in
346-
L.debug "evaluate seed: \n%s\n" (ALlvm.string_of_llmodule llm);
347-
L.debug "score: %s" (string_of_float (Seed.score seed));
348-
(* will assume all-edges option to non-directed fuzzing *)
349-
if !Config.coverage = Config.FuzzingMode.All_edges then
350-
(pool_covers, seed :: pool_noncovers)
351-
else if Seed.covers seed then (seed :: pool_covers, pool_noncovers)
352-
else (pool_covers, seed :: pool_noncovers))
353-
([], []) seeds
352+
let cover_seeds, noncover_seeds =
353+
classify_seeds node_tbl distmap raw_seeds
354354
in
355-
if pool_covers = [] then (
355+
if cover_seeds = [] then (
356356
L.info "No covering seeds found. Using closest seeds.";
357-
let pool_closest =
358-
pool_noncovers
359-
|> List.sort_uniq (fun a b ->
360-
compare
361-
(ALlvm.hash_llm (Seed.llmodule a))
362-
(ALlvm.hash_llm (Seed.llmodule b)))
363-
in
364-
let pool_closest =
365-
if !Config.score = Config.FuzzingMode.Constant then pool_closest
366-
else
367-
pool_closest
368-
|> List.sort (fun a b -> compare (Seed.score a) (Seed.score b))
369-
in
370-
let pool_closest =
371-
(* will assume all-edges option to non-directed fuzzing *)
372-
if !Config.coverage = Config.FuzzingMode.All_edges then pool_closest
373-
else pool_closest |> take !Config.max_initial_seed
374-
in
375-
376-
let init_cov =
377-
List.fold_left
378-
(fun accu seed ->
379-
Coverage.EdgeCoverage.union accu (Seed.edge_cov seed))
380-
Coverage.EdgeCoverage.empty pool_closest
381-
in
382-
pool_closest |> List.to_seq |> add_seq pool;
383-
(pool, init_cov))
357+
let pool_closest = closest_seeds noncover_seeds in
358+
make_pool_and_cov pool_closest)
384359
else (
385-
(* if we have covering seeds, we use covering seeds only. *)
386360
L.info "Covering seeds found. Using them only.";
387-
let pool_covers =
388-
pool_covers
389-
|> List.sort_uniq (fun a b ->
390-
compare
391-
(ALlvm.hash_llm (Seed.llmodule a))
392-
(ALlvm.hash_llm (Seed.llmodule b)))
393-
in
394-
let init_cov =
395-
List.fold_left
396-
(fun accu seed ->
397-
Coverage.EdgeCoverage.union accu (Seed.edge_cov seed))
398-
Coverage.EdgeCoverage.empty pool_covers
399-
in
400-
pool_covers |> List.to_seq |> add_seq pool;
401-
(pool, init_cov))
361+
let sorted_seeds = cover_seeds |> List.sort_uniq compare_score in
362+
make_pool_and_cov sorted_seeds)
402363

403364
let make llctx node_tbl (distmap : float Coverage.DistanceTable.t) =
404365
let open AUtil in
@@ -433,14 +394,13 @@ let make llctx node_tbl (distmap : float Coverage.DistanceTable.t) =
433394

434395
let seed_count = ref 0 in
435396

436-
let filter_seed seed =
397+
let preprocess_seed seed =
437398
let* path, lines = can_optimize seed node_tbl distmap in
438399
L.debug "filter seed: %s " path;
439400
match ALlvm.read_ll llctx path with
440401
| Ok llm when check_llm_for_mutation llm ->
441402
let cov = lines |> List.map int_of_string in
442403
let llm = add_dummy_params llm in
443-
L.debug "filtered seeds: %s" (ALlvm.string_of_llmodule llm);
444404
seed_count := !seed_count + 1;
445405
L.debug "seed count: %d" !seed_count;
446406
Some (path, llm, cov)
@@ -453,9 +413,7 @@ let make llctx node_tbl (distmap : float Coverage.DistanceTable.t) =
453413

454414
Array.to_list seed_files
455415
|> List.map (Filename.concat seed_dir)
456-
|> List.filter_map filter_seed
416+
|> List.filter_map preprocess_seed
457417
in
458418

459-
let pool = evaluate_seeds_and_construct_seedpool seeds node_tbl distmap in
460-
461-
pool
419+
evaluate_seeds_and_construct_seedpool seeds node_tbl distmap

0 commit comments

Comments
 (0)