Skip to content

Commit be62647

Browse files
committed
Start making benchmark runner declarative
Start making benchmark more declarative by having a configuration file which we can specify what datasets to download and what experiments to run with which solvers. This commit removes multi query mode, adds a `setup` command to download datasets from smtcomp zenodo page, and renames `singl-query` to `exec` and makes this command just parse smt2 benchmarks so it's easier to redirect efforts into checking parsing errors.
1 parent 00ea4e6 commit be62647

File tree

12 files changed

+376
-305
lines changed

12 files changed

+376
-305
lines changed

bench/datasets.sexp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
(dataset
2+
(name QF_FP)
3+
(url "https://zenodo.org/records/15493090/files/QF_FP.tar.zst")
4+
(md5sum "b7f9786b5317d7be8a185c861088f504"))
5+
6+
(dataset
7+
(name QF_S)
8+
(url "https://zenodo.org/records/15493090/files/QF_S.tar.zst")
9+
(md5sum "22c3c79ddd6bbcb2be5a9cae8b3ffdcf"))
10+

bench/runner/cmd_exec.ml

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
let files_to_run d =
2+
match
3+
Bos.OS.Dir.fold_contents ~traverse:`Any
4+
(fun path acc -> if Fpath.has_ext ".smt2" path then path :: acc else acc)
5+
[] d
6+
with
7+
| Ok results -> results
8+
| Error (`Msg err) -> Fmt.failwith "%s" err
9+
10+
let parse_status =
11+
let re = Dune_re.(compile @@ Perl.re {|^(sat|unsat|unknown)|}) in
12+
fun stdout ->
13+
match Dune_re.exec_opt re stdout with
14+
| None -> `Unknown
15+
| Some group -> (
16+
match Dune_re.Group.get group 1 with
17+
| "sat" -> `Sat
18+
| "unsat" -> `Unsat
19+
| "unknown" -> `Unknown
20+
| _ -> assert false )
21+
22+
let pp_status fmt = function
23+
| `Sat -> Fmt.string fmt "sat"
24+
| `Unsat -> Fmt.string fmt "unsat"
25+
| `Unknown -> Fmt.string fmt "unknown"
26+
27+
let summarize results =
28+
let results_list =
29+
List.map
30+
(fun (prover, prover_results) ->
31+
( prover
32+
, List.fold_left
33+
(fun (total, sat, unsat, unknown, time)
34+
(_, _, stdout, _, rtime, _rusage) ->
35+
let sat, unsat, unknown =
36+
match parse_status stdout with
37+
| `Sat -> (succ sat, unsat, unknown)
38+
| `Unsat -> (sat, succ unsat, unknown)
39+
| `Unknown -> (sat, unsat, succ unknown)
40+
in
41+
(succ total, sat, unsat, unknown, time +. rtime) )
42+
(0, 0, 0, 0, 0.) prover_results ) )
43+
results
44+
in
45+
(* let solver, total, sat, unsat, unknwon, rtime = *)
46+
let _ =
47+
List.fold_left
48+
(fun (solver, total, sat, unsat, unknown, rtime)
49+
(prover, (p_total, p_sat, p_unsat, p_unknown, p_rtime)) ->
50+
( Tool.prover_to_string prover :: solver
51+
, p_total :: total
52+
, p_sat :: sat
53+
, p_unsat :: unsat
54+
, p_unknown :: unknown
55+
, p_rtime :: rtime ) )
56+
([], [], [], [], [], []) results_list
57+
in
58+
(* Owl_dataframe.make *)
59+
(* [| "solver"; "total"; "sat"; "unsat"; "unknown"; "rtime" |] *)
60+
(* ~data: *)
61+
(* [| Owl_dataframe.pack_string_series @@ Array.of_list solver *)
62+
(* ; Owl_dataframe.pack_int_series @@ Array.of_list total *)
63+
(* ; Owl_dataframe.pack_int_series @@ Array.of_list sat *)
64+
(* ; Owl_dataframe.pack_int_series @@ Array.of_list unsat *)
65+
(* ; Owl_dataframe.pack_int_series @@ Array.of_list unknwon *)
66+
(* ; Owl_dataframe.pack_float_series @@ Array.of_list rtime *)
67+
(* |] *)
68+
assert false
69+
70+
(* Maybe we can clean this up later *)
71+
(* let make_data_frames results = *)
72+
(* List.map *)
73+
(* (fun (prover, prover_results) -> *)
74+
(* let provers, benchmark, res, stdout, stderr, rtime, utime, stime, maxrss = *)
75+
(* List.fold_left *)
76+
(* (fun ( prover_acc *)
77+
(* , bench_acc *)
78+
(* , res_acc *)
79+
(* , stdout_acc *)
80+
(* , stderr_acc *)
81+
(* , rtime_acc *)
82+
(* , utime_acc *)
83+
(* , stime_acc *)
84+
(* , maxrss_acc ) *)
85+
(* (_status, benchmark, stdout, stderr, rtime, rusage) -> *)
86+
(* ( Tool.prover_to_string prover :: prover_acc *)
87+
(* , Fmt.str "%a" Fpath.pp benchmark :: bench_acc *)
88+
(* , Fmt.str "%a" pp_status (parse_status stdout) :: res_acc *)
89+
(* , String.escaped stdout :: stdout_acc *)
90+
(* , String.escaped stderr :: stderr_acc *)
91+
(* , rtime :: rtime_acc *)
92+
(* , rusage.ExtUnix.Specific.ru_utime :: utime_acc *)
93+
(* , rusage.ExtUnix.Specific.ru_stime :: stime_acc *)
94+
(* , rusage.ExtUnix.Specific.ru_maxrss :: maxrss_acc ) ) *)
95+
(* ([], [], [], [], [], [], [], [], []) *)
96+
(* prover_results *)
97+
(* in *)
98+
(* let df = *)
99+
(* Owl_dataframe.make *)
100+
(* [| "prover" *)
101+
(* ; "benchmark" *)
102+
(* ; "res" *)
103+
(* ; "stdout" *)
104+
(* ; "stderr" *)
105+
(* ; "rtime" *)
106+
(* ; "utime" *)
107+
(* ; "stime" *)
108+
(* ; "maxrss" *)
109+
(* |] *)
110+
(* ~data: *)
111+
(* [| Owl_dataframe.pack_string_series @@ Array.of_list provers *)
112+
(* ; Owl_dataframe.pack_string_series @@ Array.of_list benchmark *)
113+
(* ; Owl_dataframe.pack_string_series @@ Array.of_list res *)
114+
(* ; Owl_dataframe.pack_string_series @@ Array.of_list stdout *)
115+
(* ; Owl_dataframe.pack_string_series @@ Array.of_list stderr *)
116+
(* ; Owl_dataframe.pack_float_series @@ Array.of_list rtime *)
117+
(* ; Owl_dataframe.pack_float_series @@ Array.of_list utime *)
118+
(* ; Owl_dataframe.pack_float_series @@ Array.of_list stime *)
119+
(* ; Owl_dataframe.pack_int_series *)
120+
(* @@ Array.of_list (List.map Int64.to_int maxrss) *)
121+
(* |] *)
122+
(* in *)
123+
(* (prover, df) ) *)
124+
(* results *)
125+
126+
let write_data_frame started_at results_dir (prover, _df) =
127+
let csv_file = Fmt.str "%a-%s.csv" Tool.pp_prover prover started_at in
128+
let _csv_path = Fpath.(results_dir / csv_file) in
129+
(* Owl_dataframe.to_csv ~sep:',' df (Fpath.to_string csv_path) *)
130+
assert false
131+
132+
let main ~hook:_ ~timeout ~provers ~dirs =
133+
let open Result in
134+
let start_time = Unix.gettimeofday () in
135+
let now = Unix.(localtime start_time) in
136+
let _started_at = ExtUnix.Specific.strftime "%Y%m%dT%H%M%S" now in
137+
assert (List.for_all Tool.is_available provers);
138+
let files = List.concat_map files_to_run dirs in
139+
let provers_str = List.map Tool.prover_to_string provers in
140+
let w_prover = List.fold_left max 0 (List.map String.length provers_str) in
141+
let num_files = List.length files in
142+
let num_provers = List.length provers in
143+
let run_provers ?timeout provers run_id benchmark =
144+
List.map
145+
(fun prover ->
146+
let start = Unix.gettimeofday () in
147+
let status, stdout, stderr, rusage =
148+
Tool.fork_and_run ?timeout prover [ Fpath.to_string benchmark ]
149+
in
150+
let rtime = Unix.gettimeofday () -. start in
151+
let prover = Fmt.str "%a" Tool.pp_prover prover in
152+
Fmt.pr
153+
"@[<v 2>%-*s: [%05d/%05d]@;\
154+
File: %a@;\
155+
Exited: %a@;\
156+
Result: %s@;\
157+
Time : %0.03f@]@."
158+
w_prover prover run_id num_files Fpath.pp benchmark
159+
Util.pp_exit_status status (String.trim stdout) rtime;
160+
(status, benchmark, stdout, stderr, rtime, rusage) )
161+
provers
162+
in
163+
(* For every benchmark we run the selected provers *)
164+
let results =
165+
let run_id = ref 0 in
166+
let init = List.init num_provers (fun _i -> []) in
167+
List.fold_left
168+
(fun acc file ->
169+
incr run_id;
170+
let res = run_provers ?timeout provers !run_id file in
171+
List.map2 (fun res acc -> res :: acc) res acc )
172+
init files
173+
in
174+
let results = List.map2 (fun p results -> (p, results)) provers results in
175+
let summary =
176+
List.map
177+
(fun (prover, results) ->
178+
( prover
179+
, List.fold_left
180+
(fun acc (status, _, _, _, _, _) ->
181+
match status with Unix.WEXITED 0 -> succ acc | _ -> acc )
182+
0 results ) )
183+
results
184+
in
185+
let time_elapsed = Unix.gettimeofday () -. start_time in
186+
List.iter
187+
(fun (prover, passed) ->
188+
let percent = (float passed /. float num_files) *. 100. in
189+
Fmt.pr "@[<v 2>%-*s:@;Passed: %05d/%05d (%03.2f%%)@." w_prover
190+
(Tool.prover_to_string prover)
191+
passed num_files percent )
192+
summary;
193+
Fmt.pr "Time elasped: %.2f@." time_elapsed;
194+
(* let data_frames = make_data_frames results in *)
195+
(* let summary = summarize results in *)
196+
(* let msg = *)
197+
(* Fmt.str "@[Single-Query Results:@;%a@]@." Owl_pretty.pp_dataframe summary *)
198+
(* in *)
199+
(* let results_dir = Fmt.kstr Fpath.v "res-single-query-%s" started_at in *)
200+
(* let* _ = Bos.OS.Dir.create ~path:true results_dir in *)
201+
(* List.iter (write_data_frame started_at results_dir) data_frames; *)
202+
(* Option.iter (Notify.notify_done msg) hook; *)
203+
(* Ok (Fmt.pr "%s" msg) *)
204+
Ok ()

bench/runner/cmd_setup.ml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
let debug = true
2+
3+
let debug k = if debug then k Fmt.epr
4+
5+
let init ~datasets_dir =
6+
let open Result in
7+
let* dir_exists = Bos.OS.Dir.create ~path:true datasets_dir in
8+
if not dir_exists then debug (fun epr -> epr "Datasets dir already exists@.");
9+
Ok ()
10+
11+
type conf = { datasets : Dataset.t list }
12+
13+
let parse_conf fpath =
14+
In_channel.with_open_text (Fpath.to_string fpath) @@ fun ic ->
15+
let datasets = Sexplib.Sexp.input_sexps ic |> List.map Dataset.of_sexp in
16+
{ datasets }
17+
18+
let file_exists ?hash fpath =
19+
let open Result in
20+
let* file_exists = Bos.OS.File.exists fpath in
21+
if not file_exists then Ok false
22+
else begin
23+
match hash with
24+
| None -> Ok true
25+
| Some hash ->
26+
let md5sum = Digest.MD5.(to_hex (file (Fpath.to_string fpath))) in
27+
if Digest.MD5.equal hash md5sum then Ok true
28+
else
29+
Error
30+
(`Msg
31+
(Fmt.str "%a: expected hash %s but got %s" Fpath.pp fpath hash
32+
md5sum ) )
33+
end
34+
35+
let curl url out_file = Bos.Cmd.(v "curl" % url % "--output" % p out_file)
36+
37+
let tar_extract archive output_dir =
38+
Bos.Cmd.(v "tar" % "-xf" % p archive % "-C" % p output_dir)
39+
40+
let setup ~datasets_dir { datasets } =
41+
let open Result in
42+
list_iter
43+
(fun { Dataset.name; url; md5sum } ->
44+
let this_dataset_dir = Fpath.(datasets_dir / name) in
45+
let* _ = Bos.OS.Dir.create this_dataset_dir in
46+
debug (fun epr -> epr "Downloading %s from %s@." name url);
47+
let out_file = Fpath.(this_dataset_dir // base (v url)) in
48+
let* file_exists = file_exists ~hash:md5sum out_file in
49+
if file_exists then begin
50+
debug (fun epr -> epr "Skipping: correct file exists@.");
51+
Ok ()
52+
end
53+
else begin
54+
debug (fun epr -> epr "Saving to %a@." Fpath.pp out_file);
55+
let* () = Bos.OS.Cmd.run (curl url out_file) in
56+
Bos.OS.Cmd.run (tar_extract out_file this_dataset_dir)
57+
end )
58+
datasets
59+
60+
let main ~datasets_dir ~file =
61+
let open Result in
62+
debug (fun epr -> epr "Using config: '%a'@." Fpath.pp file);
63+
let* () = init ~datasets_dir in
64+
let conf = parse_conf file in
65+
setup ~datasets_dir conf

bench/runner/dataset.ml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
type t =
2+
{ name : string
3+
; url : string
4+
; md5sum : string
5+
}
6+
7+
let pp fmt { name; url; md5sum } =
8+
Fmt.pf fmt "@[<hov 1>{@ name@ =@ %a;@ url@ =@ %a;@ md5sum@ =@ %a@ }@]"
9+
Fmt.string name Fmt.string url Fmt.string md5sum
10+
11+
let of_sexp sexp =
12+
match sexp with
13+
| Sexplib.Sexp.List
14+
[ Atom "dataset"
15+
; List [ Atom "name"; Atom name ]
16+
; List [ Atom "url"; Atom url ]
17+
; List [ Atom "md5sum"; Atom md5sum ]
18+
] ->
19+
{ name; url; md5sum }
20+
| _ -> Fmt.failwith "Unable to parse sexp: %a" Sexplib.Sexp.pp_hum sexp
21+
22+
let to_sexp { name; url; md5sum } =
23+
Sexplib.Sexp.(
24+
List
25+
[ Atom "dataset"
26+
; List [ Atom "name"; Atom name ]
27+
; List [ Atom "url"; Atom url ]
28+
; List [ Atom "md5sum"; Atom md5sum ]
29+
] )

bench/runner/dune

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
(executable
2-
(enabled_if false)
3-
(name runner)
2+
(name main)
43
(libraries
54
bos
65
cohttp
76
cohttp-lwt-unix
87
cmdliner
98
extunix
109
dune-glob
11-
owl
1210
lwt
1311
yojson
1412
str
13+
sexplib
1514
smtml
1615
smtml.prelude)
1716
(flags

bench/runner/main.ml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
open Cmdliner
2+
3+
let file =
4+
let fpath = Arg.conv (Fpath.of_string, Fpath.pp) in
5+
Arg.(required & pos 0 (some fpath) None & info [] ~docv:"FILE")
6+
7+
let fpath_dir =
8+
let dir_parser = Arg.(conv_parser dir) in
9+
Arg.conv
10+
( (fun str ->
11+
match dir_parser str with
12+
| Ok dir -> Ok (Fpath.v dir)
13+
| Error _ as err -> err )
14+
, Fpath.pp )
15+
16+
let dir_arg ~default arg_name =
17+
Arg.(value & opt fpath_dir default & info [ arg_name ])
18+
19+
let dirs = Arg.(value & pos_all fpath_dir [] & info [] ~docv:"PATH")
20+
21+
let provers =
22+
let prover_conv = Arg.conv (Tool.prover_of_string, Tool.pp_prover) in
23+
let default = Tool.(Smtml { name = Z3; st = false }) in
24+
Arg.(value & opt_all prover_conv [ default ] & info [ "prover" ])
25+
26+
let timeout = Arg.(value & opt (some int) None & info [ "timeout" ])
27+
28+
let hook = Arg.(value & opt (some string) None & info [ "webhook" ])
29+
30+
let _from_file = Arg.(value & opt (some file) None & info [ "from-file" ])
31+
32+
let exec =
33+
let open Term.Syntax in
34+
let info = Cmd.info "exec" in
35+
let cmd =
36+
let+ hook
37+
and+ timeout
38+
and+ provers
39+
and+ dirs in
40+
Cmd_exec.main ~hook ~timeout ~provers ~dirs
41+
in
42+
Cmd.v info cmd
43+
44+
let setup =
45+
let open Term.Syntax in
46+
let info = Cmd.info "setup" in
47+
let cmd =
48+
let+ file
49+
and+ datasets_dir = dir_arg ~default:Fpath.(v "_datasets") "datasets-dir" in
50+
Cmd_setup.main ~datasets_dir ~file
51+
in
52+
Cmd.v info cmd
53+
54+
let cli =
55+
let info = Cmd.info "benchme" in
56+
Cmd.group info [ exec; setup ]
57+
58+
let () =
59+
match Cmdliner.Cmd.eval_value' cli with
60+
| `Exit code -> exit code
61+
| `Ok (Error (`Msg err)) -> Fmt.failwith "%s" err
62+
| `Ok (Ok ()) -> exit 0

0 commit comments

Comments
 (0)