@@ -2031,26 +2031,35 @@ module Test = struct
20312031 let stat_max_lines = 20 (* maximum number of lines for a histogram *)
20322032
20332033 let print_stat ((name ,_ ), tbl ) =
2034- let avg = ref 0. in
2034+ let neg_avg_summands = ref [] in
2035+ let pos_avg_summands = ref [] in
20352036 let num = ref 0 in
(* Single pass over [tbl]: records each weighted summand [i * res] for the
   average (negative and non-negative ones kept in separate lists), adds
   [res] to [num], and tracks the smallest and largest key seen.
   Presumably [i] is a sampled value and [res] its occurrence count —
   confirm against the code that fills [tbl]. *)
let min_idx, max_idx =
  Hashtbl.fold
    (fun i res (m1, m2) ->
       (* Multiply as floats: the native-int product [i * res] silently wraps
          around once it exceeds [max_int], which would corrupt the average. *)
       let avg_summand = float_of_int i *. float_of_int res in
       if avg_summand < 0.
       then neg_avg_summands := avg_summand :: !neg_avg_summands
       else pos_avg_summands := avg_summand :: !pos_avg_summands;
       num := !num + res;
       min i m1, max i m2)
    tbl (max_int, min_int)
in
2044- (* compute average *)
2048+ (* compute average, summing positive/negative separately by order of magnitude *)
2049+ let neg_avg_sums = List. sort Float. compare ! neg_avg_summands |> fun xs -> List. fold_right (+. ) xs 0. in
2050+ let pos_avg_sums = List. sort Float. compare ! pos_avg_summands |> List. fold_left (+. ) 0. in
2051+ let avg = ref (neg_avg_sums +. pos_avg_sums) in
20452052 if ! num > 0 then (
20462053 avg := ! avg /. float_of_int ! num
20472054 );
20482055 (* compute std-dev: sqroot of sum of squared distance-to-average
20492056 https://en.wikipedia.org/wiki/Standard_deviation *)
20502057 let stddev =
20512058 Hashtbl. fold
2052- (fun i res m -> m +. (float_of_int i -. ! avg) ** 2. *. float_of_int res)
2053- tbl 0.
2059+ (fun i res acc -> float_of_int res *. ((float_of_int i -. ! avg) ** 2. ) :: acc)
2060+ tbl []
2061+ |> List. sort Float. compare (* add summands in increasing order to preserve precision *)
2062+ |> List. fold_left (+. ) 0.
20542063 |> (fun s -> if ! num> 0 then s /. float_of_int ! num else s)
20552064 |> sqrt
20562065 in
@@ -2091,10 +2100,17 @@ module Test = struct
20912100 max_val := max ! max_val new_count) tbl;
20922101 (* print entries of the table, sorted by increasing index *)
20932102 let out = Buffer. create 128 in
(* Windows workaround to avoid annoying exponent zero such as "1.859e+018".
   [cut_exp_zero s] splits [s] at its single '+' and re-prints the exponent
   through [int_of_string], which drops leading zeros.
   Raises [Failure] (with [s] included in the message) when [s] does not
   contain exactly one '+'. *)
let cut_exp_zero s =
  match String.split_on_char '+' s with
  | [signif; exponent] -> Printf.sprintf " %s+%i" signif (int_of_string exponent)
  (* Parenthesised on purpose: [failwith "..." ^ s] parses as
     [(failwith "...") ^ s] and would raise without [s] in the message. *)
  | _ -> failwith (" cut_exp_zero failed to parse scientific notation " ^ s) in
(* Renders a float for the stats header: magnitudes above 1e7 are printed in
   scientific notation with 3 fractional digits and passed through
   [cut_exp_zero]; everything else is printed fixed-point with 2 decimals. *)
let fmt_float f =
  match Float.abs f > 1e7 with
  | true -> cut_exp_zero (Printf.sprintf " %.3e" f)
  | false -> Printf.sprintf " %.2f" f in
20942110 Printf. bprintf out " stats %s:\n " name;
20952111 Printf. bprintf out
2096- " num: %d, avg: %.2f , stddev: %.2f , median %d, min %d, max %d\n "
2097- ! num ! avg stddev ! median min_idx max_idx;
2112+ " num: %d, avg: %s , stddev: %s , median %d, min %d, max %d\n "
2113+ ! num (fmt_float ! avg) (fmt_float stddev) ! median min_idx max_idx;
20982114 let indwidth =
20992115 let str_width i = String. length (Printf. sprintf " %d" i) in
21002116 List. map str_width [min_idx; max_idx; min_idx + bucket_size * hist_size] |> List. fold_left max min_int in
0 commit comments