Skip to content

Commit e293956

Browse files
committed
Conditionally provide gc stats based on OCaml version.
OCaml 5.0 - 5.2* will get overall allocation stats but not per-domain stats as the runtime counters are missing. OCaml 5.3 onwards gets per-domain statistics.
1 parent d0fadb4 commit e293956

File tree

6 files changed

+594
-291
lines changed

6 files changed

+594
-291
lines changed

.ocamlformat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
version = 0.27.0
22
profile = conventional
33

4-
ocaml-version = 5.2.0
4+
ocaml-version = 5.0.0

lib/olly_gc_stats/dune

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,19 @@
22
(name olly_gc_stats)
33
(optional)
44
(libraries olly_common hdr_histogram))
5+
6+
(rule
7+
(deps olly_gc_stats.5.0.ml)
8+
(target olly_gc_impl.ml)
9+
(enabled_if
10+
(< %{ocaml_version} 5.3))
11+
(action
12+
(copy %{deps} %{target})))
13+
14+
(rule
15+
(deps olly_gc_stats.5.3.ml)
16+
(target olly_gc_impl.ml)
17+
(enabled_if
18+
(>= %{ocaml_version} 5.3))
19+
(action
20+
(copy %{deps} %{target})))
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
module H = Hdr_histogram
2+
module Ts = Runtime_events.Timestamp
3+
4+
type ts = { mutable start_time : float; mutable end_time : float }
5+
6+
(* Maximum number of domains that can be active concurrently.
7+
Defaults to 128 on 64-bit platforms and 16 on 32-bit platforms.
8+
9+
This can be user configurable with OCAMLRUNPARAM=d=XXX
10+
*)
11+
let number_domains = 128
12+
let wall_time = { start_time = 0.; end_time = 0. }
13+
let domain_elapsed_times = Array.make number_domains 0.
14+
let domain_gc_times = Array.make number_domains 0
15+
let domain_minor_words = Array.make number_domains 0
16+
let domain_promoted_words = Array.make number_domains 0
17+
let domain_major_words = Array.make number_domains 0
18+
let minor_collections = ref 0
19+
let major_collections = ref 0
20+
let forced_major_collections = ref 0
21+
let compactions = ref 0
22+
23+
let lifecycle domain_id ts lifecycle_event _data =
24+
let ts = float_of_int Int64.(to_int @@ Ts.to_int64 ts) /. 1_000_000_000. in
25+
match lifecycle_event with
26+
| Runtime_events.EV_RING_START ->
27+
wall_time.start_time <- ts;
28+
domain_elapsed_times.(domain_id) <- ts
29+
| Runtime_events.EV_RING_STOP ->
30+
wall_time.end_time <- ts;
31+
domain_elapsed_times.(domain_id) <- ts -. domain_elapsed_times.(domain_id)
32+
| Runtime_events.EV_DOMAIN_SPAWN -> domain_elapsed_times.(domain_id) <- ts
33+
| Runtime_events.EV_DOMAIN_TERMINATE ->
34+
domain_elapsed_times.(domain_id) <- ts -. domain_elapsed_times.(domain_id)
35+
| _ -> ()
36+
37+
let print_table oc (data : string list list) =
38+
let column_widths =
39+
List.fold_left
40+
(fun widths row -> List.map2 max widths (List.map String.length row))
41+
(List.map String.length (List.hd data))
42+
(List.tl data)
43+
in
44+
let print_row row =
45+
let formatted_row =
46+
List.map2 (fun s w -> Printf.sprintf "%-*s" w s) row column_widths
47+
in
48+
Printf.fprintf oc "%s \n" (String.concat " " formatted_row)
49+
in
50+
51+
List.iter print_row data
52+
53+
let print_global_allocation_stats oc =
54+
Printf.fprintf oc "GC allocations (in words): \n";
55+
let minor_words = ref 0.0 in
56+
let promoted_words = ref 0.0 in
57+
Array.iteri
58+
(fun i v ->
59+
minor_words := !minor_words +. float_of_int v;
60+
promoted_words :=
61+
!promoted_words +. float_of_int domain_promoted_words.(i))
62+
domain_minor_words;
63+
Printf.fprintf oc "Total heap:\t %.0f\n" (!minor_words -. !promoted_words);
64+
Printf.fprintf oc "Minor heap:\t %.0f\n" !minor_words;
65+
Printf.fprintf oc "Promoted words:\t %.0f (%.2f%%)\n" !promoted_words
66+
(!promoted_words /. !minor_words *. 100.0);
67+
Printf.fprintf oc "\n"
68+
69+
let print_per_domain_stats oc =
70+
Printf.fprintf oc "Per domain stats: \n";
71+
let data =
72+
ref [ [ "Domain"; "Total"; "Minor"; "Promoted"; "Major"; "Promoted(%)" ] ]
73+
in
74+
75+
Array.iteri
76+
(fun i (domain_major_word, (domain_minor_word, domain_promoted_word)) ->
77+
if domain_major_word > 0 then
78+
data :=
79+
List.append !data
80+
[
81+
[
82+
string_of_int i;
83+
string_of_int
84+
(domain_minor_word - domain_promoted_word + domain_major_word);
85+
string_of_int domain_minor_word;
86+
string_of_int domain_promoted_word;
87+
string_of_int domain_major_word;
88+
Printf.sprintf "%.2f"
89+
(float_of_int domain_promoted_word
90+
/. float_of_int domain_minor_word
91+
*. 100.0);
92+
];
93+
])
94+
(Array.combine domain_minor_words domain_promoted_words
95+
|> Array.combine domain_major_words);
96+
print_table oc !data
97+
98+
let print_percentiles json output hist =
99+
let to_sec x = float_of_int x /. 1_000_000_000. in
100+
let ms ns = ns /. 1_000_000. in
101+
102+
let mean_latency = H.mean hist |> ms
103+
and max_latency = float_of_int (H.max hist) |> ms in
104+
let percentiles =
105+
[|
106+
25.0;
107+
50.0;
108+
60.0;
109+
70.0;
110+
75.0;
111+
80.0;
112+
85.0;
113+
90.0;
114+
95.0;
115+
96.0;
116+
97.0;
117+
98.0;
118+
99.0;
119+
99.9;
120+
99.99;
121+
99.999;
122+
99.9999;
123+
100.0;
124+
|]
125+
in
126+
let oc = match output with Some s -> open_out s | None -> stderr in
127+
let real_time = wall_time.end_time -. wall_time.start_time in
128+
let total_gc_time = to_sec @@ Array.fold_left ( + ) 0 domain_gc_times in
129+
130+
let total_cpu_time = ref 0. in
131+
let ap = Array.combine domain_elapsed_times domain_gc_times in
132+
Array.iteri
133+
(fun i (cpu_time, gc_time) ->
134+
if gc_time > 0 && cpu_time = 0. then
135+
Printf.fprintf stderr
136+
"[Olly] Warning: Domain %d has GC time but no CPU time\n" i
137+
else total_cpu_time := !total_cpu_time +. cpu_time)
138+
ap;
139+
140+
if json then
141+
let distribs =
142+
List.init (Array.length percentiles) (fun i ->
143+
let percentile = percentiles.(i) in
144+
let value =
145+
H.value_at_percentile hist percentiles.(i)
146+
|> float_of_int |> ms |> string_of_float
147+
in
148+
Printf.sprintf "\"%.4f\": %s" percentile value)
149+
|> String.concat ","
150+
in
151+
Printf.fprintf oc
152+
{|{"mean_latency": %f, "max_latency": %f, "distr_latency": {%s}}|}
153+
mean_latency max_latency distribs
154+
else (
155+
Printf.fprintf oc "\n";
156+
Printf.fprintf oc "Execution times:\n";
157+
Printf.fprintf oc "Wall time (s):\t%.2f\n" real_time;
158+
Printf.fprintf oc "CPU time (s):\t%.2f\n" !total_cpu_time;
159+
Printf.fprintf oc "GC time (s):\t%.2f\n" total_gc_time;
160+
Printf.fprintf oc "GC overhead (%% of CPU time):\t%.2f%%\n"
161+
(total_gc_time /. !total_cpu_time *. 100.);
162+
Printf.fprintf oc "\n";
163+
Printf.fprintf oc "Per domain stats:\n";
164+
let data = ref [ [ "Domain"; "Wall"; "GC(s)"; "GC(%)" ] ] in
165+
Array.iteri
166+
(fun i (c, g) ->
167+
if c > 0. then
168+
data :=
169+
List.append !data
170+
[
171+
[
172+
string_of_int i;
173+
Printf.sprintf "%.2f" c;
174+
Printf.sprintf "%.2f" (to_sec g);
175+
Printf.sprintf "%.2f" (to_sec g *. 100. /. c);
176+
];
177+
])
178+
(Array.combine domain_elapsed_times domain_gc_times);
179+
180+
print_table oc !data;
181+
Printf.fprintf oc "\n";
182+
Printf.fprintf oc "GC latency profile:\n";
183+
Printf.fprintf oc "#[Mean (ms):\t%.2f,\t Stddev (ms):\t%.2f]\n" mean_latency
184+
(H.stddev hist |> ms);
185+
Printf.fprintf oc "#[Min (ms):\t%.2f,\t max (ms):\t%.2f]\n"
186+
(float_of_int (H.min hist) |> ms)
187+
max_latency;
188+
Printf.fprintf oc "\n";
189+
Printf.fprintf oc "Percentile \t Latency (ms)\n";
190+
Fun.flip Array.iter percentiles (fun p ->
191+
Printf.fprintf oc "%.4f \t %.2f\n" p
192+
(float_of_int (H.value_at_percentile hist p) |> ms));
193+
Printf.fprintf oc "\n";
194+
print_global_allocation_stats oc;
195+
print_per_domain_stats oc;
196+
Printf.fprintf oc "Minor Gen: %i collections\n" !minor_collections;
197+
Printf.fprintf oc "Major Gen: %i collections %i forced collections\n"
198+
!major_collections !forced_major_collections;
199+
Printf.fprintf oc "Compactions: %i\n" !compactions)
200+
201+
let gc_stats poll_sleep json output runtime_events_dir runtime_events_log_wsize
202+
exec_args =
203+
let current_event = Hashtbl.create 13 in
204+
let hist =
205+
H.init ~lowest_discernible_value:10 ~highest_trackable_value:10_000_000_000
206+
~significant_figures:3
207+
in
208+
let is_gc_phase phase =
209+
match phase with
210+
| Runtime_events.EV_MAJOR | Runtime_events.EV_STW_LEADER
211+
| Runtime_events.EV_INTERRUPT_REMOTE ->
212+
true
213+
| _ -> false
214+
in
215+
let runtime_begin ring_id ts phase =
216+
if phase == Runtime_events.EV_EXPLICIT_GC_COMPACT && ring_id == 0 then
217+
incr compactions;
218+
219+
if phase == Runtime_events.EV_MINOR && ring_id == 0 then
220+
incr minor_collections;
221+
222+
(* Runtime_events.EV_MAJOR seems to correspond to any GC collection,
223+
be more specific and use stop-the-world phase done at the end of
224+
a major GC cycle *)
225+
if phase == Runtime_events.EV_MAJOR_GC_STW && ring_id == 0 then
226+
incr major_collections;
227+
228+
if
229+
(phase == Runtime_events.EV_EXPLICIT_GC_MAJOR
230+
|| phase == Runtime_events.EV_EXPLICIT_GC_FULL_MAJOR)
231+
&& ring_id == 0
232+
then incr forced_major_collections;
233+
234+
if is_gc_phase phase then
235+
match Hashtbl.find_opt current_event ring_id with
236+
| None -> Hashtbl.add current_event ring_id (phase, Ts.to_int64 ts)
237+
| _ -> ()
238+
in
239+
let runtime_end ring_id ts phase =
240+
match Hashtbl.find_opt current_event ring_id with
241+
| Some (saved_phase, saved_ts) when saved_phase = phase ->
242+
Hashtbl.remove current_event ring_id;
243+
let latency = Int64.to_int (Int64.sub (Ts.to_int64 ts) saved_ts) in
244+
assert (H.record_value hist latency);
245+
domain_gc_times.(ring_id) <- domain_gc_times.(ring_id) + latency
246+
| _ -> ()
247+
in
248+
let runtime_counter ring_id _ts counter_type value =
249+
match counter_type with
250+
| Runtime_events.EV_C_MINOR_PROMOTED ->
251+
(* Reported as bytes so we convert to words *)
252+
domain_promoted_words.(ring_id) <-
253+
domain_promoted_words.(ring_id) + (value / 8)
254+
| Runtime_events.EV_C_MINOR_ALLOCATED ->
255+
(* Reported as bytes so we convert to words *)
256+
domain_minor_words.(ring_id) <-
257+
domain_minor_words.(ring_id) + (value / 8)
258+
| _ -> ()
259+
in
260+
261+
let init = Fun.id in
262+
let cleanup () = print_percentiles json output hist in
263+
let open Olly_common.Launch in
264+
try
265+
`Ok
266+
(olly
267+
{
268+
empty_config with
269+
runtime_begin;
270+
runtime_end;
271+
runtime_counter;
272+
lifecycle;
273+
init;
274+
cleanup;
275+
poll_sleep;
276+
runtime_events_dir;
277+
runtime_events_log_wsize;
278+
}
279+
exec_args)
280+
with Fail msg -> `Error (false, msg)

0 commit comments

Comments
 (0)