11#! /usr/bin/env python
22
3+ import glob
34import re
45import warnings
56from dataclasses import dataclass
@@ -71,19 +72,32 @@ def format_func(x, pos):
7172
7273
class Criterea (Enum):
    """Measurement criteria recorded in the raw benchmark data files.

    Each member's value is the label/column string used in the data files.
    NOTE(review): spelled "Criterea" in source — presumably "Criteria";
    renaming would touch all call sites, so left as-is.
    """

    # Per-collection GC statistics (column names in the GC log).
    COLLECTION_NUMBER = "collection_number"
    COLLECTION_KIND = "kind"
    HEAP_SIZE_ON_ENTRY = "heap_size_on_entry"
    TIME_MARKING_MS = "time_marking_ms"
    TIME_MARKING_NS = "time_marking_ns"
    BYTES_FREED = "bytes_freed"
    LIVE_OBJECTS_WITH_FINALIZERS = "live_objects_with_finalizers"
    OBJECTS_IN_FINALIZER_QUEUE = "objects_in_finalizer_queue"
    TIME_FINALIZER_QUEUE_MS = "time_fin_q_ms"
    TIME_FINALIZER_QUEUE_NS = "time_fin_q_ns"
    TIME_SWEEPING_MS = "time_sweeping_ms"
    TIME_SWEEPING_NS = "time_sweeping_ns"
    TIME_TOTAL_MS = "time_total_ms"
    TIME_TOTAL_NS = "time_total_ns"
    FINALIZERS_RUN = "finalizers_run"
    FINALIZERS_REGISTERED = "finalizers_registered"
    # Allocation counters by pointer kind.
    ALLOCATED_GC = "allocated_gc"
    ALLOCATED_ARC = "allocated_arc"
    ALLOCATED_RC = "allocated_rc"
    ALLOCATED_BOXED = "allocated_boxed"
    # NOTE(review): the next three share values with ALLOCATED_* above, so
    # Enum treats them as *aliases* (e.g. Criterea.GC_ALLOCS is
    # Criterea.ALLOCATED_GC) — presumably intentional back-compat names.
    GC_ALLOCS = "allocated_gc"
    RC_ALLOCS = "allocated_rc"
    ARC_ALLOCS = "allocated_arc"
    # NOTE(review): "allocated_box" differs from ALLOCATED_BOXED's
    # "allocated_boxed", so BOX_ALLOCS is a distinct member rather than an
    # alias — looks like a typo; confirm against the raw data column names.
    BOX_ALLOCS = "allocated_box"
    # Wall-clock measurements from the harness output.
    WALLCLOCK = "total"
    USER = "usr "  # trailing space appears deliberate (matches raw label)

    def __lt__ (self, other):
        # Order criteria by their label string (gives stable sorting).
        return self.value < other.value
@@ -643,6 +657,32 @@ class SuiteData:
643657 suite : BenchmarkSuite
644658 data : pd .DataFrame
645659
660+ def _arithmetic_mean (df ):
661+ def with_99_cis (series ):
662+ n = len (series )
663+ mean = series .mean ()
664+ std_err = series .std (ddof = 1 ) / (n ** 0.5 ) # Standard error
665+ margin_of_error = (
666+ stats .t .ppf ((1 + 0.99 ) / 2 , df = n - 1 ) * std_err
667+ ) # t-score * SE
668+ return pd .Series (
669+ {
670+ "mean" : mean ,
671+ "ci" : margin_of_error ,
672+ "lower" : mean - margin_of_error ,
673+ "upper" : mean + margin_of_error ,
674+ }
675+ )
676+
677+ data = (
678+ df
679+ .groupby (["configuration" , "benchmark" , "criterion" ])["value" ]
680+ .apply (with_99_cis )
681+ .unstack ()
682+ .reset_index ()
683+ )
684+ return data
685+
646686 @classmethod
647687 def from_raw_data (cls , suite , measurement ):
648688 # def to_executor(name):
@@ -651,12 +691,6 @@ def from_raw_data(cls, suite, measurement):
651691 # return cfg
652692 # raise ValueError(f"Executor for {name} not found.")
653693
654- def to_benchmark (name ):
655- for b in suite .benchmarks :
656- if b .name == name :
657- return b
658- raise ValueError (f"Benchmark for { name } not found." )
659-
660694 raw = pd .read_csv (
661695 suite .raw_data (measurement ),
662696 sep = "\t " ,
@@ -669,19 +703,31 @@ def to_benchmark(name):
669703 ]
670704 return cls (suite , raw )
671705
672- @classmethod
673- def for_measurements (cls , suite , measurements ):
674- dfs = []
675- for m in measurements :
676- print (suite .raw_data (m ))
677- if not suite .raw_data (m ).exists ():
678- continue
679- df = cls .from_raw_data (suite , m ).data
680- dfs .append (df )
681-
682- if not dfs :
683- return None
684- return SuiteData (suite , pd .concat (dfs , ignore_index = True ))
706+ def __init__ (self , suite , measurements ):
707+ from build import Metric
708+
709+ def to_benchmark (name ):
710+ for b in suite .benchmarks :
711+ if b .name .lower () == name .lower ():
712+ return b
713+ raise ValueError (f"Benchmark for { name } not found." )
714+ perf = pd .DataFrame ()
715+
716+ if Metric .PERF in measurements :
717+ file = suite .raw_data (Metric .PERF )
718+ if file .exists ():
719+ perf = pd .read_csv (
720+ file ,
721+ sep = "\t " ,
722+ comment = "#" ,
723+ index_col = "suite" ,
724+ converters = {"criterion" : Criterea , "benchmark" : to_benchmark },
725+ )
726+ perf = perf .rename (columns = {"executor" : "configuration" }).reset_index ()[
727+ ["benchmark" , "configuration" , "value" , "criterion" ,"invocation" ]
728+ ]
729+ # print(perf)
730+ self .data = perf
685731
    def summary (self):
        """Wrap this suite's data in a Summary object for reporting."""
        return Summary (self)
@@ -882,7 +928,9 @@ def fmt_ci(low, high):
882928 best_configs = {}
883929 for crit in criteria_list :
884930 crit_df = suites [suites ["criterion" ] == crit ]
885- best_configs [crit ] = crit_df .loc [crit_df ["value" ].idxmin (), "configuration" ]
931+ best_configs [crit ] = crit_df .loc [
932+ crit_df ["value" ].idxmin (), "configuration"
933+ ]
886934
887935 cfgs = suites ["configuration" ].drop_duplicates ().to_list ()
888936
@@ -897,10 +945,17 @@ def fmt_ci(low, high):
897945 val = fmt_float (row ["value" ], bold = (c == best_configs [crit ]))
898946 ci = fmt_ci (row ["lower" ], row ["upper" ])
899947 else : # Other rows
900- val = fmt_float (row ["gmean_ratio" ], bold = (c == best_configs [crit ])) + "×"
948+ val = (
949+ fmt_float (
950+ row ["gmean_ratio" ], bold = (c == best_configs [crit ])
951+ )
952+ + "×"
953+ )
901954 if not row ["gmean_significant" ]:
902955 val += r"\textsuperscript{\dag}"
903- ci = fmt_ci (row ["gmean_ratio_ci_low" ], row ["gmean_ratio_ci_high" ])
956+ ci = fmt_ci (
957+ row ["gmean_ratio_ci_low" ], row ["gmean_ratio_ci_high" ]
958+ )
904959
905960 row_data .extend ([val , ci ])
906961
0 commit comments