@@ -609,6 +609,128 @@ def _cmd_stats(argv: List[Any]):
609609 print ("done." )
610610
611611
def get_parser_agg() -> ArgumentParser:
    """
    Builds the command line parser of the ``agg`` command.

    The parser expects an output excel file followed by at least one input
    (csv or zip file, a name or a search path). Every list-like option
    (keys, values, ignored, formula, views, csv) is received as a single
    comma separated string; splitting is left to the caller.

    :return: the configured parser
    """
    parser = ArgumentParser(
        prog="agg",
        description=textwrap.dedent(
            """
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
            """
        ),
        epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
        # Keeps the line breaks written in the description and help strings.
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("output", help="output excel file")
    parser.add_argument(
        "inputs",
        nargs="+",
        help="input csv or zip files, at least 1, it can be a name, or search path",
    )
    parser.add_argument(
        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
    )
    # BooleanOptionalAction also registers --no-recent / --no-raw to disable the flags.
    parser.add_argument(
        "--recent",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps only the most recent experiment for the same set of keys.",
    )
    parser.add_argument(
        "--raw",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps the raw data in a sheet.",
    )
    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
    # NOTE(review): `device` appears twice in the default list of keys,
    # it is kept as is to leave the default value unchanged.
    parser.add_argument(
        "-k",
        "--keys",
        default="^version_.*,^model_.*,device,opt_patterns,suite,memory_peak,machine,exporter,dynamic,rtopt,dtype,device,architecture",
        help="List of columns to consider as keys, "
        "multiple values are separated by `,`\n "
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-w",
        "--values",
        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
        help="List of columns to consider as values, "
        "multiple values are separated by `,`\n "
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-i", "--ignored", default="version_python", help="List of columns to ignore"
    )
    parser.add_argument(
        "-f",
        "--formula",
        default="speedup,bucket[speedup],ERR1",
        help="Columns to compute after the aggregation was done.",
    )
    parser.add_argument(
        "--views",
        default="agg-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup,raw-short",
        help="Views to add to the output files.",
    )
    parser.add_argument(
        "--csv",
        default="raw-short",
        help="Views to dump as csv files.",
    )
    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
    return parser
684+
685+
def _cmd_agg(argv: List[Any]):
    """
    Implements the ``agg`` command: collects the input files, opens each of
    them as a dataframe, loads them into a ``CubeLogsPerformance`` and
    exports the result to the output excel file.

    :param argv: command line arguments, the first element is skipped and
        the remaining ones are parsed with the parser returned by
        ``get_parser_agg``
    :raises AssertionError: if no input file is found, or if an opened
        dataframe does not contain the time column
    """
    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files

    parser = get_parser_agg()
    args = parser.parse_args(argv[1:])
    reg = re.compile(args.filter)

    # The regular expression is handed over as a callback deciding
    # which names are kept.
    csv = list(
        enumerate_csv_files(
            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
        )
    )
    assert csv, f"No csv files in {args.inputs}"
    if args.verbose:
        # tqdm is only imported when a progress bar is requested.
        from tqdm import tqdm

        loop = tqdm(csv)
    else:
        loop = csv
    dfs = []
    for c in loop:
        df = open_dataframe(c)
        # The message reports the input `c` and the first rows of the
        # dataframe `df`; calling `.head()` on `c` would fail while building
        # the message and hide the real error.
        assert (
            args.time in df.columns
        ), f"Missing time column {args.time!r} in {c!r}\n{df.head()!r}"
        dfs.append(df)

    # Comma separated options are split here, empty items are dropped
    # for keys, values and ignored columns.
    cube = CubeLogsPerformance(
        dfs,
        time=args.time,
        keys=[a for a in args.keys.split(",") if a],
        values=[a for a in args.values.split(",") if a],
        ignored=[a for a in args.ignored.split(",") if a],
        recent=args.recent,
        formulas={k: k for k in args.formula.split(",")},
    )
    # The cube is one level less verbose than the command itself.
    cube.load(verbose=max(args.verbose - 1, 0))
    if args.verbose:
        print(f"Dumps final file into {args.output!r}")
    cube.to_excel(
        args.output,
        {k: k for k in args.views.split(",")},
        verbose=args.verbose,
        csv=args.csv.split(","),
        raw=args.raw,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")
732+
733+
612734def get_main_parser () -> ArgumentParser :
613735 parser = ArgumentParser (
614736 prog = "onnx_diagnostic" ,
@@ -619,19 +741,29 @@ def get_main_parser() -> ArgumentParser:
619741 Type 'python -m onnx_diagnostic <cmd> --help'
620742 to get help for a specific command.
621743
744+ agg - aggregates statistics from multiple files
622745 config - prints a configuration for a model id
623746 find - find node consuming or producing a result
624747 lighten - makes an onnx model lighter by removing the weights,
625- unlighten - restores an onnx model produces by the previous experiment
626748 print - prints the model on standard output
627- validate - validate a model
628749 stats - produces statistics on a model
750+ unlighten - restores an onnx model produces by the previous experiment
751+ validate - validate a model
629752 """
630753 ),
631754 )
632755 parser .add_argument (
633756 "cmd" ,
634- choices = ["config" , "find" , "lighten" , "print" , "stats" , "unlighten" , "validate" ],
757+ choices = [
758+ "agg" ,
759+ "config" ,
760+ "find" ,
761+ "lighten" ,
762+ "print" ,
763+ "stats" ,
764+ "unlighten" ,
765+ "validate" ,
766+ ],
635767 help = "Selects a command." ,
636768 )
637769 return parser
@@ -646,6 +778,7 @@ def main(argv: Optional[List[Any]] = None):
646778 config = _cmd_config ,
647779 validate = _cmd_validate ,
648780 stats = _cmd_stats ,
781+ agg = _cmd_agg ,
649782 )
650783
651784 if argv is None :
@@ -667,6 +800,7 @@ def main(argv: Optional[List[Any]] = None):
667800 config = get_parser_config ,
668801 validate = get_parser_validate ,
669802 stats = get_parser_stats ,
803+ agg = get_parser_agg ,
670804 )
671805 cmd = argv [0 ]
672806 if cmd not in parsers :
0 commit comments