@@ -609,6 +609,111 @@ def _cmd_stats(argv: List[Any]):
609609 print ("done." )
610610
611611
def get_parser_agg() -> ArgumentParser:
    """
    Builds the command line parser of the ``agg`` command.

    The parser expects one positional output file followed by at least
    one input (csv or zip file, a name or a search path). Every list-like
    option (``--keys``, ``--values``, ``--ignored``, ``--formula``,
    ``--views``) is a single string whose items are separated by commas.

    :return: the configured parser
    """
    parser = ArgumentParser(
        prog="agg",
        description=textwrap.dedent(
            """
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
            """
        ),
        epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("output", help="output excel file")
    parser.add_argument(
        "inputs",
        nargs="+",
        help="input csv or zip files, at least 1, it can be a name, or search path",
    )
    parser.add_argument(
        "--filter",
        # The dot before the extension is escaped so that it only matches
        # a literal '.', not any character.
        default=r"rawdata_.*\.csv",
        help="filter for input files inside zip files",
    )
    parser.add_argument(
        "--recent",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps only the most recent experiment for the same set of keys.",
    )
    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
    parser.add_argument(
        "-k",
        "--keys",
        # Split over two literals only to keep the line short,
        # the value is one comma separated string.
        default=(
            "^version_.*,^model_.*,providers,opt_patterns,suite,memory_peak,"
            "machine,exporter,dynamic,rtopt,dtype,device,architecture"
        ),
        help="List of columns to consider as keys, "
        "multiple values are separated by `,`\n "
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-w",
        "--values",
        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
        help="List of columns to consider as values, "
        "multiple values are separated by `,`\n "
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-i", "--ignored", default="version_python", help="List of columns to ignore"
    )
    parser.add_argument(
        "-f",
        "--formula",
        default="speedup,bucket[speedup],ERR1",
        help="Columns to compute after the aggregation was done.",
    )
    parser.add_argument(
        "--views",
        default="summary-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup",
        help="Views to add to the output files.",
    )
    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
    return parser
673+
674+
def _cmd_agg(argv: List[Any]):
    """
    Implements the ``agg`` command: loads every input file as a dataframe,
    aggregates them with ``CubeLogsPerformance`` and writes the result
    into an excel file.

    :param argv: command line arguments, the first one is skipped,
        the remaining ones are parsed with :func:`get_parser_agg`
    """
    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files

    def _split(text: str) -> List[str]:
        # Splits a comma separated option and drops empty items
        # (an empty option or a trailing comma).
        return [item for item in text.split(",") if item]

    parser = get_parser_agg()
    args = parser.parse_args(argv[1:])
    reg = re.compile(args.filter)

    csv = list(enumerate_csv_files(args.inputs, verbose=args.verbose, filtering=reg.search))
    assert csv, f"No csv files in {args.inputs}"
    if args.verbose:
        # tqdm is only needed to display a progress bar.
        from tqdm import tqdm

        loop = tqdm(csv)
    else:
        loop = csv
    dfs = []
    for c in loop:
        df = open_dataframe(c)
        # The message describes the loaded dataframe: ``c`` is the entry
        # handed to ``open_dataframe`` and may not have a ``head`` method.
        assert (
            args.time in df.columns
        ), f"Missing time column {args.time!r} in {c!r}\n{df.head()!r}"
        dfs.append(df)

    cube = CubeLogsPerformance(
        dfs,
        time=args.time,
        keys=_split(args.keys),
        values=_split(args.values),
        ignored=_split(args.ignored),
        recent=args.recent,
        formulas={k: k for k in _split(args.formula)},
    )
    # The loading step is one verbosity level quieter than the command itself.
    cube.load(verbose=max(args.verbose - 1, 0))
    if args.verbose:
        print(f"Dumps final file into {args.output!r}")
    cube.to_excel(args.output, {k: k for k in _split(args.views)}, verbose=args.verbose)
    if args.verbose:
        print(f"Wrote {args.output!r}")
716+
612717def get_main_parser () -> ArgumentParser :
613718 parser = ArgumentParser (
614719 prog = "onnx_diagnostic" ,
@@ -619,19 +724,29 @@ def get_main_parser() -> ArgumentParser:
619724 Type 'python -m onnx_diagnostic <cmd> --help'
620725 to get help for a specific command.
621726
727+ agg - aggregates statistics from multiple files
622728 config - prints a configuration for a model id
623729 find - find node consuming or producing a result
624730 lighten - makes an onnx model lighter by removing the weights,
625- unlighten - restores an onnx model produces by the previous experiment
626731 print - prints the model on standard output
627- validate - validate a model
628732 stats - produces statistics on a model
733+ unlighten - restores an onnx model produces by the previous experiment
734+ validate - validate a model
629735 """
630736 ),
631737 )
632738 parser .add_argument (
633739 "cmd" ,
634- choices = ["config" , "find" , "lighten" , "print" , "stats" , "unlighten" , "validate" ],
740+ choices = [
741+ "agg" ,
742+ "config" ,
743+ "find" ,
744+ "lighten" ,
745+ "print" ,
746+ "stats" ,
747+ "unlighten" ,
748+ "validate" ,
749+ ],
635750 help = "Selects a command." ,
636751 )
637752 return parser
@@ -646,6 +761,7 @@ def main(argv: Optional[List[Any]] = None):
646761 config = _cmd_config ,
647762 validate = _cmd_validate ,
648763 stats = _cmd_stats ,
764+ agg = _cmd_agg ,
649765 )
650766
651767 if argv is None :
@@ -667,6 +783,7 @@ def main(argv: Optional[List[Any]] = None):
667783 config = get_parser_config ,
668784 validate = get_parser_validate ,
669785 stats = get_parser_stats ,
786+ agg = get_parser_agg ,
670787 )
671788 cmd = argv [0 ]
672789 if cmd not in parsers :
0 commit comments