@@ -125,6 +125,14 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
125125 # Check if verbose mode is enabled
126126 verbose_mode = extra_args and "--verbose" in extra_args
127127
128+ # Check if bench mode is enabled for cumulative timing
129+ bench_mode = extra_args and "--bench" in extra_args
130+ cumulative_timing = {
131+ "total_torch_time" : 0.0 ,
132+ "total_infinicore_time" : 0.0 ,
133+ "operators_tested" : 0 ,
134+ }
135+
128136 for test_file in operator_test_files :
129137 test_name = test_file .stem
130138
@@ -157,7 +165,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
157165 # Both operators not implemented - skipped test
158166 success = False # Not a failure, but skipped
159167 returncode = - 2 # Special code for skipped
160- elif "one operator not implemented" in stdout_lower :
168+ elif "operator not implemented" in stdout_lower :
161169 # One operator not implemented - partial test
162170 success = False # Not fully successful
163171 returncode = - 3 # Special code for partial
@@ -202,6 +210,34 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
202210 f"{ status_icon } { test_name } : { status_text } (return code: { returncode } )"
203211 )
204212
213+ # Extract benchmark timing if in bench mode
214+ if bench_mode and success :
215+ # Look for benchmark summary in stdout
216+ lines = result .stdout .split ("\n " )
217+ torch_time = 0.0
218+ infini_time = 0.0
219+
220+ for line in lines :
221+ if "PyTorch Total Time:" in line :
222+ try :
223+ # Extract time value (e.g., "PyTorch Total Time: 123.456 ms")
224+ torch_time = (
225+ float (line .split (":" )[1 ].strip ().split ()[0 ]) / 1000.0
226+ ) # Convert to seconds
227+ except :
228+ pass
229+ elif "InfiniCore Total Time:" in line :
230+ try :
231+ infini_time = (
232+ float (line .split (":" )[1 ].strip ().split ()[0 ]) / 1000.0
233+ ) # Convert to seconds
234+ except :
235+ pass
236+
237+ cumulative_timing ["total_torch_time" ] += torch_time
238+ cumulative_timing ["total_infinicore_time" ] += infini_time
239+ cumulative_timing ["operators_tested" ] += 1
240+
205241 # In verbose mode, stop execution on first failure
206242 if verbose_mode and not success and returncode not in [- 2 , - 3 ]:
207243 break
@@ -219,11 +255,13 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
219255 print (f"{ '!' * 60 } " )
220256 break
221257
222- return results
258+ return results , cumulative_timing
223259
224260
225- def print_summary (results , verbose_mode = False , total_expected_tests = 0 ):
226- """Print a comprehensive summary of test results."""
261+ def print_summary (
262+ results , verbose_mode = False , total_expected_tests = 0 , cumulative_timing = None
263+ ):
264+ """Print a comprehensive summary of test results including benchmark data."""
227265 print (f"\n { '=' * 80 } " )
228266 print ("CUMULATIVE TEST SUMMARY" )
229267 print (f"{ '=' * 80 } " )
@@ -272,6 +310,19 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
272310 if partial > 0 :
273311 print (f"Partial: { partial } " )
274312
313+ # Print benchmark summary if cumulative_timing data is available
314+ if cumulative_timing and cumulative_timing ["operators_tested" ] > 0 :
315+ print (f"{ '-' * 40 } " )
316+ print ("BENCHMARK SUMMARY:" )
317+ print (f" Operators Tested: { cumulative_timing ['operators_tested' ]} " )
318+ print (
319+ f" Total PyTorch Time: { cumulative_timing ['total_torch_time' ] * 1000 :.3f} ms"
320+ )
321+ print (
322+ f" Total InfiniCore Time: { cumulative_timing ['total_infinicore_time' ] * 1000 :.3f} ms"
323+ )
324+ print (f"{ '-' * 40 } " )
325+
275326 # Display passed operators
276327 if passed_operators :
277328 print (f"\n ✅ PASSED OPERATORS ({ len (passed_operators )} ):" )
@@ -304,7 +355,7 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
304355 print (" " + ", " .join (line_ops ))
305356
306357 if total > 0 :
307- # Calculate success rate based on executed tests only
358+ # Calculate success rate based on actual executed tests
308359 executed_tests = passed + failed + partial
309360 if executed_tests > 0 :
310361 success_rate = passed / executed_tests * 100
@@ -387,6 +438,9 @@ def generate_help_epilog(ops_dir):
387438 )
388439 epilog_parts .append (" python run.py --cpu --nvidia --verbose" )
389440 epilog_parts .append ("" )
441+ epilog_parts .append (" # Run with benchmarking to get cumulative timing" )
442+ epilog_parts .append (" python run.py --cpu --bench" )
443+ epilog_parts .append ("" )
390444 epilog_parts .append (" # List available tests without running" )
391445 epilog_parts .append (" python run.py --list" )
392446 epilog_parts .append ("" )
@@ -413,7 +467,7 @@ def generate_help_epilog(ops_dir):
413467 " - Operators are automatically discovered from the ops directory"
414468 )
415469 epilog_parts .append (
416- " - --bench option is disabled in batch mode (run individual tests for benchmarking) "
470+ " - --bench mode now shows cumulative timing across all operators "
417471 )
418472 epilog_parts .append (
419473 " - --verbose mode stops execution on first error and shows full traceback"
@@ -527,14 +581,16 @@ def main():
527581 print ()
528582
529583 # Run all tests
530- results = run_all_op_tests (
584+ results , cumulative_timing = run_all_op_tests (
531585 ops_dir = ops_dir ,
532586 specific_ops = args .ops ,
533587 extra_args = unknown_args ,
534588 )
535589
536590 # Print summary and exit with appropriate code
537- all_passed = print_summary (results , args .verbose , total_expected_tests )
591+ all_passed = print_summary (
592+ results , args .verbose , total_expected_tests , cumulative_timing
593+ )
538594
539595 # Check if there were any tests with missing implementations
540596 has_missing_implementations = any (
0 commit comments