issue/594 - cumulating total time in tests

wooway777 · wooway777 · commit 5c88cbbda536 · 2025-11-13T20:53:01.000+08:00
diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py
@@ -1,6 +1,6 @@
 import torch
 import infinicore
-import traceback  # Add import for traceback
+import traceback
 
 from abc import ABC, abstractmethod
 from typing import List, Dict, Any, Optional
@@ -12,8 +12,6 @@
     create_test_comparator,
     infinicore_tensor_from_torch,
     profile_operation,
-    synchronize_device,
-    convert_infinicore_to_torch,
 )
 
 
@@ -244,6 +242,12 @@ def __init__(self, test_cases, test_config):
         self.passed_tests = (
             []
         )  # Track passed tests (both operators implemented and passed)
+        # Add benchmark timing statistics
+        self.benchmark_times = {
+            "torch_total": 0.0,
+            "infinicore_total": 0.0,
+            "per_test_case": {},  # Store timing per test case
+        }
 
     def run_tests(self, devices, test_func, test_type="Test"):
         """
@@ -344,9 +348,35 @@ def print_summary(self):
             else:
                 print(f"\n\033[92mAll tests passed!\033[0m")
 
+        # Print benchmark summary if benchmarking was enabled
+        if self.config.bench and (
+            self.benchmark_times["torch_total"] > 0
+            or self.benchmark_times["infinicore_total"] > 0
+        ):
+            self._print_benchmark_summary()
+
         print(f"{'='*60}")
         return result
 
+    def _print_benchmark_summary(self):
+        """Print total PyTorch / InfiniCore benchmark times and their ratio.
+
+        Reads ``self.benchmark_times``; totals that are not positive are skipped.
+        """
+        print(f"{'-'*60}")
+        print("BENCHMARK SUMMARY")
+
+        torch_total = self.benchmark_times["torch_total"]
+        infinicore_total = self.benchmark_times["infinicore_total"]
+
+        if torch_total > 0:
+            print(f"PyTorch Total Time: {torch_total * 1000:.3f} ms")
+        if infinicore_total > 0:
+            print(f"InfiniCore Total Time: {infinicore_total * 1000:.3f} ms")
+        if torch_total > 0 and infinicore_total > 0:
+            speedup = torch_total / infinicore_total  # divisor checked above
+            print(f"Speedup (PyTorch/InfiniCore): {speedup:.2f}x")
+
 
 class BaseOperatorTest(ABC):
     """Base operator test"""
@@ -711,8 +741,13 @@ def _run_benchmarking(
         comparison_target,
     ):
         """
-        Unified benchmarking logic
+        Unified benchmarking logic with timing accumulation
         """
+
+        # Initialize timing variables
+        torch_time = 0.0
+        infini_time = 0.0
+
         if torch_implemented:
             if output_count > 1:
                 # For multiple outputs, just call the operator
@@ -735,12 +770,13 @@ def torch_op():
                             else inputs[comparison_target]
                         )
 
-            profile_operation(
+            torch_time = profile_operation(
                 "PyTorch   ",
                 torch_op,
                 device_str,
                 config.num_prerun,
                 config.num_iterations,
+                total=True,
             )
 
         if infini_implemented:
@@ -759,10 +795,17 @@ def infini_op():
                         else infini_inputs[comparison_target]
                     )
 
-            profile_operation(
+            infini_time = profile_operation(
                 "InfiniCore",
                 infini_op,
                 device_str,
                 config.num_prerun,
                 config.num_iterations,
+                total=True,
             )
+
+        # Store timing information in the test runner
+        if hasattr(config, "_test_runner") and config._test_runner:
+            # Accumulate total times
+            config._test_runner.benchmark_times["torch_total"] += torch_time
+            config._test_runner.benchmark_times["infinicore_total"] += infini_time
diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py
@@ -32,6 +32,10 @@ def run(self):
         )
 
         runner = TestRunner(self.operator_test.test_cases, config)
+
+        # Pass the test runner instance to config for benchmark timing accumulation
+        config._test_runner = runner
+
         devices = get_test_devices(self.args)
 
         # Run unified tests - returns True if no tests failed
diff --git a/test/infinicore/framework/utils.py b/test/infinicore/framework/utils.py
@@ -22,10 +22,12 @@ def timed_op(func, num_iterations, device):
     for _ in range(num_iterations):
         func()
     synchronize_device(device)
-    return (time.time() - start) / num_iterations
+    return time.time() - start
 
 
-def profile_operation(desc, func, torch_device, num_prerun, num_iterations):
+def profile_operation(
+    desc, func, torch_device, num_prerun, num_iterations, total=False
+):
     """
     Performance profiling workflow
     """
@@ -35,7 +37,11 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations):
 
     # Timed execution
     elapsed = timed_op(lambda: func(), num_iterations, torch_device)
-    print(f"    {desc} time: {elapsed * 1000 :6f} ms")
+    print(f"    {desc} time: {elapsed / num_iterations * 1000 :6f} ms")
+    if total:
+        return elapsed
+    else:
+        return elapsed / num_iterations
 
 
 def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
diff --git a/test/infinicore/run.py b/test/infinicore/run.py
@@ -125,6 +125,14 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
     # Check if verbose mode is enabled
     verbose_mode = extra_args and "--verbose" in extra_args
 
+    # Check if bench mode is enabled for cumulative timing
+    bench_mode = extra_args and "--bench" in extra_args
+    cumulative_timing = {
+        "total_torch_time": 0.0,
+        "total_infinicore_time": 0.0,
+        "operators_tested": 0,
+    }
+
     for test_file in operator_test_files:
         test_name = test_file.stem
 
@@ -157,7 +165,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
                 # Both operators not implemented - skipped test
                 success = False  # Not a failure, but skipped
                 returncode = -2  # Special code for skipped
-            elif "one operator not implemented" in stdout_lower:
+            elif "operator not implemented" in stdout_lower:
                 # One operator not implemented - partial test
                 success = False  # Not fully successful
                 returncode = -3  # Special code for partial
@@ -202,6 +210,34 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
                 f"{status_icon}  {test_name}: {status_text} (return code: {returncode})"
             )
 
+            # Extract benchmark timing if in bench mode
+            if bench_mode and success:
+                # Look for benchmark summary in stdout
+                lines = result.stdout.split("\n")
+                torch_time = 0.0
+                infini_time = 0.0
+
+                for line in lines:
+                    if "PyTorch Total Time:" in line:
+                        try:
+                            # Extract time value (e.g., "PyTorch Total Time: 123.456 ms")
+                            torch_time = (
+                                float(line.split(":")[1].strip().split()[0]) / 1000.0
+                            )  # Convert to seconds
+                        except:
+                            pass
+                    elif "InfiniCore Total Time:" in line:
+                        try:
+                            infini_time = (
+                                float(line.split(":")[1].strip().split()[0]) / 1000.0
+                            )  # Convert to seconds
+                        except:
+                            pass
+
+                cumulative_timing["total_torch_time"] += torch_time
+                cumulative_timing["total_infinicore_time"] += infini_time
+                cumulative_timing["operators_tested"] += 1
+
             # In verbose mode, stop execution on first failure
             if verbose_mode and not success and returncode not in [-2, -3]:
                 break
@@ -219,11 +255,13 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
                 print(f"{'!'*60}")
                 break
 
-    return results
+    return results, cumulative_timing
 
 
-def print_summary(results, verbose_mode=False, total_expected_tests=0):
-    """Print a comprehensive summary of test results."""
+def print_summary(
+    results, verbose_mode=False, total_expected_tests=0, cumulative_timing=None
+):
+    """Print a comprehensive summary of test results including benchmark data."""
     print(f"\n{'='*80}")
     print("CUMULATIVE TEST SUMMARY")
     print(f"{'='*80}")
@@ -272,6 +310,19 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
     if partial > 0:
         print(f"Partial: {partial}")
 
+    # Print benchmark summary if cumulative_timing data is available
+    if cumulative_timing and cumulative_timing["operators_tested"] > 0:
+        print(f"{'-'*40}")
+        print("BENCHMARK SUMMARY:")
+        print(f"  Operators Tested: {cumulative_timing['operators_tested']}")
+        print(
+            f"  Total PyTorch Time: {cumulative_timing['total_torch_time'] * 1000:.3f} ms"
+        )
+        print(
+            f"  Total InfiniCore Time: {cumulative_timing['total_infinicore_time'] * 1000:.3f} ms"
+        )
+        print(f"{'-'*40}")
+
     # Display passed operators
     if passed_operators:
         print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):")
@@ -304,7 +355,7 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
             print("  " + ", ".join(line_ops))
 
     if total > 0:
-        # Calculate success rate based on executed tests only
+        # Calculate success rate based on actual executed tests
         executed_tests = passed + failed + partial
         if executed_tests > 0:
             success_rate = passed / executed_tests * 100
@@ -387,6 +438,9 @@ def generate_help_epilog(ops_dir):
     )
     epilog_parts.append("  python run.py --cpu --nvidia --verbose")
     epilog_parts.append("")
+    epilog_parts.append("  # Run with benchmarking to get cumulative timing")
+    epilog_parts.append("  python run.py --cpu --bench")
+    epilog_parts.append("")
     epilog_parts.append("  # List available tests without running")
     epilog_parts.append("  python run.py --list")
     epilog_parts.append("")
@@ -413,7 +467,7 @@ def generate_help_epilog(ops_dir):
         "  - Operators are automatically discovered from the ops directory"
     )
     epilog_parts.append(
-        "  - --bench option is disabled in batch mode (run individual tests for benchmarking)"
+        "  - --bench mode now shows cumulative timing across all operators"
     )
     epilog_parts.append(
         "  - --verbose mode stops execution on first error and shows full traceback"
@@ -527,14 +581,16 @@ def main():
     print()
 
     # Run all tests
-    results = run_all_op_tests(
+    results, cumulative_timing = run_all_op_tests(
         ops_dir=ops_dir,
         specific_ops=args.ops,
         extra_args=unknown_args,
     )
 
     # Print summary and exit with appropriate code
-    all_passed = print_summary(results, args.verbose, total_expected_tests)
+    all_passed = print_summary(
+        results, args.verbose, total_expected_tests, cumulative_timing
+    )
 
     # Check if there were any tests with missing implementations
     has_missing_implementations = any(

Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,10 @@ def run(self):`
`32`	`32`	`)`
`33`	`33`
`34`	`34`	`runner = TestRunner(self.operator_test.test_cases, config)`
	`35`	`+`
	`36`	`+ # Pass the test runner instance to config for benchmark timing accumulation`
	`37`	`+ config._test_runner = runner`
	`38`	`+`
`35`	`39`	`devices = get_test_devices(self.args)`
`36`	`40`
`37`	`41`	`# Run unified tests - returns True if no tests failed`