
Commit 0cf4523

[perfstress] Add support for per-operation latency tracking and results file output (#42554)
1 parent 20a21f4 · commit 0cf4523

File tree

3 files changed: +53 / -7 lines changed


tools/azure-sdk-tools/devtools_testutils/perfstress_tests/_batch_perf_test.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -23,6 +23,7 @@ def __init__(self, arguments):
         self._test_proxy_policy: Optional[PerfTestProxyPolicy] = None
         self._client_kwargs: Dict[str, Any] = {}
         self._recording_id: Optional[str] = None
+        self._latencies: List[float] = []

         if self.args.insecure:
             # Disable SSL verification for SDK Client
@@ -147,8 +148,12 @@ def run_all_sync(self, duration: int, *, run_profiler: bool = False, **kwargs) -
             self._save_profile("sync", output_path=self.args.profile_path)
             self._print_profile_stats()
         else:
+            self._latencies = []
             while self._last_completion_time < duration:
+                start = time.perf_counter_ns()
                 self._completed_operations += self.run_batch_sync()
+                if self.args.latency:
+                    self._latencies.append((time.perf_counter_ns() - start) / 1_000_000)
                 self._last_completion_time = time.time() - starttime

     async def run_all_async(self, duration: int, *, run_profiler: bool = False, **kwargs) -> None:
@@ -168,6 +173,10 @@ async def run_all_async(self, duration: int, *, run_profiler: bool = False, **kw
             self._save_profile("async", output_path=self.args.profile_path)
             self._print_profile_stats()
         else:
+            self._latencies = []
             while self._last_completion_time < duration:
+                start = time.perf_counter_ns()
                 self._completed_operations += await self.run_batch_async()
+                if self.args.latency:
+                    self._latencies.append((time.perf_counter_ns() - start) / 1_000_000)
                 self._last_completion_time = time.time() - starttime
```
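For context, the timing pattern introduced above can be sketched on its own. This is a minimal, self-contained illustration, not code from the commit; `run_timed_loop` and `run_batch` are hypothetical stand-ins for the test's run loop and `run_batch_sync()` / `run_batch_async()`:

```python
import time
from typing import Callable, List


def run_timed_loop(run_batch: Callable[[], int], duration: float, track_latency: bool) -> List[float]:
    """Run batches until `duration` seconds elapse, recording per-batch latency in milliseconds."""
    latencies: List[float] = []
    start_time = time.time()
    elapsed = 0.0
    while elapsed < duration:
        batch_start = time.perf_counter_ns()
        run_batch()  # hypothetical stand-in for run_batch_sync()
        if track_latency:
            # perf_counter_ns() is a monotonic nanosecond counter; divide by 1_000_000 for ms.
            latencies.append((time.perf_counter_ns() - batch_start) / 1_000_000)
        elapsed = time.time() - start_time
    return latencies
```

Note that each sample times one call to the batch method, so when a test runs more than one operation per batch, the sample covers the whole batch rather than a single operation.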

tools/azure-sdk-tools/devtools_testutils/perfstress_tests/_perf_stress_proc.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -64,7 +64,7 @@ async def _start_tests(index, test_class, num_tests, args, test_stages, results,
     if args.warmup:
         # Waiting till all processes are ready to start "Warmup"
         _synchronize(test_stages["Warmup"])
-        await _run_tests(args.warmup, args, tests, results, status, with_profiler=False)
+        await _run_tests(args.warmup, args, tests, results, status, with_profiler=False, warmup=True)

     # Waiting till all processes are ready to start "Tests"
     _synchronize(test_stages["Tests"])
@@ -108,7 +108,7 @@ async def _start_tests(index, test_class, num_tests, args, test_stages, results,
        print(f"Failed to close tests: {e}")


-async def _run_tests(duration: int, args, tests, results, status, *, with_profiler: bool = False) -> None:
+async def _run_tests(duration: int, args, tests, results, status, *, with_profiler: bool = False, warmup: bool = False) -> None:
    """Run the listed tests either in parallel asynchronously or in a thread pool."""
    # Kick of a status monitoring thread.
    stop_status = threading.Event()
@@ -133,7 +133,9 @@ async def _run_tests(duration: int, args, tests, results, status, *, with_profil

        # Add final test results to the results queue to be accumulated by the parent process.
        for test in tests:
-            results.put((test._parallel_index, test.completed_operations, test.last_completion_time))
+            # Don't report latencies for warmup
+            latencies = test._latencies if not warmup else []
+            results.put((test._parallel_index, test.completed_operations, test.last_completion_time, latencies))
    finally:
        # Clean up status reporting thread.
        stop_status.set()
```
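To make the new queue payload concrete, here is a small sketch of the tuple shape that `_run_tests` now puts on the results queue, with the warmup guard applied. The helper name `build_result` and the sample values are illustrative only:

```python
from typing import List, Tuple

# (parallel_index, completed_operations, last_completion_time, latencies)
ResultTuple = Tuple[int, int, float, List[float]]


def build_result(index: int, completed: int, elapsed: float,
                 latencies: List[float], warmup: bool) -> ResultTuple:
    # Warmup iterations still run, but their latency samples are dropped so they
    # don't skew the reported distribution.
    return (index, completed, elapsed, latencies if not warmup else [])


print(build_result(0, 500, 10.0, [8.1, 7.9], warmup=False))  # (0, 500, 10.0, [8.1, 7.9])
print(build_result(0, 500, 10.0, [8.1, 7.9], warmup=True))   # (0, 500, 10.0, [])
```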

tools/azure-sdk-tools/devtools_testutils/perfstress_tests/_perf_stress_runner.py

Lines changed: 39 additions & 4 deletions
```diff
@@ -5,6 +5,7 @@

 import argparse
 import inspect
+import json
 import logging
 import math
 import os
```
```diff
@@ -114,6 +115,14 @@ def _parse_args(self) -> str:
         per_test_arg_parser.add_argument(
             "--insecure", action="store_true", help="Disable SSL validation. Default is False.", default=False
         )
+        per_test_arg_parser.add_argument(
+            "-l", "--latency", action="store_true", help="Track per-operation latency statistics.", default=False
+        )
+        per_test_arg_parser.add_argument(
+            "--results-file",
+            type=str,
+            help="File path location to store the results for the test run.",
+        )

         # Per-test args
         self._test_class_to_run.add_arguments(per_test_arg_parser)
```
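The two new options behave like standard argparse flags. A minimal reproduction (with a throwaway parser outside the runner) shows how a run would enable latency tracking and point the results file at `results.json`; the parser name and the example arguments are illustrative:

```python
import argparse

# Throwaway parser that mirrors only the two options added above.
parser = argparse.ArgumentParser("perfstress-latency-example")
parser.add_argument(
    "-l", "--latency", action="store_true", help="Track per-operation latency statistics.", default=False
)
parser.add_argument(
    "--results-file",
    type=str,
    help="File path location to store the results for the test run.",
)

args = parser.parse_args(["--latency", "--results-file", "results.json"])
assert args.latency is True
assert args.results_file == "results.json"  # dashes become underscores in the namespace
```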
```diff
@@ -264,13 +273,16 @@ async def start(self):

     def _report_results(self):
         """Calculate and log the test run results across all child processes"""
-        operations = []
+        total_operations = 0
+        operations_per_second = 0.0
+        latencies = []
         while not self.results.empty():
-            operations.append(self.results.get())
+            result: Tuple[int, int, float, List[float]] = self.results.get()
+            total_operations += result[1]
+            operations_per_second += result[1] / result[2] if result[2] else 0
+            latencies.extend(result[3])

-        total_operations = self._get_completed_operations(operations)
         self.logger.info("")
-        operations_per_second = self._get_operations_per_second(operations)
         if operations_per_second:
             seconds_per_operation = 1 / operations_per_second
             weighted_average_seconds = total_operations / operations_per_second
```
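The aggregation above folds each child-process result into running totals instead of collecting the tuples into a list first. A standalone sketch with made-up sample numbers shows the arithmetic:

```python
from typing import List, Tuple

# Each tuple mirrors the queue payload: (parallel_index, completed_operations,
# last_completion_time, latencies). The numbers below are made-up samples.
results: List[Tuple[int, int, float, List[float]]] = [
    (0, 1200, 10.0, [7.9, 8.3, 8.1]),
    (1, 1150, 10.0, [8.6, 8.4, 9.0]),
]

total_operations = sum(r[1] for r in results)
# Throughput is the sum of each process's own rate, so a process that stopped
# early contributes its true rate rather than diluting a shared wall-clock average.
operations_per_second = sum(r[1] / r[2] for r in results if r[2])
latencies = [sample for r in results for sample in r[3]]

print(total_operations)       # 2350
print(operations_per_second)  # 235.0
print(len(latencies))         # 6
```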
```diff
@@ -282,6 +294,14 @@ def _report_results(self):
                     self._format_number(seconds_per_operation, 4),
                 )
             )
+
+            if self.per_test_args.latency and len(latencies) > 0:
+                self.logger.info("")
+                self._print_latencies(latencies)
+            if self.per_test_args.results_file:
+                # Not all tests will have a size argument
+                size = getattr(self.per_test_args, "size", None)
+                self._write_results_file(self.per_test_args.results_file, latencies, size)
         else:
             self.logger.info("Completed without generating operation statistics.")
             self.logger.info("")
```
```diff
@@ -335,3 +355,18 @@ def _format_number(self, value, min_significant_digits):
         decimals = max(0, significant_digits - math.floor(log) - 1)

         return ("{:,." + str(decimals) + "f}").format(rounded)
+
+    def _print_latencies(self, latencies: List[float]):
+        self.logger.info("=== Latency Distribution ===")
+        latencies.sort()
+
+        percentiles = [50.0, 75.0, 90.0, 95.0, 99.0, 99.9, 100.0]
+        for p in percentiles:
+            index = math.ceil(p / 100 * len(latencies)) - 1
+            self.logger.info(f"{p:5.1f}% {latencies[index]:10.2f}ms")
+
+    def _write_results_file(self, path: str, latencies: List[float], size):
+        data = [{"Time": l, "Size": size} for l in latencies]
+        output = json.dumps(data, indent=2)
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(output)
```
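Taken together, `_print_latencies` uses a nearest-rank percentile lookup over the sorted samples, and `_write_results_file` dumps one JSON record per sample. The standalone functions below mimic that behavior for illustration; they are not the methods from the commit:

```python
import json
import math
from typing import Dict, List, Optional


def latency_percentiles(latencies: List[float]) -> Dict[float, float]:
    """Nearest-rank percentile lookup over the sorted samples (assumes a non-empty list)."""
    ordered = sorted(latencies)
    result: Dict[float, float] = {}
    for p in [50.0, 75.0, 90.0, 95.0, 99.0, 99.9, 100.0]:
        index = math.ceil(p / 100 * len(ordered)) - 1
        result[p] = ordered[index]
    return result


def write_results_file(path: str, latencies: List[float], size: Optional[int]) -> None:
    """Write one record per sample: {"Time": <latency in ms>, "Size": <size or null>}."""
    data = [{"Time": latency, "Size": size} for latency in latencies]
    with open(path, "w", encoding="utf-8") as f:
        f.write(json.dumps(data, indent=2))


samples = [8.0, 9.5, 7.2, 10.1, 8.8]
print(latency_percentiles(samples))        # {50.0: 8.8, 75.0: 9.5, 90.0: 10.1, ...}
write_results_file("results.json", samples, None)
```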
