@@ -4,16 +4,16 @@
 import json
 import math
 from abc import ABC, abstractmethod
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 from typing import Any, ClassVar

 from pydantic import BaseModel, ConfigDict, Field
-from rich.console import Console
 from rich.padding import Padding
 from rich.text import Text
+from tabulate import tabulate

 from guidellm.benchmark.profile import (
     AsyncProfile,
@@ -31,11 +31,13 @@
 from guidellm.settings import settings
 from guidellm.utils import (
     Colors,
+    Console,
     DistributionSummary,
     RegistryMixin,
     StatusDistributionSummary,
     camelize_str,
     recursive_key_update,
+    safe_format_number,
     safe_format_timestamp,
     split_text_list_by_length,
 )
@@ -175,12 +177,62 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> str: |
         :param report: The completed benchmark report.
         :return:
         """
-        self._print_benchmarks_metadata(report.benchmarks)
-        self._print_benchmarks_info(report.benchmarks)
-        self._print_benchmarks_stats(report.benchmarks)
+        self.console.print("\n\n")
+        self._print_report_benchmarks_info(report)

         return "printed to console"

+    def _print_report_benchmarks_info(self, report: GenerativeBenchmarksReport):
+        benchmark_key = "\nBenchmark"
+        start_key = "\nStart"
+        end_key = "\nEnd"
+        timings_key = "Duration, Warmup, Cooldown\nsec, sec, sec"
+        requests_key = "Requests\nCompl, Incomp, Err"
+        input_tokens_key = "Input Tokens\nCompl, Incomp, Err"
+        output_tokens_key = "Output Tokens\nCompl, Incomp, Err"
+
+        columns = defaultdict(list)
+
+        for benchmark in report.benchmarks:
+            columns[benchmark_key].append(str(benchmark.scheduler.strategy))
+            columns[start_key].append(safe_format_timestamp(benchmark.start_time))
+            columns[end_key].append(safe_format_timestamp(benchmark.end_time))
+            columns[timings_key].append(
+                f"{safe_format_number(benchmark.duration)}, "
+                f"{safe_format_number(report.args.warmup)}, "
+                f"{safe_format_number(report.args.cooldown)}"
+            )
+            columns[requests_key].append(
+                f"{safe_format_number(benchmark.request_totals.successful)}, "
+                f"{safe_format_number(benchmark.request_totals.incomplete)}, "
+                f"{safe_format_number(benchmark.request_totals.errored)}"
+            )
+            columns[input_tokens_key].append(
+                f"{safe_format_number(benchmark.metrics.prompt_token_count.successful.total_sum)}, "
+                f"{safe_format_number(benchmark.metrics.prompt_token_count.incomplete.total_sum)}, "
+                f"{safe_format_number(benchmark.metrics.prompt_token_count.errored.total_sum)}"
+            )
+            columns[output_tokens_key].append(
+                f"{safe_format_number(benchmark.metrics.output_token_count.successful.total_sum)}, "
+                f"{safe_format_number(benchmark.metrics.output_token_count.incomplete.total_sum)}, "
+                f"{safe_format_number(benchmark.metrics.output_token_count.errored.total_sum)}"
+            )
+
+        self.console.print_update("Benchmarks Info", None, "info")
+        self.console.print(
+            Padding(
+                tabulate(
+                    columns,
+                    headers="keys",
+                    tablefmt="pipe",
+                    numalign="center",
+                    stralign="center",
+                    rowalign="center",
+                ),
+                (0, 0, 0, 2),
+            )
+        )
+
     def _print_benchmarks_metadata(self, benchmarks: list[GenerativeBenchmark]):
         start_time = benchmarks[0].run_stats.start_time
         end_time = benchmarks[-1].run_stats.end_time
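
For context (not part of the diff): the new helper builds a mapping of column header to per-benchmark row values and passes it to tabulate with headers="keys", so each dict key becomes a column and the "\n" in a key splits the header across two rendered lines. A minimal standalone sketch of that pattern follows; the row values ("synchronous", "128, 0, 2") are made-up placeholders, and it uses rich.console.Console directly so it runs outside guidellm (the PR itself imports Console from guidellm.utils).

from collections import defaultdict

from rich.console import Console
from rich.padding import Padding
from tabulate import tabulate

# Column header -> list of per-row values; one append per benchmark.
# Placeholder values only, standing in for the report fields.
columns = defaultdict(list)
columns["\nBenchmark"].append("synchronous")
columns["Requests\nCompl, Incomp, Err"].append("128, 0, 2")

table = tabulate(
    columns,
    headers="keys",   # dict keys become the column headers
    tablefmt="pipe",  # Markdown-style pipe table
    numalign="center",
    stralign="center",
)

# Indent the rendered table two cells from the left edge, as the helper does.
Console().print(Padding(table, (0, 0, 0, 2)))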