add html save functionality

DaltheCow · DaltheCow · commit 608bc1f1cb5f · 2025-06-16T10:39:17.000-04:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -56,6 +56,7 @@ dependencies = [
     "pyyaml>=6.0.0",
     "rich",
     "transformers",
+    "pyhumps>=3.8.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -206,7 +206,7 @@ def cli():
     help=(
         "The path to save the output to. If it is a directory, "
         "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or csv files are supported for output types "
+        "Otherwise, json, yaml, csv, or html files are supported for output types "
         "which will be read from the extension for the file path."
     ),
 )
diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py
@@ -1,6 +1,7 @@
 import csv
 import json
 import math
+import humps
 from collections import OrderedDict
 from datetime import datetime
 from pathlib import Path
@@ -27,7 +28,8 @@
 )
 from guidellm.scheduler import strategy_display_str
 from guidellm.utils import Colors, split_text_list_by_length
-
+from guidellm.utils.injector import create_report
+from guidellm.presentation import UIDataBuilder
 __all__ = [
     "GenerativeBenchmarksConsole",
     "GenerativeBenchmarksReport",
@@ -67,6 +69,9 @@ def load_file(path: Union[str, Path]) -> "GenerativeBenchmarksReport":
 
         if type_ == "csv":
             raise ValueError(f"CSV file type is not supported for loading: {path}.")
+        
+        if type_ == "html":
+            raise ValueError(f"HTML file type is not supported for loading: {path}.")
 
         raise ValueError(f"Unsupported file type: {type_} for {path}.")
 
@@ -114,6 +119,9 @@ def save_file(self, path: Union[str, Path]) -> Path:
         if type_ == "csv":
             return self.save_csv(path)
 
+        if type_ == "html":
+            return self.save_html(path)
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")
 
     def save_json(self, path: Union[str, Path]) -> Path:
@@ -220,11 +228,44 @@ def save_csv(self, path: Union[str, Path]) -> Path:
 
         return path
 
+    def save_html(self, path: Union[str, Path]) -> Path:
+        """
+        Build the UI data for the benchmarks, hand it to create_report to
+        inject into the HTML report, and save the result to a file.
+        If the path is a directory, the report is created in a file named
+        benchmarks.html under the directory.
+
+        :param path: The path to create the report at.
+        :return: The path to the report.
+        """
+        path = Path(path)
+
+        if path.is_dir():
+            path = path / "benchmarks.html"
+
+        data = UIDataBuilder(self.benchmarks).to_dict()
+        camel_data = humps.camelize(data)
+        # Maps a placeholder statement to the assignment that replaces it.
+        # NOTE(review): presumably create_report searches the report HTML for
+        # each key and substitutes the value -- confirm against the injector.
+        ui_api_data = {
+            f"window.{humps.decamelize(k)} = {{}};": (
+                f"window.{humps.decamelize(k)} = {json.dumps(v, indent=2)};\n"
+            )
+            for k, v in camel_data.items()
+        }
+        create_report(ui_api_data, path)
+
+        return path
+
     @staticmethod
     def _file_setup(
         path: Union[str, Path],
-        default_file_type: Literal["json", "yaml", "csv"] = "json",
-    ) -> tuple[Path, Literal["json", "yaml", "csv"]]:
+        default_file_type: Literal["json", "yaml", "csv", "html"] = "json",
+    ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]:
         path = Path(path) if not isinstance(path, Path) else path
 
         if path.is_dir():
@@ -242,6 +283,9 @@ def _file_setup(
         if path_suffix in [".csv"]:
             return path, "csv"
 
+        if path_suffix in [".html"]:
+            return path, "html"
+
         raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.")
 
     @staticmethod
diff --git a/src/guidellm/config.py b/src/guidellm/config.py
@@ -30,10 +30,10 @@ class Environment(str, Enum):
 
 
 ENV_REPORT_MAPPING = {
-    Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
-    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.LOCAL: "tests/dummy/report.html",
+    Environment.PROD: "https://neuralmagic.github.io/ui/latest/index.html",
+    Environment.STAGING: "https://neuralmagic.github.io/ui/staging/latest/index.html",
+    Environment.DEV: "https://neuralmagic.github.io/ui/dev/index.html",
+    Environment.LOCAL: "https://neuralmagic.github.io/ui/dev/index.html",
 }
 
 
@@ -86,6 +86,12 @@ class OpenAISettings(BaseModel):
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
 
+class ReportGenerationSettings(BaseModel):
+    """
+    Report generation settings for the application.
+
+    When ``source`` is left empty, Settings.set_default_source replaces it
+    with the ENV_REPORT_MAPPING entry for the active environment.
+    """
+
+    # NOTE(review): presumably the location (URL or path) of the report HTML
+    # to inject data into -- confirm against the report injector.
+    source: str = ""
 
 class Settings(BaseSettings):
     """
@@ -140,6 +146,9 @@ class Settings(BaseSettings):
     )
     openai: OpenAISettings = OpenAISettings()
 
+    # Report settings
+    report_generation: ReportGenerationSettings = ReportGenerationSettings()
+
     # Output settings
     table_border_char: str = "="
     table_headers_border_char: str = "-"
@@ -148,6 +157,8 @@ class Settings(BaseSettings):
     @model_validator(mode="after")
     @classmethod
     def set_default_source(cls, values):
+        """
+        Default report_generation.source from ENV_REPORT_MAPPING for the
+        configured env when no source has been set.
+
+        :param values: The settings object being validated; its
+            report_generation.source may be updated in place.
+        :return: The same values object.
+        """
+        # NOTE(review): ENV_REPORT_MAPPING.get returns None when env has no
+        # mapping entry, which would leave a None in a field annotated as str.
+        if not values.report_generation.source:
+            values.report_generation.source = ENV_REPORT_MAPPING.get(values.env)
         return values
 
     def generate_env_file(self) -> str:
diff --git a/src/guidellm/presentation/__init__.py b/src/guidellm/presentation/__init__.py
@@ -0,0 +1,18 @@
+from .builder import UIDataBuilder
+from .data_models import (Bucket, Model, Dataset, RunInfo, TokenDistribution, TokenDetails, Server, WorkloadDetails, BenchmarkDatum)
+from .injector import (create_report, inject_data)
+
+__all__ = [
+    "UIDataBuilder",
+    "Bucket",
+    "Model",
+    "Dataset",
+    "RunInfo",
+    "TokenDistribution",
+    "TokenDetails",
+    "Server",
+    "WorkloadDetails",
+    "BenchmarkDatum",
+    "create_report",
+    "inject_data",
+]
diff --git a/src/guidellm/presentation/builder.py b/src/guidellm/presentation/builder.py
@@ -0,0 +1,26 @@
+from typing import Any
+from .data_models import RunInfo, WorkloadDetails, BenchmarkDatum
+from guidellm.benchmark.benchmark import GenerativeBenchmark
+
+__all__ = ["UIDataBuilder"]
+
+
+class UIDataBuilder:
+    """
+    Builds the plain-dict payload for the report UI from a list of
+    generative benchmarks.
+    """
+
+    def __init__(self, benchmarks: list[GenerativeBenchmark]):
+        """
+        :param benchmarks: The benchmarks to derive the UI data from.
+        """
+        self.benchmarks = benchmarks
+
+    def build_run_info(self):
+        """Return the RunInfo built from all benchmarks."""
+        return RunInfo.from_benchmarks(self.benchmarks)
+
+    def build_workload_details(self):
+        """Return the WorkloadDetails built from all benchmarks."""
+        return WorkloadDetails.from_benchmarks(self.benchmarks)
+
+    def build_benchmarks(self):
+        """Return one BenchmarkDatum per benchmark, in input order."""
+        return [BenchmarkDatum.from_benchmark(b) for b in self.benchmarks]
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serialize the run info, workload details, and per-benchmark data.
+
+        :return: A dict with the keys "run_info", "workload_details", and
+            "benchmarks", each holding plain-Python (dict/list) data.
+        """
+        # model_dump() is the Pydantic v2 replacement for the deprecated
+        # BaseModel.dict().
+        return {
+            "run_info": self.build_run_info().model_dump(),
+            "workload_details": self.build_workload_details().model_dump(),
+            "benchmarks": [b.model_dump() for b in self.build_benchmarks()],
+        }
diff --git a/src/guidellm/presentation/data_models.py b/src/guidellm/presentation/data_models.py
@@ -0,0 +1,149 @@
+from collections import defaultdict
+from math import ceil
+from pydantic import BaseModel
+import random
+from typing import List, Optional, Tuple
+
+from guidellm.benchmark.benchmark import GenerativeBenchmark
+from guidellm.objects.statistics import DistributionSummary
+
+__all__ = ["Bucket", "Model", "Dataset", "RunInfo", "TokenDistribution", "TokenDetails", "Server", "WorkloadDetails", "BenchmarkDatum"]
+
+class Bucket(BaseModel):
+  """
+  One histogram bucket: the bucket's lower bound and the number of data
+  points that fell into it.
+  """
+
+  value: float
+  count: int
+
+  @staticmethod
+  def from_data(
+      data: List[float],
+      bucket_width: Optional[float] = None,
+      n_buckets: Optional[int] = None
+  ) -> Tuple[List["Bucket"], float]:
+      """
+      Group data into equal-width buckets starting at min(data).
+
+      :param data: The values to bucket.
+      :param bucket_width: The width of each bucket. When given, the number
+          of buckets is derived from the data range and n_buckets is ignored.
+      :param n_buckets: The number of buckets to split the data range into
+          when bucket_width is not given. Defaults to 10.
+      :return: The non-empty buckets sorted by lower bound, and the bucket
+          width used. Empty data yields ([], 1.0).
+      :raises ValueError: If bucket_width is not positive or n_buckets is
+          less than 1.
+      """
+      if not data:
+          return [], 1.0
+
+      min_v = min(data)
+      range_v = max(data) - min_v
+
+      if bucket_width is None:
+          if n_buckets is None:
+              n_buckets = 10
+          if n_buckets < 1:
+              raise ValueError(f"n_buckets must be at least 1, got {n_buckets}.")
+          bucket_width = range_v / n_buckets
+          if bucket_width == 0:
+              # All values are identical; fall back to a unit width so the
+              # floor division below cannot divide by zero.
+              bucket_width = 1.0
+      else:
+          if bucket_width <= 0:
+              raise ValueError(
+                  f"bucket_width must be positive, got {bucket_width}."
+              )
+          # Keep at least one bucket: with a zero range, ceil() gives 0 and
+          # the clamp below would otherwise push every value to index -1.
+          n_buckets = max(1, ceil(range_v / bucket_width))
+
+      last_idx = n_buckets - 1
+      bucket_counts = defaultdict(int)
+      for val in data:
+          # The maximum value lands exactly on the upper edge; clamp it into
+          # the last bucket.
+          idx = min(int((val - min_v) // bucket_width), last_idx)
+          bucket_counts[min_v + idx * bucket_width] += 1
+
+      buckets = [
+          Bucket(value=start, count=count)
+          for start, count in sorted(bucket_counts.items())
+      ]
+      return buckets, bucket_width
+
+
+class Model(BaseModel):
+  """The model a benchmark run was executed against."""
+
+  # RunInfo.from_benchmarks sets this to the first benchmark's
+  # worker.backend_model, or 'N/A' when that is falsy.
+  name: str
+  # RunInfo.from_benchmarks always passes 0 here.
+  # NOTE(review): unit/meaning of size is not established in this file.
+  size: int
+
+class Dataset(BaseModel):
+  """The dataset associated with a benchmark run."""
+
+  # RunInfo.from_benchmarks currently always sets this to "N/A".
+  name: str
+
+class RunInfo(BaseModel):
+  """
+  Run-level metadata for a set of benchmarks: model, task, timestamp, and
+  dataset.
+  """
+
+  model: Model
+  task: str
+  timestamp: float
+  dataset: Dataset
+
+  @classmethod
+  def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):
+    """
+    Build the run info from a list of benchmarks.
+
+    The model name comes from the first benchmark's worker; the timestamp
+    is the latest run start time across all benchmarks. Task and dataset
+    name are placeholders ("N/A") and the model size is 0.
+
+    :param benchmarks: The benchmarks of the run; must not be empty.
+    :return: The populated RunInfo instance.
+    :raises ValueError: If no benchmark has a run start time.
+    """
+    model = benchmarks[0].worker.backend_model or 'N/A'
+    # Filter on the same attribute that is aggregated, so a missing run
+    # start time can never reach max().
+    start_times = [
+      bm.run_stats.start_time
+      for bm in benchmarks
+      if bm.run_stats.start_time is not None
+    ]
+    if not start_times:
+      raise ValueError("No benchmark has a run start time to use as timestamp.")
+    return cls(
+      model=Model(name=model, size=0),
+      task='N/A',
+      timestamp=max(start_times),
+      dataset=Dataset(name="N/A")
+    )
+
+class TokenDistribution(BaseModel):
+  """
+  A histogram with optional summary statistics. WorkloadDetails uses it for
+  token-count distributions and also for request start-time offsets.
+  """
+
+  # Summary statistics of the underlying values; left as None for the
+  # requests-over-time distribution built in WorkloadDetails.from_benchmarks.
+  statistics: Optional[DistributionSummary] = None
+  # The histogram buckets.
+  buckets: list[Bucket]
+  # The width shared by all buckets.
+  bucket_width: float
+
+
+class TokenDetails(BaseModel):
+  """Sample texts plus the token-count distribution they belong to."""
+
+  # Up to five randomly sampled texts, with newlines replaced by spaces and
+  # double quotes replaced by single quotes (see
+  # WorkloadDetails.from_benchmarks).
+  samples: list[str]
+  # Distribution of token counts over all successful requests.
+  token_distributions: TokenDistribution
+
+class Server(BaseModel):
+  """The server the benchmarks were run against."""
+
+  # WorkloadDetails.from_benchmarks fills this from the first benchmark's
+  # worker.backend_target.
+  target: str
+
+class RequestOverTime(BaseModel):
+   """Distribution of request start times across a set of benchmarks."""
+
+   # Number of benchmarks; WorkloadDetails.from_benchmarks also uses this
+   # value as the number of histogram buckets.
+   num_benchmarks: int
+   # Histogram of request start times, measured relative to the first
+   # benchmark's run start time.
+   requests_over_time: TokenDistribution
+
+class WorkloadDetails(BaseModel):
+  """
+  Workload description of a benchmark run: sampled prompts and generations
+  with their token-count distributions, the distribution of request start
+  times, the rate type, and the target server.
+  """
+
+  prompts: TokenDetails
+  generations: TokenDetails
+  requests_over_time: RequestOverTime
+  rate_type: str
+  server: Server
+
+  @classmethod
+  def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):
+    """
+    Aggregate the successful requests of all benchmarks into workload details.
+
+    :param benchmarks: The benchmarks to summarize. The first one supplies
+      the target, the rate type, and the reference start time.
+    :return: The populated WorkloadDetails instance.
+    """
+    first_benchmark = benchmarks[0]
+    target = first_benchmark.worker.backend_target
+    rate_type = first_benchmark.args.profile.type_
+
+    successes = [req for bm in benchmarks for req in bm.requests.successful]
+
+    def _one_line(text):
+      # Collapse newlines to spaces and swap double quotes for single quotes.
+      return text.replace("\n", " ").replace("\"", "'")
+
+    # Randomly pick up to five successful requests to show as examples.
+    picked = random.sample(range(len(successes)), min(5, len(successes)))
+    sample_prompts = [_one_line(successes[i].prompt) for i in picked]
+    sample_outputs = [_one_line(successes[i].output) for i in picked]
+
+    prompt_tokens = [req.prompt_tokens for req in successes]
+    output_tokens = [req.output_tokens for req in successes]
+
+    # Token counts are bucketed with a fixed width of 1; the width returned
+    # by Bucket.from_data is not used.
+    prompt_token_buckets, _ = Bucket.from_data(prompt_tokens, 1)
+    output_token_buckets, _ = Bucket.from_data(output_tokens, 1)
+
+    prompt_token_stats = DistributionSummary.from_values(prompt_tokens)
+    output_token_stats = DistributionSummary.from_values(output_tokens)
+    prompt_token_distributions = TokenDistribution(
+      statistics=prompt_token_stats,
+      buckets=prompt_token_buckets,
+      bucket_width=1,
+    )
+    output_token_distributions = TokenDistribution(
+      statistics=output_token_stats,
+      buckets=output_token_buckets,
+      bucket_width=1,
+    )
+
+    # Request start times are expressed relative to the first benchmark's
+    # run start and split into one bucket per benchmark.
+    reference_start = first_benchmark.run_stats.start_time
+    start_offsets = [
+      req.start_time - reference_start
+      for req in successes
+      if req.start_time is not None
+    ]
+    number_of_buckets = len(benchmarks)
+    request_buckets, request_bucket_width = Bucket.from_data(
+      start_offsets, None, number_of_buckets
+    )
+    request_over_time_distribution = TokenDistribution(
+      buckets=request_buckets,
+      bucket_width=request_bucket_width,
+    )
+
+    return cls(
+      prompts=TokenDetails(
+        samples=sample_prompts,
+        token_distributions=prompt_token_distributions,
+      ),
+      generations=TokenDetails(
+        samples=sample_outputs,
+        token_distributions=output_token_distributions,
+      ),
+      requests_over_time=RequestOverTime(
+        requests_over_time=request_over_time_distribution,
+        num_benchmarks=number_of_buckets,
+      ),
+      rate_type=rate_type,
+      server=Server(target=target),
+    )
+
+class BenchmarkDatum(BaseModel):
+  """
+  Per-benchmark metrics for the report, all taken from the benchmark's
+  successful requests.
+  """
+
+  requests_per_second: float
+  tpot: DistributionSummary
+  ttft: DistributionSummary
+  throughput: DistributionSummary
+  time_per_request: DistributionSummary
+
+  @classmethod
+  def from_benchmark(cls, bm: GenerativeBenchmark):
+    """
+    Build the datum from a single benchmark's metrics.
+
+    :param bm: The benchmark whose metrics are read.
+    :return: The populated BenchmarkDatum instance.
+    """
+    metrics = bm.metrics
+    # tpot and ttft come from the *_ms metrics; time_per_request comes from
+    # request_latency, which has no _ms suffix.
+    # NOTE(review): confirm the UI expects these mixed units.
+    fields = {
+      "requests_per_second": metrics.requests_per_second.successful.mean,
+      "tpot": metrics.inter_token_latency_ms.successful,
+      "ttft": metrics.time_to_first_token_ms.successful,
+      "throughput": metrics.output_tokens_per_second.successful,
+      "time_per_request": metrics.request_latency.successful,
+    }
+    return cls(**fields)
diff --git a/src/guidellm/presentation/injector.py b/src/guidellm/presentation/injector.py
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py

Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,7 @@ dependencies = [`
`56`	`56`	`"pyyaml>=6.0.0",`
`57`	`57`	`"rich",`
`58`	`58`	`"transformers",`
	`59`	`+ "pyhumps>=3.8.0",`
`59`	`60`	`]`
`60`	`61`
`61`	`62`	`[project.optional-dependencies]`
Original file line number	Diff line number	Diff line change
`@@ -206,7 +206,7 @@ def cli():`
`206`	`206`	`help=(`
`207`	`207`	`"The path to save the output to. If it is a directory, "`
`208`	`208`	`"it will save benchmarks.json under it. "`
`209`		`- "Otherwise, json, yaml, or csv files are supported for output types "`
	`209`	`+ "Otherwise, json, yaml, csv, or html files are supported for output types "`
`210`	`210`	`"which will be read from the extension for the file path."`
`211`	`211`	`),`
`212`	`212`	`)`