diff --git a/src/guidellm/config.py b/src/guidellm/config.py
index 2d4e102a..52dcaf3e 100644
--- a/src/guidellm/config.py
+++ b/src/guidellm/config.py
@@ -32,9 +32,9 @@ class Environment(str, Enum):
 
 ENV_REPORT_MAPPING = {
     Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
-    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.LOCAL: "tests/dummy/report.html",
+    Environment.STAGING: "https://review.neuralmagic.com/guidellm-ui/staging/index.html",
+    Environment.DEV: "https://review.neuralmagic.com/guidellm-ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
 }
 
 
@@ -112,8 +112,6 @@ class ReportGenerationSettings(BaseModel):
     """
 
     source: str = ""
-    report_html_match: str = "window.report_data = {};"
-    report_html_placeholder: str = "{}"
 
 
 class Settings(BaseSettings):
@@ -138,7 +136,7 @@ class Settings(BaseSettings):
     )
 
     # general settings
-    env: Environment = Environment.PROD
+    env: Environment = Environment.DEV
     request_timeout: int = 60 * 5  # 5 minutes
     request_http2: bool = True
     max_concurrency: int = 512
diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py
index 2670c105..4acabf60 100644
--- a/src/guidellm/core/result.py
+++ b/src/guidellm/core/result.py
@@ -400,6 +400,22 @@ def output_token_throughput(self) -> float:
 
         return output_tokens / self.duration if self.duration else 0.0
 
+    @property
+    def output_token_throughput_distribution(self) -> Distribution:
+        """
+        Get the distribution for output token throughput.
+
+        :return: The distribution of output token throughput.
+        :rtype: Distribution
+        """
+        throughputs = []
+        for r in self.results:
+            duration = (r.end_time or 0) - (r.start_time or 0)
+            if duration > 0:
+                throughputs.append(r.output_token_count / duration)
+
+        return Distribution(data=throughputs)
+
     @property
     def prompt_token_distribution(self) -> Distribution:
         """
diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index e7363c6e..5bd79b04 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -2,6 +2,7 @@
 from typing import Any, Literal, Mapping, Optional, Union, get_args
 
 import click
+from guidellm.utils.injector import create_report
 from loguru import logger
 from transformers import AutoTokenizer  # type: ignore[import-untyped]
 
@@ -15,6 +16,7 @@
 )
 from guidellm.request.base import RequestGenerator
 from guidellm.utils import BenchmarkReportProgress, cli_params
+from guidellm.utils.generate_ui_data import generate_ui_api_data
 
 __all__ = ["generate_benchmark_report"]
 
@@ -184,7 +186,6 @@ def generate_benchmark_report_cli(
         cont_refresh_table=enable_continuous_refresh,
     )
 
-
 def generate_benchmark_report(
     target: str,
     data: Optional[str],
@@ -290,6 +291,9 @@ def generate_benchmark_report(
     )
     report = asyncio.run(_run_executor_for_result(executor))
 
+    js_data = generate_ui_api_data(report)
+    create_report(js_data, 'guidellm_report')
+
     # Save and print report
     guidance_report = GuidanceReport()
     guidance_report.benchmarks.append(report)
diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py
index 2fdd8ca8..8ca923b7 100644
--- a/src/guidellm/utils/__init__.py
+++ b/src/guidellm/utils/__init__.py
@@ -1,3 +1,4 @@
+from .generate_ui_data import generate_ui_api_data
 from .injector import create_report, inject_data
 from .progress import BenchmarkReportProgress
 from .text import (
@@ -24,6 +25,7 @@
     "clean_text",
     "create_report",
     "filter_text",
+    "generate_ui_api_data",
"inject_data", "is_path", "is_path_like", @@ -37,4 +39,5 @@ "resolve_transformers_dataset_split", "split_lines_by_punctuation", "split_text", + "stretch_list", ] diff --git a/src/guidellm/utils/generate_ui_data.py b/src/guidellm/utils/generate_ui_data.py new file mode 100644 index 00000000..6181c0c3 --- /dev/null +++ b/src/guidellm/utils/generate_ui_data.py @@ -0,0 +1,188 @@ +import os +import json +import random +import math +from typing import Any, Dict, List +from guidellm.core.distribution import Distribution +from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark + +def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18): + total = dist.__len__() + mean = dist.mean + median = dist.median + minv = dist.min + maxv = dist.max + std_dev = dist.std_deviation + + pvals = dist.percentiles([50, 90, 95, 99]) + + percentile_list = [ + {"percentile": "p50", "value": pvals[0]}, + {"percentile": "p90", "value": pvals[1]}, + {"percentile": "p95", "value": pvals[2]}, + {"percentile": "p99", "value": pvals[3]}, + ] + + if dist.range == 0: + buckets = [{"value": minv, "count": total}] + bucket_width = 0 + else: + bucket_width = dist.range / n_buckets + bucket_counts = [0] * n_buckets + + for val in dist.data: + + idx = int((val - minv) // bucket_width) + if idx == n_buckets: + idx = n_buckets - 1 + bucket_counts[idx] += 1 + + buckets = [] + for i, count in enumerate(bucket_counts): + bucket_start = minv + i * bucket_width + buckets.append({ + "value": bucket_start, + "count": count + }) + + return { + metric_label: { + "statistics": { + "total": total, + "mean": mean, + "median": median, + "min": minv, + "max": maxv, + "std": std_dev, + }, + "percentiles": percentile_list, + "buckets": buckets, + "bucketWidth": bucket_width, + } + } + +def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]: + timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None) + return { + "model": { + "name": report.args.get('model', 'N/A'), + "size": 0 + }, + "task": "N/A", + "dataset": { + "name": "N/A" + }, + "timestamp": timestamp + } + +def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]: + filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks) + sorted_benchmarks = list(sorted(filtered_benchmarks, key=lambda bm: bm.start_time)) + min_start_time = sorted_benchmarks[0].start_time + + all_request_times = [ + result.start_time - min_start_time + for benchmark in sorted_benchmarks + for result in benchmark.results + if result.start_time is not None + ] + + request_distribution = Distribution(data=all_request_times) + final_result = generate_metric_report(request_distribution, "requestsOverTime") + return { "numBenchmarks": len(sorted_benchmarks), **final_result } + + +def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]: + all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data] + all_prompt_token_distribution = Distribution(data=all_prompt_token_data) + all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data] + all_output_token_distribution = Distribution(data=all_output_token_data) + + prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions") + output_token_data = 
generate_metric_report(all_output_token_distribution, "tokenDistributions") + + prompt_token_samples = [result.request.prompt for benchmark in benchmarks for result in benchmark.results] + output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results] + + num_samples = min(5, len(prompt_token_samples), len(output_token_samples)) + sample_indices = random.sample(range(len(prompt_token_samples)), num_samples) + + sample_prompts = [prompt_token_samples[i] for i in sample_indices] + """ + Need a wholistic approach to parsing out characters in the prompt that don't covert well into the format we need + """ + sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts)) + + sample_outputs = [output_token_samples[i] for i in sample_indices] + sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs)) + + request_over_time_results = generate_request_over_time_data(benchmarks) + + return { + "prompts": { + "samples": sample_prompts, + **prompt_token_data + }, + "generations": { + "samples": sample_outputs, + **output_token_data + }, + "requestsOverTime": request_over_time_results, + "rateType": report.args["mode"], + "server": { + "target": report.args.get('target', 'N/A') + } + } + +def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]: + ttft_dist_ms = Distribution(data=bm.ttft_distribution.data) + ttft_data = generate_metric_report(ttft_dist_ms, 'ttft') + itl_dist_ms = Distribution(data=bm.itl_distribution.data) + itl_data = generate_metric_report(itl_dist_ms, 'tpot') + throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data) + throughput_data = generate_metric_report(throughput_dist_ms, 'throughput') + latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data]) + latency__data = generate_metric_report(latency_dist_ms, 'timePerRequest') + return { + "requestsPerSecond": bm.completed_request_rate, + **itl_data, + **ttft_data, + **throughput_data, + **latency__data, + } + +def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]): + benchmark_json = [] + for benchmark in benchmarks: + benchmarks_report = generate_benchmark_json(benchmark) + benchmark_json.append(benchmarks_report) + + return { "benchmarks": benchmark_json } + +def generate_js_variable(variable_name: str, data: dict) -> str: + json_data = json.dumps(data, indent=2) + return f'window.{variable_name} = {json_data};' + +def generate_ui_api_data(report: TextGenerationBenchmarkReport): + filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks)) + run_info_data = generate_run_info(report, filtered_benchmarks) + workload_details_data = generate_workload_details(report, filtered_benchmarks) + benchmarks_data = generate_benchmarks_json(filtered_benchmarks) + run_info_script = generate_js_variable("run_info", run_info_data) + workload_details_script = generate_js_variable("workload_details", workload_details_data) + benchmarks_script = generate_js_variable("benchmarks", benchmarks_data) + + os.makedirs("ben_test", exist_ok=True) + # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by UI + with open("ben_test/run_info.js", "w") as f: + f.write(run_info_script) + with open("ben_test/workload_details.js", "w") as f: + f.write(workload_details_script) + with open("ben_test/benchmarks.js", "w") as f: + 
+        f.write(benchmarks_script)
+
+    return {
+        "window.run_info = {};": run_info_script,
+        "window.workload_details = {};": workload_details_script,
+        "window.benchmarks = {};": benchmarks_script,
+    }
\ No newline at end of file
diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py
index fb5216aa..21e20901 100644
--- a/src/guidellm/utils/injector.py
+++ b/src/guidellm/utils/injector.py
@@ -1,20 +1,18 @@
 from pathlib import Path
 from typing import Union
 
-from pydantic import BaseModel
-
 from guidellm.config import settings
 from guidellm.utils.text import load_text
 
 __all__ = ["create_report", "inject_data"]
 
 
-def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
+def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
     """
-    Creates a report from the model and saves it to the output path.
+    Creates a report from the dictionary and saves it to the output path.
 
-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: dict with match str and json data to inject
+    :type js_data: dict
     :param output_path: the path, either a file or a directory, to save the
         report to. If a directory, the report will be saved as
         "report.html" inside of the directory.
@@ -27,10 +25,8 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
     html_content = load_text(settings.report_generation.source)
 
     report_content = inject_data(
-        model,
+        js_data,
         html_content,
-        settings.report_generation.report_html_match,
-        settings.report_generation.report_html_placeholder,
     )
 
     if not output_path.suffix:
@@ -39,32 +35,23 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
 
     output_path.parent.mkdir(parents=True, exist_ok=True)
     output_path.write_text(report_content)
-
+    print(f'Report saved to {output_path}')
     return output_path
 
-
 def inject_data(
-    model: BaseModel,
+    js_data: dict,
     html: str,
-    match: str,
-    placeholder: str,
 ) -> str:
     """
-    Injects the data from the model into the HTML while replacing the placeholder.
+    Injects the json data into the HTML while replacing the placeholder.
 
-    :param model: the model to serialize and inject
-    :type model: BaseModel
+    :param js_data: the json data to inject
+    :type js_data: dict
     :param html: the html to inject the data into
     :type html: str
-    :param match: the string to match in the html to find the placeholder
-    :type match: str
-    :param placeholder: the placeholder to replace with the model data
-        inside of the placeholder
-    :type placeholder: str
-    :return: the html with the model data injected
+    :return: the html with the json data injected
     :rtype: str
     """
-    model_str = model.json()
-    inject_str = match.replace(placeholder, model_str)
-
-    return html.replace(match, inject_str)
+    for placeholder, script in js_data.items():
+        html = html.replace(placeholder, script)
+    return html
\ No newline at end of file
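
Reviewer sketch (not part of the patch): the new report flow relies on the UI template shipping the literal assignments "window.run_info = {};", "window.workload_details = {};", and "window.benchmarks = {};" as placeholders, with generate_ui_api_data() returning a placeholder-to-script mapping that the reworked inject_data() substitutes by plain string replacement. The self-contained Python snippet below illustrates that contract; build_js_data and inject are illustrative stand-ins, not functions added by this diff.

    import json

    def build_js_data(run_info: dict, workload_details: dict, benchmarks: dict) -> dict:
        # Mirror the shape returned by generate_ui_api_data(): placeholder -> full JS assignment.
        def as_script(name: str, payload: dict) -> str:
            return f"window.{name} = {json.dumps(payload, indent=2)};"

        return {
            "window.run_info = {};": as_script("run_info", run_info),
            "window.workload_details = {};": as_script("workload_details", workload_details),
            "window.benchmarks = {};": as_script("benchmarks", benchmarks),
        }

    def inject(html: str, js_data: dict) -> str:
        # Same replacement loop as the new inject_data(): swap each placeholder for its script.
        for placeholder, script in js_data.items():
            html = html.replace(placeholder, script)
        return html

    if __name__ == "__main__":
        template = (
            "<script>\n"
            "window.run_info = {};\n"
            "window.workload_details = {};\n"
            "window.benchmarks = {};\n"
            "</script>"
        )
        js_data = build_js_data(
            {"model": {"name": "demo", "size": 0}},
            {"prompts": {"samples": []}},
            {"benchmarks": []},
        )
        print(inject(template, js_data))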