Skip to content

Commit b1fa2bb

Browse files
committed
Create constants
1 parent 19575f1 commit b1fa2bb

File tree

8 files changed

+108
-112
lines changed

8 files changed

+108
-112
lines changed

ai_infra_bench/check.py

Lines changed: 1 addition & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from datetime import datetime
55
from typing import List
66

7+
from ai_infra_bench.constants import DEFAULT_BENCH_SERVING_PATH, SGLANG_KEYS
78
from ai_infra_bench.utils import is_ci
89

910
try:
@@ -15,8 +16,6 @@
1516

1617
logger = logging.getLogger(__name__)
1718

18-
DEFAULT_BENCH_SERVING_PATH = "/tmp/ai_infra_bench/bench_serving.py"
19-
2019

2120
def ensure_bench_serving_available() -> None:
2221
"""
@@ -127,45 +126,6 @@ def install_bench_serving_dependencies() -> None:
127126
)
128127

129128

130-
SGLANG_KEYS = [
131-
"backend",
132-
"dataset_name",
133-
"request_rate",
134-
"max_concurrency",
135-
"sharegpt_output_len",
136-
"random_input_len",
137-
"random_output_len",
138-
"random_range_ratio",
139-
"duration",
140-
"completed",
141-
"total_input_tokens",
142-
"total_output_tokens",
143-
"total_output_tokens_retokenized",
144-
"request_throughput",
145-
"input_throughput",
146-
"output_throughput",
147-
"mean_e2e_latency_ms",
148-
"median_e2e_latency_ms",
149-
"std_e2e_latency_ms",
150-
"p99_e2e_latency_ms",
151-
"mean_ttft_ms",
152-
"median_ttft_ms",
153-
"std_ttft_ms",
154-
"p99_ttft_ms",
155-
"mean_tpot_ms",
156-
"median_tpot_ms",
157-
"std_tpot_ms",
158-
"p99_tpot_ms",
159-
"mean_itl_ms",
160-
"median_itl_ms",
161-
"std_itl_ms",
162-
"p95_itl_ms",
163-
"p99_itl_ms",
164-
"concurrency",
165-
"accept_length",
166-
]
167-
168-
169129
def check_dir(output_dir: str, full_data_json_path):
170130
"""
171131
Checks if the specified output directory exists. If it does, it prompts the user

ai_infra_bench/client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
check_str_list_str,
1111
check_values_in_features_metrics,
1212
)
13+
from ai_infra_bench.constants import FULL_DATA_JSON_PATH
1314
from ai_infra_bench.modes.cmp import cmp_export_table
1415
from ai_infra_bench.modes.gen import gen_export_csv, gen_export_table, gen_plot, gen_run
1516
from ai_infra_bench.modes.slo import slo_run
1617
from ai_infra_bench.utils import (
17-
FULL_DATA_JSON_PATH,
1818
ServerAccessInfo,
1919
add_request_rate,
2020
cmp_preprocess_client_cmds,
@@ -171,6 +171,7 @@ def client_gen(
171171
input_features=input_features,
172172
output_metrics=output_metrics,
173173
output_dir=output_dir,
174+
server_label=server_labels[0],
174175
)
175176

176177
if not disable_csv:

ai_infra_bench/constants.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
DEFAULT_BENCH_SERVING_PATH = "/tmp/ai_infra_bench/bench_serving.py"
2+
3+
FULL_DATA_JSON_PATH = "full_data_json" # used to store all json files
4+
TABLE_NAME = "table.md"
5+
CSV_NAME = "data.csv"
6+
WARMUP_FILE = ".warmup.json"
7+
COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b"]
8+
GRAPH_PER_ROW = 3
9+
10+
SGLANG_KEYS = [
11+
"backend",
12+
"dataset_name",
13+
"request_rate",
14+
"max_concurrency",
15+
"sharegpt_output_len",
16+
"random_input_len",
17+
"random_output_len",
18+
"random_range_ratio",
19+
"duration",
20+
"completed",
21+
"total_input_tokens",
22+
"total_output_tokens",
23+
"total_output_tokens_retokenized",
24+
"request_throughput",
25+
"input_throughput",
26+
"output_throughput",
27+
"mean_e2e_latency_ms",
28+
"median_e2e_latency_ms",
29+
"std_e2e_latency_ms",
30+
"p99_e2e_latency_ms",
31+
"mean_ttft_ms",
32+
"median_ttft_ms",
33+
"std_ttft_ms",
34+
"p99_ttft_ms",
35+
"mean_tpot_ms",
36+
"median_tpot_ms",
37+
"std_tpot_ms",
38+
"p99_tpot_ms",
39+
"mean_itl_ms",
40+
"median_itl_ms",
41+
"std_itl_ms",
42+
"p95_itl_ms",
43+
"p99_itl_ms",
44+
"concurrency",
45+
"accept_length",
46+
]
47+
48+
49+
demo_output = {
50+
"backend": "sglang-oai",
51+
"dataset_name": "random",
52+
"request_rate": 10.0,
53+
"max_concurrency": 10,
54+
"sharegpt_output_len": None,
55+
"random_input_len": 1200,
56+
"random_output_len": 800,
57+
"random_range_ratio": 1.0,
58+
"duration": 45.11868940386921,
59+
"completed": 100,
60+
"total_input_tokens": 120000,
61+
"total_output_tokens": 80000,
62+
"total_output_tokens_retokenized": 79998,
63+
"request_throughput": 2.2163764356024127,
64+
"input_throughput": 2659.6517227228956,
65+
"output_throughput": 1773.1011484819303,
66+
"mean_e2e_latency_ms": 4482.026166650467,
67+
"median_e2e_latency_ms": 4487.435979535803,
68+
"std_e2e_latency_ms": 32.15524448450066,
69+
"p99_e2e_latency_ms": 4534.823208898306,
70+
"mean_ttft_ms": 38.534140698611736,
71+
"median_ttft_ms": 42.44273528456688,
72+
"std_ttft_ms": 10.558202315257851,
73+
"p99_ttft_ms": 61.15902605932206,
74+
"mean_tpot_ms": 5.561316678287678,
75+
"median_tpot_ms": 5.56157646876747,
76+
"std_tpot_ms": 0.04168330778296244,
77+
"p99_tpot_ms": 5.627061070545631,
78+
"mean_itl_ms": 5.561935330397016,
79+
"median_itl_ms": 5.495080258697271,
80+
"std_itl_ms": 1.1977701758121588,
81+
"p95_itl_ms": 6.047771545127034,
82+
"p99_itl_ms": 6.62423954345286,
83+
"concurrency": 9.933857179517508,
84+
"accept_length": None,
85+
}

ai_infra_bench/modes/cmp.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,8 @@
44
import plotly.graph_objects as go
55
from plotly.subplots import make_subplots
66

7-
from ai_infra_bench.utils import (
8-
TABLE_NAME,
9-
avg_std_strf,
10-
colors,
11-
enter_decorate,
12-
graph_per_row,
13-
)
7+
from ai_infra_bench.constants import COLORS, GRAPH_PER_ROW, TABLE_NAME
8+
from ai_infra_bench.utils import avg_std_strf, enter_decorate
149

1510

1611
@enter_decorate("PLOT TO HTML", filename="<input_feature>.html")
@@ -23,8 +18,8 @@ def cmp_plot(data, input_features, metrics, labels, output_dir):
2318

2419
# there are totally len(input_features) html files
2520
for input_feature in input_features:
26-
rows = (len(metrics) - 1) // graph_per_row + 1
27-
cols = graph_per_row
21+
rows = (len(metrics) - 1) // GRAPH_PER_ROW + 1
22+
cols = GRAPH_PER_ROW
2823
fig = make_subplots(rows=rows, cols=cols)
2924

3025
# there totally are len(metric) subplots
@@ -47,7 +42,7 @@ def cmp_plot(data, input_features, metrics, labels, output_dir):
4742
mode="lines+markers",
4843
marker=dict(size=8),
4944
line=dict(
50-
color=colors[server_idx % len(colors)],
45+
color=COLORS[server_idx % len(COLORS)],
5146
width=3,
5247
),
5348
hovertemplate=f"<br>{input_feature}: %{{x}}<br>{metric}: %{{y}}<br><extra></extra>",
@@ -60,7 +55,7 @@ def cmp_plot(data, input_features, metrics, labels, output_dir):
6055

6156
# one subplot is over
6257
cur_col += 1
63-
if cur_col == graph_per_row:
58+
if cur_col == GRAPH_PER_ROW:
6459
cur_col = 0
6560
cur_row += 1
6661

ai_infra_bench/modes/gen.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,14 @@
99
from plotly.subplots import make_subplots
1010
from tqdm import tqdm
1111

12-
from ai_infra_bench.utils import (
12+
from ai_infra_bench.constants import (
13+
COLORS,
1314
CSV_NAME,
1415
FULL_DATA_JSON_PATH,
16+
GRAPH_PER_ROW,
1517
TABLE_NAME,
16-
avg_std_strf,
17-
colors,
18-
enter_decorate,
19-
graph_per_row,
20-
read_jsonl,
21-
run_cmd,
2218
)
19+
from ai_infra_bench.utils import avg_std_strf, enter_decorate, read_jsonl, run_cmd
2320

2421
logger = logging.getLogger(__name__)
2522

@@ -103,22 +100,22 @@ def gen_plot(
103100
):
104101
for feature in input_features:
105102
num_graphs = len(output_metrics)
106-
num_rows = math.ceil(num_graphs / graph_per_row)
103+
num_rows = math.ceil(num_graphs / GRAPH_PER_ROW)
107104

108-
fig = make_subplots(rows=num_rows, cols=graph_per_row)
105+
fig = make_subplots(rows=num_rows, cols=GRAPH_PER_ROW)
109106
x_values = [
110107
np.mean([item[feature] for item in client])
111108
for client in all_clients_results
112109
]
113110

114111
for idx, metric in enumerate(output_metrics):
115-
row, col = divmod(idx, graph_per_row)
112+
row, col = divmod(idx, GRAPH_PER_ROW)
116113

117114
y_values = [
118115
np.mean([item[metric] for item in client])
119116
for client in all_clients_results
120117
]
121-
color = colors[idx % len(colors)]
118+
color = COLORS[idx % len(COLORS)]
122119

123120
fig.add_trace(
124121
go.Scatter(
@@ -141,7 +138,7 @@ def gen_plot(
141138
title_text=f"{server_label} - {feature}" if server_label else feature,
142139
showlegend=True,
143140
height=300 * num_rows,
144-
width=400 * graph_per_row,
141+
width=400 * GRAPH_PER_ROW,
145142
margin=dict(t=50, b=30, l=30, r=30),
146143
)
147144

ai_infra_bench/modes/slo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
import numpy as np
66

7+
from ai_infra_bench.constants import FULL_DATA_JSON_PATH
78
from ai_infra_bench.utils import (
8-
FULL_DATA_JSON_PATH,
99
add_request_rate,
1010
enter_decorate,
1111
read_jsonl,

ai_infra_bench/utils.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import psutil
1818
import requests
1919

20+
from ai_infra_bench.constants import WARMUP_FILE, demo_output
21+
2022

2123
@dataclass
2224
class ServerAccessInfo:
@@ -27,50 +29,6 @@ class ServerAccessInfo:
2729

2830
logger = logging.getLogger(__name__)
2931

30-
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b"]
31-
graph_per_row = 3
32-
FULL_DATA_JSON_PATH = "full_data_json" # used to store all json files
33-
TABLE_NAME = "table.md"
34-
CSV_NAME = "data.csv"
35-
WARMUP_FILE = ".warmup.json"
36-
demo_output = {
37-
"backend": "sglang-oai",
38-
"dataset_name": "random",
39-
"request_rate": 10.0,
40-
"max_concurrency": 10,
41-
"sharegpt_output_len": None,
42-
"random_input_len": 1200,
43-
"random_output_len": 800,
44-
"random_range_ratio": 1.0,
45-
"duration": 45.11868940386921,
46-
"completed": 100,
47-
"total_input_tokens": 120000,
48-
"total_output_tokens": 80000,
49-
"total_output_tokens_retokenized": 79998,
50-
"request_throughput": 2.2163764356024127,
51-
"input_throughput": 2659.6517227228956,
52-
"output_throughput": 1773.1011484819303,
53-
"mean_e2e_latency_ms": 4482.026166650467,
54-
"median_e2e_latency_ms": 4487.435979535803,
55-
"std_e2e_latency_ms": 32.15524448450066,
56-
"p99_e2e_latency_ms": 4534.823208898306,
57-
"mean_ttft_ms": 38.534140698611736,
58-
"median_ttft_ms": 42.44273528456688,
59-
"std_ttft_ms": 10.558202315257851,
60-
"p99_ttft_ms": 61.15902605932206,
61-
"mean_tpot_ms": 5.561316678287678,
62-
"median_tpot_ms": 5.56157646876747,
63-
"std_tpot_ms": 0.04168330778296244,
64-
"p99_tpot_ms": 5.627061070545631,
65-
"mean_itl_ms": 5.561935330397016,
66-
"median_itl_ms": 5.495080258697271,
67-
"std_itl_ms": 1.1977701758121588,
68-
"p95_itl_ms": 6.047771545127034,
69-
"p99_itl_ms": 6.62423954345286,
70-
"concurrency": 9.933857179517508,
71-
"accept_length": None,
72-
}
73-
7432

7533
def cmp_preprocess_client_cmds(
7634
client_cmds: List[str], server_access_info: ServerAccessInfo

examples/client_gen.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# input args
66
base_url = os.environ["BASE_URL"]
7-
dataset_path = os.environ["SHAREGPT_DATASET"]
7+
dataset_path = os.environ["SHAREGPT_DATAPATH"]
88
input_features = [
99
"random_input_len",
1010
"random_output_len",
@@ -85,5 +85,5 @@
8585
server_label="qwen3_06b",
8686
n=3,
8787
only_last=True,
88-
output_dir="output",
88+
output_dir="client_gen_output",
8989
)

0 commit comments

Comments (0)