Update

Muqi1029 · Muqi1029 · commit 84b5076b4b45 · 2025-12-30T21:21:39.000+08:00
diff --git a/ai_infra_bench/modes/cmp.py b/ai_infra_bench/modes/cmp.py
@@ -71,11 +71,11 @@ def cmp_plot(data, input_features, metrics, labels, output_dir):
     print("Ploting graphs DONE")
 
 
-@enter_decorate("CMP EXPORT TBALE", filename=TABLE_NAME)
+@enter_decorate("CMP EXPORT TABLE", filename=TABLE_NAME)
 def cmp_export_table(
     all_clients_results: List[List[Dict]],
     input_features: List[str],
-    output_metrics: List[Dict],
+    output_metrics: List[str],
     num_clients: int,
     num_servers: int,
     output_dir: str,
@@ -84,45 +84,140 @@ def cmp_export_table(
     if not all_clients_results or not all_clients_results[0]:
         raise ValueError("No data available to export.")
 
-    if server_labels[0] is None:
+    if server_labels is None or server_labels[0] is None:
         server_labels = [f"server_{i + 1}" for i in range(num_servers)]
 
-    # header
-    header_cells = input_features + [" - "]
-    for output_metric in output_metrics:
-        header_cells += [output_metric] + [" - "] * (len(server_labels) - 1)
-    header_row = "| " + " | ".join(map(str, header_cells)) + " |"
+    # --- 1. Build the table header dynamically ---
+    # Combine input_features into one title, e.g. "Config (input_len / output_len / rate)"
+    config_header_name = f"Config ({' / '.join(input_features)})"
 
-    # sub header
-    sub_header_cells = [" - "] * (len(input_features) + 1) + server_labels * len(
-        output_metrics
-    )
-    sub_header_row = "| " + " | ".join(map(str, sub_header_cells)) + " |"
+    header_cells = [config_header_name, "Metric"] + server_labels
+    if num_servers == 2:
+        header_cells.append("Diff (%)")
 
+    header_row = "| " + " | ".join(header_cells) + " |"
     separator_row = "| " + " | ".join(["---"] * len(header_cells)) + " |"
-    lines = [header_row, sub_header_row, separator_row]
+    lines = [header_row, separator_row]
 
+    # --- 2. Iterate over every configuration (client config) ---
     for client_idx in range(num_clients):
-        #
-        row_values = []
-
-        all_server_metrics = []
-        for server_idx in range(num_servers):
-            server_metrics = []
-            idx = client_idx + server_idx * num_clients
-            row_results = all_clients_results[idx]
-            if server_idx == 0:
-                for feature in input_features:
-                    row_values.append(f"{row_results[0][feature]:.2f}")
-                row_values.append("-")
-            for metric in output_metrics:
-                server_metrics.append(avg_std_strf(metric, row_results, precision=2))
-            all_server_metrics.append(server_metrics)
-
-        for i in range(len(output_metrics)):
-            for j in range(num_servers):
-                row_values.append(all_server_metrics[j][i])
-        lines.append("| " + " | ".join(row_values) + " |")
-
-    with open(os.path.join(output_dir, TABLE_NAME), mode="w", encoding="utf-8") as f:
+
+        # Dynamically extract the value of every feature for the current configuration
+        # Indexing: client_idx addresses the first server's results for this configuration
+        first_server_res_list = all_clients_results[client_idx]
+        first_sample = first_server_res_list[0]
+
+        config_val_list = []
+        for feat in input_features:
+            val = first_sample.get(feat, "N/A")
+            # Format the value: two decimal places for floats, otherwise convert to str
+            if isinstance(val, float):
+                config_val_list.append(f"{val:.2f}")
+            else:
+                config_val_list.append(str(val))
+
+        # The joined configuration string, e.g. "1200.00 / 800.00 / 4.00"
+        config_str = " / ".join(config_val_list)
+
+        # --- 3. Iterate over every metric ---
+        for m_idx, metric in enumerate(output_metrics):
+            row_values = []
+
+            # First column: show the configuration only on the first row of the metric block
+            if m_idx == 0:
+                row_values.append(f"**{config_str}**")
+            else:
+                row_values.append(" ")
+
+            # Second column: the metric name
+            row_values.append(metric)
+
+            # Remaining columns: the value for each server
+            numerical_means = []
+            for s_idx in range(num_servers):
+                idx = client_idx + s_idx * num_clients
+                res_list = all_clients_results[idx]
+
+                # Use the existing formatting helper to get the "mean ± std" string
+                display_str = avg_std_strf(metric, res_list, precision=2)
+                row_values.append(display_str)
+
+                # Extract the plain numeric mean needed for the Diff computation
+                try:
+                    m_val = sum(r[metric] for r in res_list) / len(res_list)
+                    numerical_means.append(m_val)
+                except:
+                    numerical_means.append(None)
+
+            # Last column: dynamically compute the difference between the two servers
+            if num_servers == 2:
+                v1, v2 = numerical_means[0], numerical_means[1]
+                if v1 is not None and v2 is not None and v1 != 0:
+                    diff = (v2 - v1) / v1 * 100
+                    row_values.append(f"{diff:+.2f}%")
+                else:
+                    row_values.append("-")
+
+            lines.append("| " + " | ".join(row_values) + " |")
+
+    # --- 4. Write the table to the output file ---
+    output_path = os.path.join(output_dir, TABLE_NAME)
+    with open(output_path, mode="w", encoding="utf-8") as f:
         f.write("\n".join(lines))
+
+
+# @enter_decorate("CMP EXPORT TBALE", filename=TABLE_NAME)
+# def cmp_export_table(
+#     all_clients_results: List[List[Dict]],
+#     input_features: List[str],
+#     output_metrics: List[Dict],
+#     num_clients: int,
+#     num_servers: int,
+#     output_dir: str,
+#     server_labels: List[str],
+# ):
+#     if not all_clients_results or not all_clients_results[0]:
+#         raise ValueError("No data available to export.")
+#
+#     if server_labels[0] is None:
+#         server_labels = [f"server_{i + 1}" for i in range(num_servers)]
+#
+#     # header
+#     header_cells = input_features + [" - "]
+#     for output_metric in output_metrics:
+#         header_cells += [output_metric] + [" - "] * (len(server_labels) - 1)
+#     header_row = "| " + " | ".join(map(str, header_cells)) + " |"
+#
+#     # sub header
+#     sub_header_cells = [" - "] * (len(input_features) + 1) + server_labels * len(
+#         output_metrics
+#     )
+#     sub_header_row = "| " + " | ".join(map(str, sub_header_cells)) + " |"
+#
+#     separator_row = "| " + " | ".join(["---"] * len(header_cells)) + " |"
+#     lines = [header_row, sub_header_row, separator_row]
+#
+#     for client_idx in range(num_clients):
+#         #
+#         row_values = []
+#
+#         all_server_metrics = []
+#         for server_idx in range(num_servers):
+#             server_metrics = []
+#             idx = client_idx + server_idx * num_clients
+#             row_results = all_clients_results[idx]
+#             if server_idx == 0:
+#                 for feature in input_features:
+#                     row_values.append(f"{row_results[0][feature]:.2f}")
+#                 row_values.append("-")
+#             for metric in output_metrics:
+#                 server_metrics.append(avg_std_strf(metric, row_results, precision=2))
+#             all_server_metrics.append(server_metrics)
+#
+#         for i in range(len(output_metrics)):
+#             for j in range(num_servers):
+#                 row_values.append(all_server_metrics[j][i])
+#         lines.append("| " + " | ".join(row_values) + " |")
+#
+#     with open(os.path.join(output_dir, TABLE_NAME), mode="w", encoding="utf-8") as f:
+#         f.write("\n".join(lines))
diff --git a/ai_infra_bench/sgl/cmp_bench.py b/ai_infra_bench/sgl/cmp_bench.py
@@ -19,6 +19,7 @@
     maybe_create_labels,
     maybe_warmup,
     run_cmd,
+    stop_server_process,
     wait_for_server,
 )
 
@@ -91,9 +92,7 @@ def cmp_bench(
                     output_dir=output_dir,
                 )
             )
-
-            if server_process:
-                server_process.terminate()
+            stop_server_process(server_process)
 
         if not disable_csv:
             gen_export_csv(
diff --git a/ai_infra_bench/utils.py b/ai_infra_bench/utils.py
@@ -287,3 +287,53 @@ def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = N
             itself.send_signal(signal.SIGQUIT)
         except psutil.NoSuchProcess:
             pass
+
+
+def stop_server_process(process, timeout=30, cooldown_period=3):
+    """
+    Stop a server sub-process: request termination, wait, and force-kill on timeout.
+
+    The cooldown sleep runs only when a stop was actually attempted; both early
+    returns (``process`` is None, or it has already exited) skip it. Any
+    exception raised while stopping is logged and not re-raised.
+
+    Args:
+        process (subprocess.Popen): The process object to stop. May be None.
+        timeout (int): Seconds to wait after ``terminate()`` before calling ``kill()``.
+        cooldown_period (int): Seconds to sleep afterwards; values <= 0 disable the
+            sleep. This is a fixed delay; nothing is checked for actual release.
+
+    Returns:
+        None
+    """
+    if process is None:
+        logger.warning("No process found to terminate.")
+        return
+
+    # poll() returns None while the process is still running; anything else
+    # means it has already exited, so there is nothing to stop.
+    if process.poll() is not None:
+        logger.info(
+            f"Process (PID: {process.pid}) has already exited with code: {process.returncode}"
+        )
+        return
+
+    try:
+        logger.info(f"Sending SIGTERM to process {process.pid} (Graceful Shutdown)...")
+        # 1. Ask the process to shut down.
+        # NOTE(review): only `process` itself is signalled here; if the server
+        # spawns children they may need kill_process_tree() -- confirm.
+        process.terminate()
+
+        try:
+            # 2. Block until the process exits or `timeout` seconds elapse
+            process.wait(timeout=timeout)
+            logger.info("Server exited gracefully.")
+        except subprocess.TimeoutExpired:
+            # 3. Still running after `timeout` seconds: force kill it
+            logger.warning(
+                f"Server did not exit within {timeout}s. Sending SIGKILL (Force Kill)..."
+            )
+            process.kill()
+            process.wait()  # Reap the killed process (no timeout on this wait)
+            logger.info("Server was forcibly killed.")
+
+    except Exception as e:
+        # Best effort: log the failure and still fall through to the cooldown.
+        logger.error(f"An error occurred while stopping the server: {e}")
+
+    # 4. Fixed delay intended to let GPU memory and TCP ports be released
+    if cooldown_period > 0:
+        logger.info(
+            f"Waiting {cooldown_period}s for GPU VRAM and TCP ports to be fully released..."
+        )
+        time.sleep(cooldown_period)
diff --git a/examples/cmp_bench.py b/examples/cmp_bench.py
@@ -9,8 +9,27 @@
 host = "127.0.0.1"
 port = "8888"
 tp_size = 1
-model_path = os.environ["QWEN3_32B_FP8"]
+model_path = "Qwen/Qwen3-0.6B"
+rate_list = [8]
 dataset_path = os.environ["SHAREGPT_DATAPATH"]
+input_features = [
+    "random_input_len",
+    "random_output_len",
+    "request_rate",
+    "max_concurrency",
+]
+
+output_metrics = [
+    "mean_ttft_ms",
+    "p99_ttft_ms",
+    "mean_tpot_ms",
+    "p99_tpot_ms",
+    "mean_itl_ms",
+    "p99_itl_ms",
+    "mean_e2e_latency_ms",
+    "p99_e2e_latency_ms",
+    "output_throughput",
+]
 
 
 ####################################
@@ -23,17 +42,16 @@
     --host {host}
     --port {port}
     --disable-radix-cache
-    --kv-cache-dtype fp8_e4m3
 """
 
 server_cmds: List[str] = [
     server_template.format(
         model_path=model_path, tp_size=tp_size, host=host, port=port
     ),
     server_template.format(model_path=model_path, tp_size=tp_size, host=host, port=port)
-    + " --tool-call-parser qwen25",
+    + " --tool-call-parser qwen",
 ]
-server_labels = ["Qwen3-32B-FP8", "QWEN3-32B-FP8-Without-tool"]
+server_labels = ["Qwen3-06B", "QWEN3-06B-With-Tool-Call-Parser"]
 
 ##########################
 # Constructing client_cmds
@@ -61,27 +79,12 @@
         output_len=output_len,
         request_rate=rate,
         dataset_path=dataset_path,
-        num_prompt=rate * 10,
+        num_prompt=min(max(rate * 10, 80), 250),
     )
-    for rate in range(4, 12 + 1, 2)
+    for rate in rate_list
 ]
 
 #####################
-input_features = [
-    "request_rate",
-]
-
-output_metrics = [
-    "mean_ttft_ms",
-    "p99_ttft_ms",
-    "mean_tpot_ms",
-    "p99_tpot_ms",
-    "mean_itl_ms",
-    "p99_itl_ms",
-    "mean_e2e_latency_ms",
-    "p99_e2e_latency_ms",
-    "output_throughput",
-]
 
 if __name__ == "__main__":
     cmp_bench(
@@ -92,7 +95,8 @@
         server_labels=server_labels,
         host=host,
         port=port,
-        output_dir="tool_cmp_bench_output",
-        n=3,
+        n=1,
         only_last=True,
+        output_dir="tool_cmp_bench_output",
+        disable_warmup=True,
     )

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@`
`19`	`19`	`maybe_create_labels,`
`20`	`20`	`maybe_warmup,`
`21`	`21`	`run_cmd,`
	`22`	`+ stop_server_process,`
`22`	`23`	`wait_for_server,`
`23`	`24`	`)`
`24`	`25`
`@@ -91,9 +92,7 @@ def cmp_bench(`
`91`	`92`	`output_dir=output_dir,`
`92`	`93`	`)`
`93`	`94`	`)`
`94`		`-`
`95`		`- if server_process:`
`96`		`- server_process.terminate()`
	`95`	`+ stop_server_process(server_process)`
`97`	`96`
`98`	`97`	`if not disable_csv:`
`99`	`98`	`gen_export_csv(`