Show input and output length on vLLM dashboard (#6992)

huydhn · web-flow · commit f9f933a5cea2 · 2025-08-12T12:52:35.000-07:00
Fixes #6974 Input and output lengths are new dimensions on the dashboard that need to be displayed after pytorch/pytorch-integration-testing#42. This PR also cleans up some old TODO code paths for the vLLM dashboard. ### Testing Different input and output lengths are showing up correctly now with their benchmark results on [the preview](https://torchci-git-fork-huydhn-query-input-output-length-fbopensource.vercel.app/benchmark/llms?startTime=Sat%2C%2002%20Aug%202025%2001%3A35%3A55%20GMT&stopTime=Sat%2C%2009%20Aug%202025%2001%3A35%3A55%20GMT&granularity=day&lBranch=main&lCommit=0edaf752d7482a3c170c25376c466e730ab87ddd&rBranch=main&rCommit=e5ebeeba531755a78f68413e88a23d061404f3e3&repoName=vllm-project%2Fvllm&benchmarkName=&modelName=meta-llama%2FLlama-4-Scout-17B-16E-Instruct&backendName=All%20Backends&modeName=All%20Modes&dtypeName=All%20DType&deviceName=All%20Devices&archName=All%20Platforms) --------- Signed-off-by: Huy Do <huydhn@gmail.com>
diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
@@ -49,6 +49,26 @@ WITH benchmarks AS (
                 tupleElement(o.benchmark, 'extra_info')['args'],
                 'tensor_parallel_size'
             ),
+            'random_input_len',
+            JSONExtractString(
+              tupleElement(benchmark, 'extra_info')['args'],
+              'random_input_len'
+            ),
+            'random_output_len',
+            JSONExtractString(
+              tupleElement(benchmark, 'extra_info')['args'],
+              'random_output_len'
+            ),
+            'input_len',
+            JSONExtractString(
+              tupleElement(benchmark, 'extra_info')['args'],
+              'input_len'
+            ),
+            'output_len',
+            JSONExtractString(
+              tupleElement(benchmark, 'extra_info')['args'],
+              'output_len'
+            ),
             -- Used by Cachebench
             'is_dynamic',
             IF(
diff --git a/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx b/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx
@@ -170,31 +170,24 @@ export default function LLMsGraphPanel({
               const metric = record.metric;
 
               if (repoName === "vllm-project/vllm") {
-                let requestRate = record.extra!["request_rate"];
-                // TODO (huydhn): Fix the invalid JSON on vLLM side
-                if (
-                  metric.includes("itl") ||
-                  metric.includes("tpot") ||
-                  metric.includes("ttft")
-                ) {
-                  requestRate = requestRate !== "" ? requestRate : "Inf";
-                }
+                const requestRate = record.extra!["request_rate"];
+                const tensorParallel = record.extra!["tensor_parallel_size"];
+                const inputLen = record.extra!["random_input_len"]
+                  ? record.extra!["random_input_len"]
+                  : record.extra!["input_len"];
+                const outputLen = record.extra!["random_output_len"]
+                  ? record.extra!["random_output_len"]
+                  : record.extra!["output_len"];
 
-                let tensorParallel = record.extra!["tensor_parallel_size"];
-                // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark
-                // script on vLLM side
-                if (model.includes("8B")) {
-                  tensorParallel = tensorParallel !== "" ? tensorParallel : "1";
-                } else if (model.includes("70B")) {
-                  tensorParallel = tensorParallel !== "" ? tensorParallel : "4";
-                } else if (model.includes("8x7B")) {
-                  tensorParallel = tensorParallel !== "" ? tensorParallel : "2";
+                record.display = `${model} / tp${tensorParallel}`;
+                if (requestRate) {
+                  record.display = `${record.display} / qps_${requestRate}`;
                 }
-
-                if (requestRate !== "") {
-                  record.display = `${model} / tp${tensorParallel} / qps_${requestRate}`;
-                } else {
-                  record.display = `${model} / tp${tensorParallel}`;
+                if (inputLen) {
+                  record.display = `${record.display} / in_${inputLen}`;
+                }
+                if (outputLen) {
+                  record.display = `${record.display} / out_${outputLen}`;
                 }
               } else if (
                 repoName === "pytorch/pytorch" &&
diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx
@@ -186,6 +186,24 @@ export default function LLMsSummaryPanel({
         return `${params.value}`;
       },
     });
+
+    columns.push({
+      field: "input_len",
+      headerName: "Input len.",
+      flex: 1,
+      renderCell: (params: GridRenderCellParams<any>) => {
+        return `${params.value}`;
+      },
+    });
+
+    columns.push({
+      field: "output_len",
+      headerName: "Output len.",
+      flex: 1,
+      renderCell: (params: GridRenderCellParams<any>) => {
+        return `${params.value}`;
+      },
+    });
   }
 
   if (
diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts
@@ -315,37 +315,15 @@ const toRowData = (
     if (repoName === "vllm-project/vllm") {
       // These fields are only available on vLLM benchmark
       const extraInfo = JSON.parse(extra);
-      // TODO (huydhn): Fix the invalid JSON on vLLM side
-      if (
-        metric.includes("itl") ||
-        metric.includes("tpot") ||
-        metric.includes("ttft")
-      ) {
-        extraInfo["request_rate"] =
-          extraInfo["request_rate"] !== "" ? extraInfo["request_rate"] : "Inf";
-      }
-      // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark
-      // script on vLLM side
-      if (model.includes("8B")) {
-        extraInfo["tensor_parallel_size"] =
-          extraInfo["tensor_parallel_size"] !== ""
-            ? extraInfo["tensor_parallel_size"]
-            : 1;
-      } else if (model.includes("70B")) {
-        extraInfo["tensor_parallel_size"] =
-          extraInfo["tensor_parallel_size"] !== ""
-            ? extraInfo["tensor_parallel_size"]
-            : 4;
-      } else if (model.includes("8x7B")) {
-        extraInfo["tensor_parallel_size"] =
-          extraInfo["tensor_parallel_size"] !== ""
-            ? extraInfo["tensor_parallel_size"]
-            : 2;
-      }
-
       row["extra"] = extraInfo;
       row["tensor_parallel_size"] = extraInfo["tensor_parallel_size"];
       row["request_rate"] = extraInfo["request_rate"];
+      // Prefer the random_* length when set, else fall back to the fixed one
+      row["input_len"] =
+        extraInfo["random_input_len"] || extraInfo["input_len"];
+      // Must read random_output_len here (not random_input_len)
+      row["output_len"] =
+        extraInfo["random_output_len"] || extraInfo["output_len"];
     }
 
     if (