diff --git a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql index 2be43a00ab..79c978920e 100644 --- a/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql +++ b/torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql @@ -49,6 +49,26 @@ WITH benchmarks AS ( tupleElement(o.benchmark, 'extra_info')['args'], 'tensor_parallel_size' ), + 'random_input_len', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'random_input_len' + ), + 'random_output_len', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'random_output_len' + ), + 'input_len', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'input_len' + ), + 'output_len', + JSONExtractString( + tupleElement(o.benchmark, 'extra_info')['args'], + 'output_len' + ), -- Used by Cachebench 'is_dynamic', IF( diff --git a/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx b/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx index faf66554a9..2c4084a705 100644 --- a/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsGraphPanel.tsx @@ -170,31 +170,24 @@ export default function LLMsGraphPanel({ const metric = record.metric; if (repoName === "vllm-project/vllm") { - let requestRate = record.extra!["request_rate"]; - // TODO (huydhn): Fix the invalid JSON on vLLM side - if ( - metric.includes("itl") || - metric.includes("tpot") || - metric.includes("ttft") - ) { - requestRate = requestRate !== "" ? requestRate : "Inf"; - } + const requestRate = record.extra!["request_rate"]; + const tensorParallel = record.extra!["tensor_parallel_size"]; + const inputLen = record.extra!["random_input_len"] + ? record.extra!["random_input_len"] + : record.extra!["input_len"]; + const outputLen = record.extra!["random_output_len"] + ?
record.extra!["random_output_len"] + : record.extra!["output_len"]; - let tensorParallel = record.extra!["tensor_parallel_size"]; - // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark - // script on vLLM side - if (model.includes("8B")) { - tensorParallel = tensorParallel !== "" ? tensorParallel : "1"; - } else if (model.includes("70B")) { - tensorParallel = tensorParallel !== "" ? tensorParallel : "4"; - } else if (model.includes("8x7B")) { - tensorParallel = tensorParallel !== "" ? tensorParallel : "2"; + record.display = `${model} / tp${tensorParallel}`; + if (requestRate) { + record.display = `${record.display} / qps_${requestRate}`; } - - if (requestRate !== "") { - record.display = `${model} / tp${tensorParallel} / qps_${requestRate}`; - } else { - record.display = `${model} / tp${tensorParallel}`; + if (inputLen) { + record.display = `${record.display} / in_${inputLen}`; + } + if (outputLen) { + record.display = `${record.display} / out_${outputLen}`; } } else if ( repoName === "pytorch/pytorch" && diff --git a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx index 5c6773a348..965cae662e 100644 --- a/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx +++ b/torchci/components/benchmark/llms/components/LLMsSummaryPanel.tsx @@ -186,6 +186,24 @@ export default function LLMsSummaryPanel({ return `${params.value}`; }, }); + + columns.push({ + field: "input_len", + headerName: "Input len.", + flex: 1, + renderCell: (params: GridRenderCellParams) => { + return `${params.value}`; + }, + }); + + columns.push({ + field: "output_len", + headerName: "Output len.", + flex: 1, + renderCell: (params: GridRenderCellParams) => { + return `${params.value}`; + }, + }); } if ( diff --git a/torchci/lib/benchmark/llms/utils/llmUtils.ts b/torchci/lib/benchmark/llms/utils/llmUtils.ts index 510a81f797..b95ca65067 100644 --- 
a/torchci/lib/benchmark/llms/utils/llmUtils.ts +++ b/torchci/lib/benchmark/llms/utils/llmUtils.ts @@ -293,37 +293,15 @@ const toRowData = ( if (repoName === "vllm-project/vllm") { // These fields are only available on vLLM benchmark const extraInfo = JSON.parse(extra); - // TODO (huydhn): Fix the invalid JSON on vLLM side - if ( - metric.includes("itl") || - metric.includes("tpot") || - metric.includes("ttft") - ) { - extraInfo["request_rate"] = - extraInfo["request_rate"] !== "" ? extraInfo["request_rate"] : "Inf"; - } - // TODO (huydhn): Fix the passing of tensor_parallel_size to the benchmark - // script on vLLM side - if (model.includes("8B")) { - extraInfo["tensor_parallel_size"] = - extraInfo["tensor_parallel_size"] !== "" - ? extraInfo["tensor_parallel_size"] - : 1; - } else if (model.includes("70B")) { - extraInfo["tensor_parallel_size"] = - extraInfo["tensor_parallel_size"] !== "" - ? extraInfo["tensor_parallel_size"] - : 4; - } else if (model.includes("8x7B")) { - extraInfo["tensor_parallel_size"] = - extraInfo["tensor_parallel_size"] !== "" - ? extraInfo["tensor_parallel_size"] - : 2; - } - row["extra"] = extraInfo; row["tensor_parallel_size"] = extraInfo["tensor_parallel_size"]; row["request_rate"] = extraInfo["request_rate"]; + row["input_len"] = extraInfo["random_input_len"] + ? extraInfo["random_input_len"] + : extraInfo["input_len"]; + row["output_len"] = extraInfo["random_output_len"] + ? extraInfo["random_output_len"] + : extraInfo["output_len"]; } if (