Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions torchci/clickhouse_queries/vllm/docker_build_runtime/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"params": {
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
},
"tests": [
{
"repo": "https://github.com/vllm-project/vllm.git",
"startTime": "2025-10-01T00:00:00.000",
"stopTime": "2025-11-01T00:00:00.000"
}
]
}
32 changes: 32 additions & 0 deletions torchci/clickhouse_queries/vllm/docker_build_runtime/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- vLLM Docker Build Image Runtime Trends (main branch only)
-- Tracks runtime for the ":docker: build image" job specifically
-- This is a critical job for build speed monitoring
--
-- Query parameters used by this statement:
--   repo      (String)        - compared against the pipeline's 'repository' field
--   startTime (DateTime64(3)) - inclusive lower bound on the job's 'started_at'
--   stopTime  (DateTime64(3)) - exclusive upper bound on the job's 'started_at'

-- CTE `jobs`: one row per matching Buildkite job, with the nested tuple
-- fields flattened into plain columns for the outer SELECT.
WITH jobs AS (
SELECT
tupleElement(job, 'name') AS job_name,
tupleElement(job, 'started_at') AS job_started_at,
tupleElement(job, 'finished_at') AS job_finished_at,
tupleElement(job, 'state') AS job_state,
tupleElement(build, 'number') AS build_number
FROM vllm.vllm_buildkite_jobs
WHERE
-- Restrict to the requested repository and to builds of the main branch
tupleElement(pipeline, 'repository') = {repo: String }
AND tupleElement(build, 'branch') = 'main'
-- Exact (case-sensitive) match on the job name; the name is hard-coded here
AND tupleElement(job, 'name') = ':docker: build image'
Copy link
Contributor

@huydhn huydhn Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, you could make this job name an input parameter; then you could reuse this query to get the runtime of other jobs too, not only `:docker: build image`.

-- Both timestamps are required because the runtime is computed from them
AND tupleElement(job, 'started_at') IS NOT NULL
AND tupleElement(job, 'finished_at') IS NOT NULL
-- Half-open time window [startTime, stopTime) applied to the job start time
AND tupleElement(job, 'started_at') >= {startTime: DateTime64(3) }
AND tupleElement(job, 'started_at') < {stopTime: DateTime64(3) }
-- Keep only jobs whose lowercased state is one of the values listed below;
-- jobs in any other state are excluded from the result
AND lowerUTF8(tupleElement(job, 'state')) IN (
'passed', 'finished', 'success', 'failed'
)
)

-- One output row per job, ordered chronologically by start time.
-- runtime_minutes = seconds between start and finish, divided by 60 and
-- rounded to 2 decimal places.
SELECT
job_started_at AS timestamp,
build_number,
round(dateDiff('second', job_started_at, job_finished_at) / 60.0, 2)
AS runtime_minutes
FROM jobs
ORDER BY job_started_at ASC
16 changes: 16 additions & 0 deletions torchci/clickhouse_queries/vllm/job_runtime_trends/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"params": {
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)",
"jobGroups": "Array(String)"
},
"tests": [
{
"repo": "https://github.com/vllm-project/vllm.git",
"startTime": "2025-10-01T00:00:00.000",
"stopTime": "2025-10-08T00:00:00.000",
"jobGroups": ["main", "amd", "torch_nightly"]
}
]
}
66 changes: 66 additions & 0 deletions torchci/clickhouse_queries/vllm/job_runtime_trends/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
-- vLLM Job Runtime Trends (main branch only)
-- Aggregates per-job runtime statistics by day
-- Shows count, mean, p90, and max runtime for each job per day
-- Supports filtering by job groups: AMD, Torch Nightly, or Main
--
-- Query parameters used by this statement:
--   repo      (String)        - compared against the pipeline's 'repository' field
--   startTime (DateTime64(3)) - inclusive lower bound on the job's 'started_at'
--   stopTime  (DateTime64(3)) - exclusive upper bound on the job's 'started_at'
--   jobGroups (Array(String)) - any of 'amd', 'torch_nightly', 'main'; every
--                               branch of the group filter requires one of these
--                               values, so an empty array matches no rows

-- CTE `jobs`: one row per matching Buildkite job with the nested tuple fields
-- flattened. `job_state` and `branch` are selected here but are not referenced
-- by the outer SELECT.
WITH jobs AS (
SELECT
tupleElement(job, 'name') AS job_name,
tupleElement(job, 'started_at') AS job_started_at,
tupleElement(job, 'finished_at') AS job_finished_at,
tupleElement(job, 'state') AS job_state,
tupleElement(build, 'branch') AS branch
FROM vllm.vllm_buildkite_jobs
WHERE
-- Restrict to the requested repository and to builds of the main branch
tupleElement(pipeline, 'repository') = {repo: String }
AND tupleElement(build, 'branch') = 'main'
-- Both timestamps are required because the runtime is computed from them
AND tupleElement(job, 'started_at') IS NOT NULL
AND tupleElement(job, 'finished_at') IS NOT NULL
-- Half-open time window [startTime, stopTime) applied to the job start time
AND tupleElement(job, 'started_at') >= {startTime: DateTime64(3) }
AND tupleElement(job, 'started_at') < {stopTime: DateTime64(3) }
-- Keep only jobs whose lowercased state is one of the values listed below
AND lowerUTF8(tupleElement(job, 'state')) IN (
'passed', 'finished', 'success', 'failed'
)
-- Job group filtering: AMD, Torch Nightly, or Main
-- A job is kept when it belongs to at least one requested group. Group
-- membership is decided by a case-insensitive substring search on the job
-- name, so a name containing both 'AMD' and 'Torch Nightly' belongs to both
-- the 'amd' and the 'torch_nightly' groups.
AND (
(
-- 'amd': job name contains 'AMD'
has({jobGroups: Array(String)}, 'amd')
AND positionCaseInsensitive(tupleElement(job, 'name'), 'AMD')
> 0
)
OR (
-- 'torch_nightly': job name contains 'Torch Nightly'
has({jobGroups: Array(String)}, 'torch_nightly')
AND positionCaseInsensitive(
tupleElement(job, 'name'), 'Torch Nightly'
)
> 0
)
OR (
-- 'main': job name contains neither 'AMD' nor 'Torch Nightly'
has({jobGroups: Array(String)}, 'main')
AND positionCaseInsensitive(tupleElement(job, 'name'), 'AMD')
= 0
AND positionCaseInsensitive(
tupleElement(job, 'name'), 'Torch Nightly'
)
= 0
)
)
)

-- One output row per (job_name, start date). All runtimes are in minutes
-- (seconds between start and finish divided by 60), rounded to 2 decimals.
SELECT
job_name,
toDate(job_started_at) AS date,
count() AS count,
round(avg(dateDiff('second', job_started_at, job_finished_at) / 60.0), 2)
AS mean_runtime_minutes,
-- NOTE(review): ClickHouse documents quantile() as an approximate,
-- sampling-based aggregate; quantileExact() is the exact variant if
-- reproducible p90 values are needed - confirm which is intended.
round(
quantile(0.9) (
dateDiff('second', job_started_at, job_finished_at) / 60.0
),
2
) AS p90_runtime_minutes,
round(max(dateDiff('second', job_started_at, job_finished_at) / 60.0), 2)
AS max_runtime_minutes
FROM jobs
GROUP BY job_name, date
ORDER BY job_name ASC, date ASC
19 changes: 1 addition & 18 deletions torchci/components/metrics/vllm/CiDurationsPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -230,28 +230,11 @@ export default function CiDurationsPanel({
...getLineSeries(dailyMeanSuccess, dailyMeanNonCanceled),
...getScatterSeriesByState(source),
],
dataZoom: [
{
type: "slider",
show: true,
xAxisIndex: 0,
bottom: 0,
start: 0,
end: 100,
height: 25,
},
{
type: "inside",
xAxisIndex: 0,
start: 0,
end: 100,
},
],
};

return (
<ChartPaper
tooltip="Main branch CI runtimes over time. Green line = mean runtime for successful builds, Pink line = mean including failures. Scatter points = individual builds (click to view in Buildkite). Use slider or scroll to zoom."
tooltip="Main branch CI runtimes over time. Green line = mean runtime for successful builds, Pink line = mean including failures. Scatter points = individual builds (click to view in Buildkite)."
option={options}
onEvents={{
click: handleBuildClick,
Expand Down
164 changes: 164 additions & 0 deletions torchci/components/metrics/vllm/DockerBuildRuntimePanel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import dayjs from "dayjs";
import { EChartsOption } from "echarts";
import { useDarkMode } from "lib/DarkModeContext";
import _ from "lodash";
import { ChartPaper } from "./chartUtils";
import { COLOR_SUCCESS, COLOR_WARNING } from "./constants";

interface DockerBuildData {
timestamp: string;
build_number: number;
runtime_minutes: number;
}

/**
 * Builds the HTML tooltip shown when hovering a point on the Docker build
 * runtime chart.
 *
 * Two `params.data` shapes are handled:
 * - an array row `[timestamp, runtimeMinutes, buildNumber]` (individual build)
 * - an object `{ day, value }` (daily average point)
 *
 * @param params - ECharts tooltip callback params; only `params.data` is read.
 * @returns An HTML snippet, or an empty string when the point carries no
 *   timestamp or no finite numeric runtime.
 */
function formatTooltip(params: any): string {
  if (!params || !params.data) return "";

  const data = params.data;
  const isBuildRow = Array.isArray(data);

  const timestamp = isBuildRow ? data[0] : data.day;
  const runtime = isBuildRow ? data[1] : data.value;
  const buildNumber = isBuildRow ? data[2] : null;

  // Reject null / NaN / non-numeric runtimes up front: calling `.toFixed` on
  // them would either throw or render "NaN min" in the tooltip.
  if (!timestamp || typeof runtime !== "number" || !Number.isFinite(runtime)) {
    return "";
  }

  // Explicit null check (same rule as the click handler) so a numeric build
  // number is never mistaken for "missing" because it is falsy.
  const hasBuildNumber = buildNumber !== undefined && buildNumber !== null;

  const header = hasBuildNumber
    ? `<b>Build #${buildNumber}</b><br/>`
    : `<b>Daily Average</b><br/>`;

  // Daily-average points only carry a calendar day, so showing a time of day
  // for them would always print a meaningless midnight value.
  const when = isBuildRow
    ? `Time: ${dayjs(timestamp).format("M/D/YY h:mm A")}<br/>`
    : `Date: ${dayjs(timestamp).format("M/D/YY")}<br/>`;

  return `${header}${when}Runtime: <b>${runtime.toFixed(1)} min</b>`;
}

/**
 * Chart click handler: opens the Buildkite page of the clicked build in a new
 * browser tab.
 *
 * Only series clicks whose data is an array row carrying a build number at
 * index 2 trigger navigation; daily-average points (plain objects) and clicks
 * on non-series components are ignored.
 *
 * @param params - ECharts click event params; `componentType` and `data` are read.
 */
function handleBuildClick(params: any) {
  if (params?.componentType !== "series") return;

  // Object-shaped data (daily average) has no build number to link to.
  const buildNumber = Array.isArray(params.data) ? params.data[2] : undefined;
  if (buildNumber === undefined || buildNumber === null) return;

  // No-op when there is no browser `window` (e.g. during server-side rendering).
  if (typeof window === "undefined") return;

  const url = `https://buildkite.com/vllm/ci/builds/${buildNumber}/`;
  // `noopener,noreferrer` keeps the opened page from getting a handle on this
  // window through `window.opener`.
  window.open(url, "_blank", "noopener,noreferrer");
}

/**
 * Chart panel for the runtime of the Docker "build image" job.
 *
 * Renders every build as a scatter point plus a per-day average line, and
 * shows the overall average, P90 and build count in the chart subtitle.
 * Clicking a scatter point opens the corresponding Buildkite build.
 *
 * @param data - Rows with `timestamp`, `build_number` and `runtime_minutes`;
 *   `undefined` is treated as an empty list.
 */
export default function DockerBuildRuntimePanel({
  data,
}: {
  data: DockerBuildData[] | undefined;
}) {
  const { darkMode } = useDarkMode();
  const builds = data ?? [];

  // Scatter rows: [timestamp, runtimeMinutes, buildNumber]. The tooltip and
  // click handlers rely on this column order.
  // NOTE(review): dayjs parses the timestamp in the browser's local zone unless
  // the string carries an offset - confirm the query output is meant to be
  // interpreted that way.
  const chartData = builds.map((d) => [
    dayjs(d.timestamp).toISOString(),
    d.runtime_minutes,
    d.build_number,
  ]);

  // Daily average trend line: mean runtime per calendar day, sorted by day.
  const groupedByDay = _.groupBy(builds, (d) =>
    dayjs(d.timestamp).format("YYYY-MM-DD")
  );

  const dailyAvg = Object.entries(groupedByDay)
    .map(([day, records]) => ({
      day,
      value: Number(_.meanBy(records, "runtime_minutes").toFixed(1)),
    }))
    .sort((a, b) => (a.day < b.day ? -1 : 1));

  // Summary statistics for the subtitle.
  const runtimes = builds.map((d) => d.runtime_minutes);
  const buildCount = runtimes.length;
  const avgRuntime = buildCount ? _.mean(runtimes).toFixed(1) : "N/A";

  // Nearest-rank P90 on a sorted copy: index ceil(0.9 * n) - 1. Integer
  // arithmetic (n * 9 / 10) avoids floating-point overshoot before the ceil,
  // and small samples no longer report the maximum as the P90.
  const sortedRuntimes = [...runtimes].sort((a, b) => a - b);
  const p90Index = Math.max(0, Math.ceil((buildCount * 9) / 10) - 1);
  const p90Runtime = buildCount ? sortedRuntimes[p90Index].toFixed(1) : "N/A";

  const options: EChartsOption = {
    title: {
      text: "Docker Build Image Runtime",
      subtext: `Avg: ${avgRuntime}m | P90: ${p90Runtime}m | Total builds: ${buildCount}`,
      textStyle: {
        fontSize: 14,
      },
    },
    legend: {
      top: 24,
      data: ["Individual Builds", "Daily Average"],
    },
    grid: { top: 60, right: 20, bottom: 80, left: 60 },
    // Dataset 0 feeds the scatter series, dataset 1 the daily average line.
    dataset: [{ source: chartData }, { source: dailyAvg }],
    xAxis: {
      type: "time",
      axisLabel: {
        hideOverlap: true,
        formatter: (value: number) => dayjs(value).format("M/D"),
      },
    },
    yAxis: {
      type: "value",
      name: "Runtime (minutes)",
      nameLocation: "middle",
      nameGap: 45,
      nameRotate: 90,
      axisLabel: {
        formatter: (value: number) => `${value}m`,
      },
    },
    series: [
      {
        name: "Individual Builds",
        type: "scatter",
        datasetIndex: 0,
        symbolSize: 6,
        itemStyle: { color: COLOR_SUCCESS, opacity: 0.6 },
      },
      {
        name: "Daily Average",
        type: "line",
        datasetIndex: 1,
        smooth: true,
        encode: { x: "day", y: "value" },
        lineStyle: { color: COLOR_WARNING, width: 2 },
        itemStyle: { color: COLOR_WARNING },
        showSymbol: true,
        symbolSize: 4,
      },
    ],
    tooltip: {
      trigger: "item",
      formatter: formatTooltip,
    },
  };

  // The help text deliberately avoids naming a colour for the trend line: the
  // line is drawn with COLOR_WARNING, while the points use COLOR_SUCCESS.
  return (
    <ChartPaper
      tooltip="Docker build image runtime over time. Each point is an individual build (click to open in Buildkite). The line shows the daily average trend."
      option={options}
      onEvents={{ click: handleBuildClick }}
      darkMode={darkMode}
    />
  );
}
19 changes: 1 addition & 18 deletions torchci/components/metrics/vllm/DurationDistributionPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -164,28 +164,11 @@ export default function DurationDistributionPanel({
axisPointer: { type: "shadow" },
formatter: formatDistributionTooltip,
},
dataZoom: [
{
type: "slider",
show: true,
xAxisIndex: 0,
bottom: 0,
start: 0,
end: 100,
height: 25,
},
{
type: "inside",
xAxisIndex: 0,
start: 0,
end: 100,
},
],
};

return (
<ChartPaper
tooltip="Histogram showing distribution of main branch CI runtimes (how long builds take to complete). Green = successful builds, Red = failed builds, Gray = canceled builds. Use slider or scroll to zoom."
tooltip="Histogram showing distribution of main branch CI runtimes (how long builds take to complete). Green = successful builds, Red = failed builds, Gray = canceled builds."
option={options}
darkMode={darkMode}
/>
Expand Down
Loading