Skip to content

Commit 344d1e3

Browse files
authored
vllm - Add initial set of metrics (#7285)
Adds metrics for both CI runtime and code review cycle. Updated to also add reliability metrics.
1 parent 401d216 commit 344d1e3

37 files changed

+4082
-120
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"params": {
3+
"granularity": "String",
4+
"repo": "String",
5+
"pipelineName": "String",
6+
"startTime": "DateTime64(3)",
7+
"stopTime": "DateTime64(3)"
8+
},
9+
"tests": [
10+
{
11+
"granularity": "day",
12+
"repo": "https://github.com/vllm-project/vllm.git",
13+
"pipelineName": "CI",
14+
"startTime": "2025-09-26T00:00:00.000",
15+
"stopTime": "2025-10-03T00:00:00.000"
16+
}
17+
]
18+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
-- vLLM CI reliability metrics (main branch only)
--
-- Computes the CI success rate over time for Buildkite builds. Builds are
-- bucketed by truncating build.started_at to {granularity} (e.g. 'day') and
-- formatting the result as YYYY-MM-DD.
--
-- Parameters:
--   granularity   unit passed to DATE_TRUNC
--   repo          value matched against pipeline.repository
--   pipelineName  value matched against pipeline.name
--   startTime     inclusive lower bound on build.started_at
--   stopTime      exclusive upper bound on build.started_at
--
-- Output (one row per bucket, ascending):
--   granularity_bucket, passed_count, failed_count, canceled_count,
--   total_count, non_canceled_count, success_rate
--   success_rate = passed_count / non_canceled_count, NULL when every build
--   in the bucket was canceled.
--
-- Soft failures: a build whose only failed jobs are soft failures counts as
-- successful. Only the main branch is tracked to exclude work-in-progress
-- PR noise.
--
-- NOTE(review): any build that is not canceled and has no hard-failed job is
-- counted as passed, whatever its build state. Builds that are still running
-- therefore land in passed_count -- confirm this is intended.

WITH build_jobs AS (
    -- Job-level rows of main-branch builds started inside the time window.
    SELECT
        tupleElement(pipeline, 'repository') AS repository,
        tupleElement(pipeline, 'name') AS pipeline_name,
        toUInt32(tupleElement(build, 'number')) AS build_number,
        tupleElement(build, 'state') AS build_state,
        tupleElement(job, 'state') AS job_state,
        tupleElement(job, 'soft_failed') AS soft_failed,
        formatDateTime(
            DATE_TRUNC(
                {granularity: String },
                tupleElement(build, 'started_at')
            ),
            '%Y-%m-%d'
        ) AS bucket
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(pipeline, 'repository') = {repo: String }
        AND tupleElement(pipeline, 'name') = {pipelineName: String }
        AND tupleElement(build, 'branch') = 'main'
        AND tupleElement(build, 'started_at') IS NOT NULL
        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
),

builds AS (
    -- Collapse the job rows into one row per build.
    SELECT
        repository,
        pipeline_name,
        build_number,
        any(bucket) AS bucket,
        -- Distinct alias so later references cannot be confused with the
        -- un-aggregated input column of the same name.
        any(build_state) AS agg_build_state,
        -- Hard failure: job.state = 'failed' and the job is not flagged as a
        -- soft failure. A NULL soft_failed flag is treated as "not soft";
        -- a bare `soft_failed = FALSE` would evaluate to NULL and silently
        -- skip the job, turning a failed build into a passed one.
        countIf(
            lowerUTF8(job_state) = 'failed'
            AND (soft_failed = FALSE OR soft_failed IS NULL)
        ) AS hard_failures,
        -- A build is successful if it has no hard failures
        -- (even if it has soft failures) and was not canceled.
        if(
            hard_failures = 0
            AND lowerUTF8(agg_build_state) NOT IN ('canceled', 'cancelled'),
            1,
            0
        ) AS is_success,
        if(lowerUTF8(agg_build_state) IN ('canceled', 'cancelled'), 1, 0)
            AS is_canceled
    FROM build_jobs
    GROUP BY
        repository,
        pipeline_name,
        build_number
),

daily_stats AS (
    -- Per-bucket build counts; canceled builds are excluded from the
    -- success-rate denominator.
    SELECT
        bucket,
        sum(is_success) AS passed_count,
        count() - sum(is_success) - sum(is_canceled) AS failed_count,
        sum(is_canceled) AS canceled_count,
        count() AS total_count,
        count() - sum(is_canceled) AS non_canceled_count,
        if(
            non_canceled_count > 0,
            round(passed_count / non_canceled_count, 4),
            NULL
        ) AS success_rate
    FROM builds
    GROUP BY bucket
)

SELECT
    bucket AS granularity_bucket,
    passed_count,
    failed_count,
    canceled_count,
    total_count,
    non_canceled_count,
    success_rate
FROM daily_stats
ORDER BY granularity_bucket ASC
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"pipelineName": "String",
5+
"startTime": "DateTime64(3)",
6+
"stopTime": "DateTime64(3)"
7+
},
8+
"tests": [
9+
{
10+
"repo": "https://github.com/vllm-project/vllm.git",
11+
"pipelineName": "CI",
12+
"startTime": "2025-09-26T00:00:00.000",
13+
"stopTime": "2025-10-03T00:00:00.000"
14+
}
15+
]
16+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
-- vLLM CI build durations, restricted to the main branch.
--
-- Emits one row per (pipeline_name, build_number) with the build's start and
-- finish timestamps, its state, and the elapsed time between the two in
-- seconds and in hours (rounded to 3 decimals).
--
-- Parameters:
--   repo          value matched against pipeline.repository
--   pipelineName  value matched against pipeline.name
--   startTime     inclusive lower bound on build.started_at
--   stopTime      exclusive upper bound on build.started_at
--
-- PR branches are left out so work-in-progress runs do not skew the numbers.

WITH finished_main_builds AS (
    -- Rows whose build has both a start and a finish timestamp.
    SELECT
        tupleElement(pipeline, 'name') AS pipeline_name,
        toUInt32(tupleElement(build, 'number')) AS build_number,
        tupleElement(build, 'started_at') AS build_start,
        tupleElement(build, 'finished_at') AS build_finish,
        tupleElement(build, 'state') AS raw_build_state
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(build, 'branch') = 'main'
        AND tupleElement(pipeline, 'repository') = {repo: String }
        AND tupleElement(pipeline, 'name') = {pipelineName: String }
        AND tupleElement(build, 'started_at') IS NOT NULL
        AND tupleElement(build, 'finished_at') IS NOT NULL
        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
)

SELECT
    pipeline_name,
    build_number,
    -- The source table is job-level, so collapse to one timestamp per build.
    max(build_start) AS started_at,
    max(build_finish) AS finished_at,
    any(raw_build_state) AS build_state,
    dateDiff('second', started_at, finished_at) AS duration_seconds,
    round(duration_seconds / 3600.0, 3) AS duration_hours
FROM finished_main_builds
GROUP BY
    pipeline_name,
    build_number
ORDER BY started_at
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"pipelineName": "String",
5+
"jobNames": "Array(String)",
6+
"lookbackDays": "UInt32"
7+
},
8+
"tests": [
9+
{
10+
"repo": "https://github.com/vllm-project/vllm.git",
11+
"pipelineName": "CI",
12+
"jobNames": ["Basic Correctness Test", "Neuron Test", "TPU V1 Test"],
13+
"lookbackDays": 60
14+
}
15+
]
16+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
-- Find the most recent failure and the most recent "first break" for each job
-- (main branch only, builds started within the last {lookbackDays} days).
--
-- Parameters:
--   repo          value matched against pipeline.repository
--   pipelineName  value matched against pipeline.name
--   jobNames      job names to report on
--   lookbackDays  size of the look-back window, counted back from now()
--
-- Output: one row per job that was in state 'failed' at least once in the
-- window, most recently failed first:
--   recent_failed_build / recent_failed_at
--       build number and build start time of the latest build in which the
--       job was in state 'failed' (soft failures included)
--   first_break_build / first_break_at
--       latest run at which the job went from success to hard failure
--
-- NOTE(review): this is a plain LEFT JOIN, so with ClickHouse's default
-- join_use_nulls = 0 a job without a detected break gets type-default values
-- in first_break_* rather than NULL -- confirm the client handles that.

WITH most_recent_any_failure AS (
    -- Latest failed run per job, regardless of the soft_failed flag.
    SELECT
        tupleElement(job, 'name') AS job_name,
        argMax(
            toUInt32(tupleElement(build, 'number')),
            tupleElement(build, 'started_at')
        ) AS recent_failed_build,
        max(tupleElement(build, 'started_at')) AS recent_failed_at
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(pipeline, 'repository') = {repo: String}
        AND tupleElement(pipeline, 'name') = {pipelineName: String}
        AND tupleElement(build, 'branch') = 'main'
        AND tupleElement(job, 'name') IN {jobNames: Array(String)}
        AND tupleElement(build, 'started_at') IS NOT NULL
        AND tupleElement(build, 'started_at') >= now() - INTERVAL {lookbackDays: UInt32} DAY
        AND tupleElement(build, 'started_at') < now()
        AND lowerUTF8(tupleElement(job, 'state')) = 'failed'
    GROUP BY job_name
),

-- Get all job runs with a success/failure verdict
all_job_runs AS (
    SELECT
        tupleElement(job, 'name') AS job_name,
        toUInt32(tupleElement(build, 'number')) AS build_number,
        tupleElement(build, 'started_at') AS build_started_at,
        -- Used only as an ordering tie-breaker for runs of the same build.
        tupleElement(job, 'finished_at') AS job_finished_at,
        tupleElement(job, 'state') AS job_state,
        tupleElement(job, 'soft_failed') AS soft_failed,
        -- 1 = success (passed, or a soft failure)
        -- 0 = hard failure (failed and not flagged soft; a NULL flag counts
        --     as "not soft" so the run is not silently dropped)
        -- -1 = any other state, filtered out below
        if(
            lowerUTF8(job_state) IN ('passed', 'finished', 'success')
            OR (lowerUTF8(job_state) = 'failed' AND soft_failed = true),
            1,
            if(
                lowerUTF8(job_state) = 'failed'
                AND (soft_failed = false OR soft_failed IS NULL),
                0,
                -1
            )
        ) AS is_success
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(pipeline, 'repository') = {repo: String}
        AND tupleElement(pipeline, 'name') = {pipelineName: String}
        AND tupleElement(build, 'branch') = 'main'
        AND tupleElement(job, 'name') IN {jobNames: Array(String)}
        AND tupleElement(build, 'started_at') IS NOT NULL
        AND tupleElement(build, 'started_at') >= now() - INTERVAL {lookbackDays: UInt32} DAY
        AND tupleElement(build, 'started_at') < now()
        AND is_success IN (0, 1)
),

-- Attach the verdict of the previous run of the same job
all_runs_with_prev AS (
    SELECT
        job_name,
        build_number,
        build_started_at,
        job_finished_at,
        is_success,
        -- build_number and job_finished_at break ties between runs that share
        -- a build start time, so the run order (and therefore the detected
        -- transitions) is deterministic. The explicit frame makes lagInFrame
        -- behave like a plain LAG.
        lagInFrame(is_success) OVER (
            PARTITION BY job_name
            ORDER BY
                build_started_at ASC,
                build_number ASC,
                job_finished_at ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS prev_is_success
    FROM all_job_runs
),

-- Find success->failure transitions for each job; rn = 1 is the most recent
first_break_per_job AS (
    SELECT
        job_name,
        build_number AS first_break_build,
        build_started_at AS first_break_at,
        ROW_NUMBER() OVER (
            PARTITION BY job_name
            ORDER BY
                build_started_at DESC,
                build_number DESC,
                job_finished_at DESC
        ) AS rn
    FROM all_runs_with_prev
    WHERE is_success = 0 AND prev_is_success = 1
)

-- Combine recent failure and first break info
-- (URLs constructed client-side from build numbers)
SELECT
    a.job_name AS job_name,
    a.recent_failed_build AS recent_failed_build,
    a.recent_failed_at AS recent_failed_at,
    b.first_break_build AS first_break_build,
    b.first_break_at AS first_break_at
FROM most_recent_any_failure a
LEFT JOIN first_break_per_job b ON a.job_name = b.job_name AND b.rn = 1
ORDER BY
    a.recent_failed_at DESC,
    a.job_name ASC
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"pipelineName": "String",
5+
"startTime": "DateTime64(3)",
6+
"stopTime": "DateTime64(3)",
7+
"minRuns": "UInt32"
8+
},
9+
"tests": [
10+
{
11+
"repo": "https://github.com/vllm-project/vllm.git",
12+
"pipelineName": "CI",
13+
"startTime": "2025-09-26T00:00:00.000",
14+
"stopTime": "2025-10-03T00:00:00.000",
15+
"minRuns": 3
16+
}
17+
]
18+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
-- vLLM per-job reliability metrics (main branch only)
--
-- Computes the success rate of each individual job in the CI pipeline so the
-- most and least reliable jobs can be identified.
--
-- Parameters:
--   repo          value matched against pipeline.repository
--   pipelineName  value matched against pipeline.name
--   startTime     inclusive lower bound on job.finished_at
--   stopTime      exclusive upper bound on job.finished_at
--   minRuns       minimum number of non-canceled runs a job needs to be listed
--
-- Output: one row per job, least reliable first:
--   job_name, passed_count, soft_failed_count, failed_count, canceled_count,
--   total_count, non_canceled_count, success_rate
--
-- Only the main branch is tracked to exclude work-in-progress PR noise.

WITH jobs AS (
    -- Job rows on main that finished inside the time window.
    SELECT
        tupleElement(job, 'name') AS job_name,
        tupleElement(job, 'state') AS job_state,
        tupleElement(job, 'soft_failed') AS soft_failed
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(pipeline, 'repository') = {repo: String }
        AND tupleElement(pipeline, 'name') = {pipelineName: String }
        AND tupleElement(build, 'branch') = 'main'
        AND tupleElement(job, 'finished_at') IS NOT NULL
        AND tupleElement(job, 'finished_at') >= {startTime: DateTime64(3) }
        AND tupleElement(job, 'finished_at') < {stopTime: DateTime64(3) }
),

job_stats AS (
    SELECT
        job_name,
        -- Count clean successes: passed jobs only
        countIf(
            lowerUTF8(job_state) IN ('passed', 'finished', 'success')
        ) AS passed_count,
        -- Count soft failures: failed but soft_failed=true
        countIf(
            lowerUTF8(job_state) = 'failed' AND soft_failed = TRUE
        ) AS soft_failed_count,
        -- Count hard failures: failed and not flagged as soft. A NULL flag is
        -- treated as "not soft"; with a bare `soft_failed = FALSE` such a job
        -- would land in no bucket, shrinking the denominator and inflating
        -- success_rate.
        countIf(
            lowerUTF8(job_state) = 'failed'
            AND (soft_failed = FALSE OR soft_failed IS NULL)
        ) AS failed_count,
        countIf(lowerUTF8(job_state) IN ('canceled', 'cancelled'))
            AS canceled_count,
        -- Jobs in any other state are not counted at all.
        passed_count
        + soft_failed_count
        + failed_count
        + canceled_count AS total_count,
        passed_count + soft_failed_count + failed_count AS non_canceled_count,
        -- Success rate = ONLY clean passes / (all non-canceled):
        -- soft failures do not count as success for job reliability.
        if(
            non_canceled_count > 0,
            round(passed_count / non_canceled_count, 4),
            NULL
        ) AS success_rate
    FROM jobs
    GROUP BY job_name
    HAVING non_canceled_count >= {minRuns: UInt32}
)

SELECT
    job_name,
    passed_count,
    soft_failed_count,
    failed_count,
    canceled_count,
    total_count,
    non_canceled_count,
    success_rate
FROM job_stats
ORDER BY
    success_rate ASC,
    non_canceled_count DESC,
    -- Tie-breaker so the row order is stable between runs.
    job_name ASC
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"pipelineName": "String",
5+
"startTime": "DateTime64(3)",
6+
"stopTime": "DateTime64(3)",
7+
"minRuns": "UInt32"
8+
},
9+
"tests": [
10+
{
11+
"repo": "https://github.com/vllm-project/vllm.git",
12+
"pipelineName": "CI",
13+
"startTime": "2025-09-22T00:00:00.000",
14+
"stopTime": "2025-09-29T00:00:00.000",
15+
"minRuns": 5
16+
}
17+
]
18+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
-- vLLM job retry statistics (main branch only)
--
-- Shows which jobs are retried most often: the 10 jobs with the highest
-- retry rate among those with at least {minRuns} rows in the time window.
--
-- Parameters:
--   repo          value matched against pipeline.repository
--   pipelineName  value matched against pipeline.name
--   startTime     inclusive lower bound on build.started_at
--   stopTime      exclusive upper bound on build.started_at
--   minRuns       minimum number of rows a job needs to be listed
--
-- Output: job_name, total_runs, retried_count, retry_rate
--   retry_rate = retried_count / total_runs, rounded to 4 decimals.

SELECT
    tupleElement(job, 'name') AS job_name,
    count(*) AS total_runs,
    countIf(tupleElement(job, 'retried') = true) AS retried_count,
    if(
        total_runs > 0,
        round(retried_count / total_runs, 4),
        NULL
    ) AS retry_rate
FROM vllm.vllm_buildkite_jobs
WHERE
    tupleElement(pipeline, 'repository') = {repo: String}
    AND tupleElement(pipeline, 'name') = {pipelineName: String}
    AND tupleElement(build, 'branch') = 'main'
    AND tupleElement(build, 'started_at') IS NOT NULL
    AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3)}
    AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3)}
GROUP BY job_name
HAVING total_runs >= {minRuns: UInt32}
ORDER BY
    retry_rate DESC,
    retried_count DESC,
    -- Tie-breaker: without a total order, LIMIT picks an arbitrary subset of
    -- tied jobs and the top 10 can change between runs.
    job_name ASC
LIMIT 10

0 commit comments

Comments
 (0)