Skip to content

Commit 74cf17a

Browse files
committed
vllm - add CI runtime and review cycle metrics
1 parent cb4df87 commit 74cf17a

File tree

8 files changed

+565
-85
lines changed

8 files changed

+565
-85
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"pipelineName": "String",
5+
"startTime": "DateTime64(3)",
6+
"stopTime": "DateTime64(3)"
7+
},
8+
"tests": [
9+
{
10+
"repo": "vllm-project/vllm",
11+
"pipelineName": "CI",
12+
"startTime": "2025-09-26T00:00:00.000",
13+
"stopTime": "2025-10-03T00:00:00.000"
14+
}
15+
]
16+
}
17+
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- vLLM CI build durations (Buildkite).
-- Emits one row per (pipeline, build number) with the build's duration,
-- computed from the build-level started_at / finished_at values carried on
-- the rows of vllm.vllm_buildkite_jobs.

WITH build_rows AS (
    -- Flatten the nested pipeline/build tuples, keeping only rows of the
    -- requested repo and pipeline whose build has both timestamps set and
    -- started inside [startTime, stopTime).
    SELECT
        tupleElement(pipeline, 'repository') AS repository,
        tupleElement(pipeline, 'name') AS pipeline_name,
        toUInt32(tupleElement(build, 'number')) AS build_number,
        tupleElement(build, 'started_at') AS started_raw,
        tupleElement(build, 'finished_at') AS finished_raw,
        tupleElement(build, 'state') AS state_raw
    FROM vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(pipeline, 'repository') = {repo: String }
        AND tupleElement(pipeline, 'name') = {pipelineName: String }
        AND tupleElement(build, 'started_at') IS NOT NULL
        AND tupleElement(build, 'finished_at') IS NOT NULL
        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
)
SELECT
    pipeline_name,
    build_number,
    -- Several rows can share a build number (presumably one per job -- TODO
    -- confirm); collapse them to a single row per build.
    max(started_raw) AS started_at,
    max(finished_raw) AS finished_at,
    any(state_raw) AS build_state,
    -- The aliases defined above are reused here (ClickHouse allows this).
    dateDiff('second', started_at, finished_at) AS duration_seconds,
    round(duration_seconds / 3600.0, 3) AS duration_hours
FROM build_rows
GROUP BY
    pipeline_name,
    build_number
ORDER BY started_at ASC
32+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"params": {
3+
"repo": "String",
4+
"startTime": "DateTime64(3)",
5+
"stopTime": "DateTime64(3)"
6+
},
7+
"tests": [
8+
{
9+
"repo": "vllm-project/vllm",
10+
"startTime": "2025-09-22T00:00:00.000",
11+
"stopTime": "2025-09-29T00:00:00.000"
12+
}
13+
]
14+
}
15+
16+
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
-- vLLM PR cycle time breakdown
2+
-- Computes P50 and P90 (hours) for:
3+
-- 1) Time to first (human) review: PR ready -> first human review
4+
-- 2) Time to approval: first human review -> first approval
5+
-- 3) Time in merge queue: first approval -> merge time
6+
-- Notes:
7+
-- - "Ready" is derived from the first time the 'ready' label was applied; PRs without it fall back to the PR creation time.
8+
-- - Reviews are excluded if state = 'DISMISSED' or if the reviewer looks like a bot (login ends in '[bot]' or contains 'bot').
9+
-- - Human review is approximated via author_association in an allowed set and reviewer != PR author.
10+
-- - Metrics consider PRs closed within the window [startTime, stopTime); closed_at is used as the merge time, so closed-but-unmerged PRs are not filtered out.
11+
12+
WITH prs AS (
    -- Closed PRs of the target repo whose closed_at falls inside
    -- [startTime, stopTime). closed_at is exposed as merged_at_ts; this CTE
    -- has no separate "was merged" check.
    -- NOTE(review): the dynamoKey prefix match has no delimiter after the
    -- repo name, so a sibling repo sharing the prefix would also match --
    -- confirm the key format.
    SELECT
        number AS pr_number,
        user.login AS author,
        parseDateTimeBestEffort(created_at) AS created_at_ts,
        parseDateTimeBestEffort(closed_at) AS merged_at_ts
    FROM default.pull_request
    WHERE
        dynamoKey LIKE concat({repo: String }, '%')
        AND state = 'closed'
        -- Empty-string guard stays ahead of the predicates that parse closed_at.
        AND closed_at != ''
        -- merged_at_ts is the SELECT alias for parseDateTimeBestEffort(closed_at).
        AND merged_at_ts >= {startTime: DateTime64(3) }
        AND merged_at_ts < {stopTime: DateTime64(3) }
),
26+
27+
ready_events AS (
    -- Earliest time the 'ready' label (case-insensitive) was applied to each
    -- PR of the repo.
    --
    -- minOrNullIf is used instead of minIf so that a PR with label events but
    -- no 'ready' labeling yields NULL. If event_time is not Nullable, plain
    -- minIf returns the type's default value (the epoch) in that case, and the
    -- coalesce(first_ready_ts, created_at_ts) fallback used by the durations
    -- CTE never triggers. Making the column Nullable also means PRs with no
    -- label events at all get NULL (not the epoch) from the LEFT JOIN.
    SELECT
        ple.pr_number,
        minOrNullIf(
            ple.event_time,
            lowerUTF8(ple.label_name) = 'ready' AND ple.action = 'labeled'
        ) AS first_ready_ts
    FROM default.pull_label_event AS ple
    WHERE
        ple.repo_name = {repo: String }
    GROUP BY ple.pr_number
),
36+
37+
reviews_raw AS (
    -- Submitted reviews for the repo, one row per review. The PR number is
    -- recovered from the "pulls/<n>" segment of the review's pull_request_url.
    SELECT
        toUInt32(
            extractGroups(review.'pull_request_url', 'pulls/([0-9]+)')[1]
        ) AS pr_number,
        review.'user'.'login' AS reviewer,
        review.'state' AS state,
        review.'author_association' AS author_association,
        review.'submitted_at' AS submitted_at_ts
    FROM default.pull_request_review
    WHERE
        dynamoKey LIKE concat({repo: String }, '%')
        -- Reviews without a submission time are dropped.
        AND review.'submitted_at' IS NOT NULL
),
49+
50+
-- Reviews treated as "human": not dismissed, written by someone whose
-- author_association is in the allowed set, and whose login does not look
-- like a bot.
human_reviews AS (
    SELECT
        pr_number,
        reviewer,
        state,
        author_association,
        submitted_at_ts
    FROM reviews_raw
    WHERE
        lowerUTF8(state) != 'dismissed'
        AND author_association IN ('MEMBER', 'OWNER', 'COLLABORATOR', 'CONTRIBUTOR')
        -- Bot heuristic: login ends in '[bot]' or contains 'bot' in any case.
        -- NOTE(review): the substring test also drops human logins that
        -- happen to contain "bot".
        AND NOT (
            reviewer LIKE '%[bot]'
            OR lowerUTF8(reviewer) LIKE '%bot%'
        )
),
65+
66+
first_human_review AS (
    -- For every PR in the window: time of the first human review that is not
    -- an approval (state 'commented' or 'changes_requested') and was not
    -- written by the PR author.
    --
    -- minOrNullIf is used instead of minIf so that PRs with no qualifying
    -- review get NULL. If submitted_at_ts is not Nullable, plain minIf returns
    -- the type's default value (the epoch) in that case, so the
    -- "first_review_ts IS NULL" guards in the durations CTE never fire and
    -- durations are computed from the epoch.
    SELECT
        pr.pr_number,
        minOrNullIf(
            hr.submitted_at_ts,
            hr.reviewer != pr.author
            AND lowerUTF8(hr.state) IN ('commented', 'changes_requested')
        ) AS first_review_ts
    FROM prs AS pr
    LEFT JOIN human_reviews AS hr ON pr.pr_number = hr.pr_number
    GROUP BY pr.pr_number
),
78+
79+
first_approval AS (
    -- For every PR in the window: time of the first approval given by someone
    -- other than the PR author whose author_association is MEMBER, OWNER or
    -- COLLABORATOR (approvals from CONTRIBUTORs are not counted).
    --
    -- minOrNullIf is used instead of minIf so that PRs without such an
    -- approval get NULL. If submitted_at_ts is not Nullable, plain minIf
    -- returns the type's default value (the epoch) in that case, which
    -- bypasses the "first_approval_ts IS NULL" guards in the durations CTE and
    -- produces an epoch-based merge-queue time.
    SELECT
        pr.pr_number,
        minOrNullIf(
            hr.submitted_at_ts,
            lowerUTF8(hr.state) = 'approved'
            AND hr.reviewer != pr.author
            AND hr.author_association IN ('MEMBER', 'OWNER', 'COLLABORATOR')
        ) AS first_approval_ts
    FROM prs AS pr
    LEFT JOIN human_reviews AS hr ON pr.pr_number = hr.pr_number
    GROUP BY pr.pr_number
),
93+
94+
durations AS (
    -- Per-PR milestone timestamps plus the three stage durations in hours.
    -- A duration is NULL when one of its endpoints IS NULL or when the
    -- endpoints are out of order (end earlier than start).
    SELECT
        pr.pr_number,
        -- Clock start: first 'ready' labeling, else the PR creation time.
        coalesce(re.first_ready_ts, pr.created_at_ts) AS ready_ts,
        fr.first_review_ts,
        fa.first_approval_ts,
        pr.merged_at_ts,

        -- Stage 1: ready -> first review.
        if(
            fr.first_review_ts IS NULL
                OR fr.first_review_ts < coalesce(re.first_ready_ts, pr.created_at_ts),
            NULL,
            dateDiff(
                'second',
                coalesce(re.first_ready_ts, pr.created_at_ts),
                fr.first_review_ts
            ) / 3600.0
        ) AS time_to_first_review_hours,

        -- Stage 2: first review -> first approval.
        if(
            fa.first_approval_ts IS NULL
                OR fr.first_review_ts IS NULL
                OR fa.first_approval_ts < fr.first_review_ts,
            NULL,
            dateDiff('second', fr.first_review_ts, fa.first_approval_ts) / 3600.0
        ) AS time_to_approval_hours,

        -- Stage 3: first approval -> merge (closed_at).
        if(
            fa.first_approval_ts IS NULL
                OR pr.merged_at_ts < fa.first_approval_ts,
            NULL,
            dateDiff('second', fa.first_approval_ts, pr.merged_at_ts) / 3600.0
        ) AS time_in_merge_queue_hours
    FROM prs AS pr
    LEFT JOIN ready_events AS re ON pr.pr_number = re.pr_number
    LEFT JOIN first_human_review AS fr ON pr.pr_number = fr.pr_number
    LEFT JOIN first_approval AS fa ON pr.pr_number = fa.pr_number
),
124+
125+
filtered AS (
    -- Outlier guard: a PR is kept only if each of its stage durations is
    -- either NULL or inside [0, 24 * 30) hours, i.e. under 30 days. One
    -- out-of-range duration removes the PR from all three metrics.
    SELECT *
    FROM durations
    WHERE
        (
            time_to_first_review_hours IS NULL
            OR (time_to_first_review_hours >= 0 AND time_to_first_review_hours < 24 * 30)
        )
        AND (
            time_to_approval_hours IS NULL
            OR (time_to_approval_hours >= 0 AND time_to_approval_hours < 24 * 30)
        )
        AND (
            time_in_merge_queue_hours IS NULL
            OR (time_in_merge_queue_hours >= 0 AND time_in_merge_queue_hours < 24 * 30)
        )
)
134+
135+
-- Single summary row: P50 and P90 of each stage duration, in hours, rounded to
-- two decimals. NULL durations are skipped by quantile(); a stage that is
-- NULL for every PR in the window yields NULL.
SELECT
    -- ready -> first review
    round(quantile(0.5)(time_to_first_review_hours), 2) AS time_to_first_review_p50,
    round(quantile(0.9)(time_to_first_review_hours), 2) AS time_to_first_review_p90,
    -- first review -> first approval
    round(quantile(0.5)(time_to_approval_hours), 2) AS time_to_approval_p50,
    round(quantile(0.9)(time_to_approval_hours), 2) AS time_to_approval_p90,
    -- first approval -> merge
    round(quantile(0.5)(time_in_merge_queue_hours), 2) AS time_in_merge_queue_p50,
    round(quantile(0.9)(time_in_merge_queue_hours), 2) AS time_in_merge_queue_p90
FROM filtered
144+

torchci/components/metrics/panels/ScalarPanel.tsx

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@ export function ScalarPanelWithValue({
1515
valueRenderer,
1616
// Callback to decide whether the scalar value is "bad" and should be displayed red.
1717
badThreshold,
18+
// Optional styles to apply to the Paper
19+
paperSx,
1820
}: {
1921
title: string;
2022
value: any;
2123
valueRenderer: (_value: any) => string;
2224
badThreshold: (_value: any) => boolean;
25+
paperSx?: any;
2326
}) {
2427
if (value === undefined) {
2528
return <Skeleton variant={"rectangular"} height={"100%"} />;
@@ -28,14 +31,18 @@ export function ScalarPanelWithValue({
2831
let fontColor = badThreshold(value) ? "#ee6666" : "inherit";
2932

3033
return (
31-
<Paper sx={{ p: 2 }} elevation={3}>
34+
<Paper sx={{ p: 2, ...(paperSx || {}) }} elevation={3}>
3235
<Box
3336
sx={{
3437
display: "flex",
3538
flexDirection: "column",
3639
}}
3740
>
38-
<Typography sx={{ fontSize: "1rem", fontWeight: "bold" }}>
41+
<Typography
42+
sx={{ fontSize: "1rem", fontWeight: "bold", whiteSpace: "nowrap", overflow: "hidden", textOverflow: "ellipsis" }}
43+
noWrap
44+
title={title}
45+
>
3946
{title}
4047
</Typography>
4148
<Typography

0 commit comments

Comments
 (0)