diff --git a/torchci/clickhouse_queries/vllm/ci_reliability/params.json b/torchci/clickhouse_queries/vllm/ci_reliability/params.json
new file mode 100644
index 0000000000..8bf3432c59
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/ci_reliability/params.json
@@ -0,0 +1,18 @@
+{
+ "params": {
+ "granularity": "String",
+ "repo": "String",
+ "pipelineName": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)"
+ },
+ "tests": [
+ {
+ "granularity": "day",
+ "repo": "https://github.com/vllm-project/vllm.git",
+ "pipelineName": "CI",
+ "startTime": "2025-09-26T00:00:00.000",
+ "stopTime": "2025-10-03T00:00:00.000"
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/ci_reliability/query.sql b/torchci/clickhouse_queries/vllm/ci_reliability/query.sql
new file mode 100644
index 0000000000..b6a681e885
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/ci_reliability/query.sql
@@ -0,0 +1,66 @@
+-- vLLM CI reliability metrics
+-- Computes the CI success rate over time for Buildkite builds
+-- Per-bucket breakdown of build states (passed, failed, canceled)
+-- success_rate = passed / (passed + failed); canceled builds are excluded
+
+WITH build_rows AS (
+    -- The jobs table repeats the build-level fields on every row, so DISTINCT
+    -- collapses rows that carry the same combination of those fields.
+    SELECT DISTINCT
+        tupleElement(pipeline, 'repository') AS repository,
+        tupleElement(pipeline, 'name') AS pipeline_name,
+        toUInt32(tupleElement(build, 'number')) AS build_number,
+        tupleElement(build, 'started_at') AS build_started_at,
+        tupleElement(build, 'finished_at') AS build_finished_at,
+        tupleElement(build, 'state') AS build_state,
+        -- Case-folded once here so the counters below stay short
+        lowerUTF8(tupleElement(build, 'state')) AS build_state_lc,
+        -- Time bucket label, e.g. '2025-09-26' for granularity = 'day'
+        formatDateTime(
+            DATE_TRUNC(
+                {granularity: String },
+                tupleElement(build, 'started_at')
+            ),
+            '%Y-%m-%d'
+        ) AS bucket
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        -- Half-open window [startTime, stopTime) on the build start time
+        tupleElement(build, 'started_at') IS NOT NULL
+        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
+        AND tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(pipeline, 'name') = {pipelineName: String }
+),
+
+bucket_stats AS (
+    SELECT
+        bucket,
+        countIf(build_state_lc IN ('passed', 'finished', 'success'))
+            AS passed_count,
+        countIf(build_state_lc = 'failed') AS failed_count,
+        countIf(build_state_lc IN ('canceled', 'cancelled'))
+            AS canceled_count,
+        -- Builds in any other state are not part of any counter
+        passed_count + failed_count + canceled_count AS total_count,
+        -- Denominator of the success rate: canceled builds are left out
+        passed_count + failed_count AS non_canceled_count,
+        if(
+            non_canceled_count = 0,
+            NULL,
+            round(passed_count / non_canceled_count, 4)
+        ) AS success_rate
+    FROM build_rows
+    GROUP BY bucket
+)
+
+SELECT
+    bucket AS granularity_bucket,
+    passed_count,
+    failed_count,
+    canceled_count,
+    total_count,
+    non_canceled_count,
+    success_rate
+FROM bucket_stats
+ORDER BY granularity_bucket ASC
diff --git a/torchci/clickhouse_queries/vllm/ci_run_duration/params.json b/torchci/clickhouse_queries/vllm/ci_run_duration/params.json
new file mode 100644
index 0000000000..b01ca3591d
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/ci_run_duration/params.json
@@ -0,0 +1,16 @@
+{
+ "params": {
+ "repo": "String",
+ "pipelineName": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)"
+ },
+ "tests": [
+ {
+ "repo": "https://github.com/vllm-project/vllm.git",
+ "pipelineName": "CI",
+ "startTime": "2025-09-26T00:00:00.000",
+ "stopTime": "2025-10-03T00:00:00.000"
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql b/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql
new file mode 100644
index 0000000000..7b022413df
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql
@@ -0,0 +1,32 @@
+-- vLLM CI run durations (Buildkite builds)
+-- Lists per-build durations based on build.started_at and build.finished_at
+
+WITH finished_build_rows AS (
+    -- Rows of finished builds that started within [startTime, stopTime)
+    SELECT
+        tupleElement(pipeline, 'name') AS pipeline_name,
+        toUInt32(tupleElement(build, 'number')) AS build_number,
+        tupleElement(build, 'started_at') AS build_started_at,
+        tupleElement(build, 'finished_at') AS build_finished_at,
+        tupleElement(build, 'state') AS build_state
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(build, 'started_at') IS NOT NULL
+        AND tupleElement(build, 'finished_at') IS NOT NULL
+        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
+        AND tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(pipeline, 'name') = {pipelineName: String }
+)
+
+SELECT
+    pipeline_name,
+    build_number,
+    max(build_started_at) AS started_at,
+    max(build_finished_at) AS finished_at,
+    any(build_state) AS build_state,
+    dateDiff('second', started_at, finished_at) AS duration_seconds,
+    round(duration_seconds / 3600.0, 3) AS duration_hours
+FROM finished_build_rows
+GROUP BY pipeline_name, build_number
+ORDER BY started_at ASC
diff --git a/torchci/clickhouse_queries/vllm/job_reliability/params.json b/torchci/clickhouse_queries/vllm/job_reliability/params.json
new file mode 100644
index 0000000000..201745489e
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/job_reliability/params.json
@@ -0,0 +1,18 @@
+{
+ "params": {
+ "repo": "String",
+ "pipelineName": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)",
+ "minRuns": "UInt32"
+ },
+ "tests": [
+ {
+ "repo": "https://github.com/vllm-project/vllm.git",
+ "pipelineName": "CI",
+ "startTime": "2025-09-26T00:00:00.000",
+ "stopTime": "2025-10-03T00:00:00.000",
+ "minRuns": 3
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/job_reliability/query.sql b/torchci/clickhouse_queries/vllm/job_reliability/query.sql
new file mode 100644
index 0000000000..40004dc54b
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/job_reliability/query.sql
@@ -0,0 +1,56 @@
+-- vLLM per-job reliability metrics
+-- Computes success rate for each individual job in the CI pipeline
+-- Shows which jobs are most/least reliable
+
+WITH finished_jobs AS (
+    -- Jobs of the requested repository and pipeline that finished within the
+    -- half-open window [startTime, stopTime)
+    SELECT
+        tupleElement(job, 'name') AS job_name,
+        lowerUTF8(tupleElement(job, 'state')) AS job_state_lc
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(job, 'finished_at') IS NOT NULL
+        AND tupleElement(job, 'finished_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(job, 'finished_at') < {stopTime: DateTime64(3) }
+        AND tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(pipeline, 'name') = {pipelineName: String }
+        -- Soft-failed jobs are excluded (flag is compared as a string)
+        AND tupleElement(job, 'soft_failed') = 'false'
+),
+
+per_job_counts AS (
+    SELECT
+        job_name,
+        countIf(job_state_lc IN ('passed', 'finished', 'success'))
+            AS passed_count,
+        countIf(job_state_lc = 'failed') AS failed_count,
+        countIf(job_state_lc IN ('canceled', 'cancelled'))
+            AS canceled_count,
+        passed_count + failed_count + canceled_count AS total_count,
+        -- Denominator of the success rate: canceled runs are ignored
+        passed_count + failed_count AS non_canceled_count,
+        if(
+            non_canceled_count = 0,
+            NULL,
+            round(passed_count / non_canceled_count, 4)
+        ) AS success_rate
+    FROM finished_jobs
+    GROUP BY job_name
+    -- Drop jobs with fewer than minRuns decisive (passed or failed) runs
+    HAVING non_canceled_count >= {minRuns: UInt32}
+)
+
+-- Least reliable jobs first; ties broken by number of decisive runs
+SELECT
+    job_name,
+    passed_count,
+    failed_count,
+    canceled_count,
+    total_count,
+    non_canceled_count,
+    success_rate
+FROM per_job_counts
+ORDER BY
+    success_rate ASC,
+    non_canceled_count DESC
diff --git a/torchci/clickhouse_queries/vllm/merges_percentage/query.sql b/torchci/clickhouse_queries/vllm/merges_percentage/query.sql
index dbb6b11fe6..9459fa2d30 100644
--- a/torchci/clickhouse_queries/vllm/merges_percentage/query.sql
+++ b/torchci/clickhouse_queries/vllm/merges_percentage/query.sql
@@ -108,7 +108,7 @@ manual_merged_prs AS (
manual_merged_prs_with_failures AS (
SELECT
bucket,
- count(number) AS manual_merged_with_failures_count
+ count(DISTINCT number) AS manual_merged_with_failures_count
FROM
merged_prs
LEFT JOIN latest_buildkite_jobs ON toString(merged_prs.number) = latest_buildkite_jobs.number
@@ -118,6 +118,19 @@ manual_merged_prs_with_failures AS (
GROUP BY
bucket
),
+manual_merged_prs_pending AS ( -- per bucket: manually merged PRs joined to a Buildkite job that is running/pending/scheduled
+ SELECT
+ bucket,
+ count(DISTINCT number) AS manual_merged_pending_count -- DISTINCT so a PR that joins to several job rows is counted once
+ FROM
+ merged_prs
+ LEFT JOIN latest_buildkite_jobs ON toString(merged_prs.number) = latest_buildkite_jobs.number
+ WHERE
+ tupleElement(auto_merge, 'merge_method') = '' -- empty merge_method is what this query treats as a manual merge
+ AND job_state IN ('running', 'pending', 'scheduled') -- rows without a matching job fail this test, so the LEFT JOIN acts as an inner join
+ GROUP BY
+ bucket
+),
auto_merged_prs AS (
SELECT
bucket,
@@ -137,7 +150,8 @@ results AS (
abandon_count,
auto_merged_count,
manual_merged_count,
- manual_merged_with_failures_count
+ manual_merged_with_failures_count,
+ manual_merged_pending_count
FROM
total_prs
LEFT JOIN open_prs ON total_prs.bucket = open_prs.bucket
@@ -145,6 +159,7 @@ results AS (
LEFT JOIN auto_merged_prs ON total_prs.bucket = auto_merged_prs.bucket
LEFT JOIN manual_merged_prs ON total_prs.bucket = manual_merged_prs.bucket
LEFT JOIN manual_merged_prs_with_failures ON total_prs.bucket = manual_merged_prs_with_failures.bucket
+ LEFT JOIN manual_merged_prs_pending ON total_prs.bucket = manual_merged_prs_pending.bucket
)
SELECT
*
diff --git a/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json
new file mode 100644
index 0000000000..da509ee387
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json
@@ -0,0 +1,14 @@
+{
+ "params": {
+ "repo": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)"
+ },
+ "tests": [
+ {
+ "repo": "vllm-project/vllm",
+ "startTime": "2025-09-22T00:00:00.000",
+ "stopTime": "2025-09-29T00:00:00.000"
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql
new file mode 100644
index 0000000000..57456c7944
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql
@@ -0,0 +1,182 @@
+-- vLLM PR cycle time breakdown
+-- Computes P50 and P90 (hours) for:
+-- 1) Time to first (human) review: PR ready -> first human review
+-- 2) Time to approval: first human review -> first approval
+-- 3) Time in merge queue: first approval -> merge time
+-- Notes:
+-- - "Ready" is derived from the first time the 'ready' label was applied.
+-- - Reviews are excluded if state = 'DISMISSED' or if the reviewer looks like a bot.
+-- - Human review is approximated via author_association in an allowed set and reviewer != PR author.
+-- - Metrics only consider merged PRs within the window [startTime, stopTime).
+
+WITH prs AS ( -- PRs of the repo that were closed within [startTime, stopTime)
+ SELECT
+ number AS pr_number,
+ user.login AS author,
+ parseDateTimeBestEffort(created_at) AS created_at_ts,
+ parseDateTimeBestEffort(closed_at) AS merged_at_ts -- NOTE(review): closed_at is used as the merge time
+ FROM default.pull_request
+ WHERE
+ dynamoKey LIKE concat({repo: String }, '%') -- prefix match on the repo name
+ AND state = 'closed' -- NOTE(review): also matches PRs closed without merging; confirm whether a merged-only filter is intended
+ AND closed_at != '' -- skip rows without a close time before parsing it
+ AND parseDateTimeBestEffort(closed_at) >= {startTime: DateTime64(3) }
+ AND parseDateTimeBestEffort(closed_at) < {stopTime: DateTime64(3) }
+),
+
+ready_events AS (
+    -- First time the 'ready' label was applied to each PR (no row if never)
+    SELECT
+        ple.pr_number,
+        min(ple.event_time) AS first_ready_ts
+    FROM default.pull_label_event ple
+    WHERE
+        ple.repo_name = {repo: String }
+        AND ple.action = 'labeled'
+        AND lowerUTF8(ple.label_name) = 'ready'
+    GROUP BY ple.pr_number
+),
+
+reviews_raw AS ( -- reviews of the repo that have a submission time
+ SELECT
+ toUInt32(
+ extractGroups(review.'pull_request_url', 'pulls/([0-9]+)')[1] -- PR number parsed from the 'pulls/<n>' part of the URL
+ ) AS pr_number,
+ review.'user'.'login' AS reviewer,
+ review.'state' AS state,
+ review.'author_association' AS author_association,
+ review.'submitted_at' AS submitted_at_ts
+ FROM default.pull_request_review
+ WHERE
+ dynamoKey LIKE concat({repo: String }, '%') -- prefix match on the repo name
+ AND review.'submitted_at' IS NOT NULL
+),
+
+-- Human reviews: not dismissed, allowed association, login not bot-like
+human_reviews AS (
+    SELECT
+        rv.pr_number,
+        rv.reviewer,
+        rv.state,
+        rv.author_association,
+        rv.submitted_at_ts
+    FROM reviews_raw rv
+    WHERE
+        rv.author_association IN (
+            'MEMBER', 'OWNER', 'COLLABORATOR', 'CONTRIBUTOR'
+        )
+        AND lowerUTF8(rv.state) != 'dismissed'
+        AND rv.reviewer NOT LIKE '%[bot]'
+        AND lowerUTF8(rv.reviewer) NOT LIKE '%bot%'
+),
+
+first_human_review AS (
+    -- Earliest non-approval human review (comment / change request) per PR
+    SELECT
+        p.pr_number,
+        minIf(
+            rv.submitted_at_ts,
+            lowerUTF8(rv.state) IN ('commented', 'changes_requested')
+            AND rv.reviewer != p.author
+        ) AS first_review_ts
+    FROM prs p
+    LEFT JOIN human_reviews rv ON p.pr_number = rv.pr_number
+    GROUP BY p.pr_number
+),
+
+first_approval AS (
+    -- Earliest approval per PR given by a maintainer other than the author
+    SELECT
+        p.pr_number,
+        minIf(
+            rv.submitted_at_ts,
+            rv.author_association IN ('MEMBER', 'OWNER', 'COLLABORATOR')
+            AND rv.reviewer != p.author
+            AND lowerUTF8(rv.state) = 'approved'
+        ) AS first_approval_ts
+    FROM prs p
+    LEFT JOIN human_reviews rv ON p.pr_number = rv.pr_number
+    GROUP BY p.pr_number
+),
+
+durations AS (
+    -- Per-PR stage durations in hours; a stage is NULL when one of its
+    -- endpoints is missing or the endpoints are out of order
+    SELECT
+        p.pr_number,
+        -- Clock starts at the first 'ready' label, else at PR creation
+        coalesce(rdy.first_ready_ts, p.created_at_ts) AS ready_ts,
+        rev.first_review_ts,
+        apr.first_approval_ts,
+        p.merged_at_ts,
+        -- ready -> first human review
+        if(
+            rev.first_review_ts IS NULL OR rev.first_review_ts < ready_ts,
+            NULL,
+            dateDiff('second', ready_ts, rev.first_review_ts) / 3600.0
+        ) AS time_to_first_review_hours,
+
+        -- first human review -> first approval
+        if(
+            rev.first_review_ts IS NULL
+            OR apr.first_approval_ts IS NULL
+            OR apr.first_approval_ts < rev.first_review_ts,
+            NULL,
+            dateDiff(
+                'second', rev.first_review_ts, apr.first_approval_ts
+            ) / 3600.0
+        ) AS time_to_approval_hours,
+
+        -- first approval -> merge
+        if(
+            apr.first_approval_ts IS NULL
+            OR p.merged_at_ts < apr.first_approval_ts,
+            NULL,
+            dateDiff('second', apr.first_approval_ts, p.merged_at_ts)
+            / 3600.0
+        ) AS time_in_merge_queue_hours
+    FROM prs p
+    LEFT JOIN ready_events rdy ON p.pr_number = rdy.pr_number
+    LEFT JOIN first_human_review rev ON p.pr_number = rev.pr_number
+    LEFT JOIN first_approval apr ON p.pr_number = apr.pr_number
+),
+
+filtered AS (  -- outlier guard: any stage outside [0, 30 days) drops the PR
+    SELECT *
+    FROM durations
+    WHERE
+        (
+            time_to_first_review_hours IS NULL
+            OR NOT (
+                time_to_first_review_hours < 0
+                OR time_to_first_review_hours >= 24 * 30
+            )
+        )
+        AND (
+            time_to_approval_hours IS NULL
+            OR NOT (
+                time_to_approval_hours < 0 OR time_to_approval_hours >= 24 * 30
+            )
+        )
+        AND (
+            time_in_merge_queue_hours IS NULL
+            OR NOT (
+                time_in_merge_queue_hours < 0
+                OR time_in_merge_queue_hours >= 24 * 30
+            )
+        )
+)
+
+SELECT -- single summary row: P50 and P90 of each stage duration, in hours rounded to 2 decimals
+ round(quantile(0.5) (time_to_first_review_hours), 2)
+ AS time_to_first_review_p50,
+ round(quantile(0.9) (time_to_first_review_hours), 2)
+ AS time_to_first_review_p90,
+ round(quantile(0.5) (time_to_approval_hours), 2) AS time_to_approval_p50,
+ round(quantile(0.9) (time_to_approval_hours), 2) AS time_to_approval_p90,
+ round(quantile(0.5) (time_in_merge_queue_hours), 2)
+ AS time_in_merge_queue_p50,
+ round(quantile(0.9) (time_in_merge_queue_hours), 2)
+ AS time_in_merge_queue_p90
+FROM filtered -- PRs that passed the outlier filter; each stage column may still be NULL per PR
+-- Quantiles skip NULL inputs; if a stage column is NULL for every PR in the window, its result is NULL
diff --git a/torchci/clickhouse_queries/vllm/trunk_health/params.json b/torchci/clickhouse_queries/vllm/trunk_health/params.json
new file mode 100644
index 0000000000..3bf90eef34
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/trunk_health/params.json
@@ -0,0 +1,18 @@
+{
+ "params": {
+ "granularity": "String",
+ "repo": "String",
+ "pipelineName": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)"
+ },
+ "tests": [
+ {
+ "granularity": "day",
+ "repo": "https://github.com/vllm-project/vllm.git",
+ "pipelineName": "CI",
+ "startTime": "2025-09-22T00:00:00.000",
+ "stopTime": "2025-09-29T00:00:00.000"
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/trunk_health/query.sql b/torchci/clickhouse_queries/vllm/trunk_health/query.sql
new file mode 100644
index 0000000000..46bc09435e
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/trunk_health/query.sql
@@ -0,0 +1,23 @@
+-- vLLM trunk health history
+-- Returns individual main branch builds with timestamps for hourly visualization
+
+SELECT
+    tupleElement(build, 'number') AS build_number,
+    tupleElement(build, 'started_at') AS build_started_at,
+    tupleElement(build, 'state') AS build_state,
+    -- 1 when the build ended in a success-like state, otherwise 0
+    if(
+        lowerUTF8(build_state) IN ('passed', 'finished', 'success'),
+        1,
+        0
+    ) AS is_green
+FROM vllm.vllm_buildkite_builds
+WHERE
+    -- Only builds of the main branch started within [startTime, stopTime)
+    tupleElement(build, 'branch') = 'main'
+    AND tupleElement(build, 'started_at') IS NOT NULL
+    AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
+    AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
+    AND tupleElement(pipeline, 'repository') = {repo: String }
+    AND tupleElement(pipeline, 'name') = {pipelineName: String }
+ORDER BY build_started_at ASC
diff --git a/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json b/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json
new file mode 100644
index 0000000000..3dde93989b
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json
@@ -0,0 +1,16 @@
+{
+ "params": {
+ "repo": "String",
+ "pipelineName": "String",
+ "startTime": "DateTime64(3)",
+ "stopTime": "DateTime64(3)"
+ },
+ "tests": [
+ {
+ "repo": "https://github.com/vllm-project/vllm.git",
+ "pipelineName": "CI",
+ "startTime": "2025-09-22T00:00:00.000",
+ "stopTime": "2025-09-29T00:00:00.000"
+ }
+ ]
+}
diff --git a/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql b/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql
new file mode 100644
index 0000000000..06a53ece82
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql
@@ -0,0 +1,79 @@
+-- vLLM trunk recovery time
+-- Tracks how long it takes to recover when main breaks
+-- Shows time between when main went red (first failed build of a streak)
+-- and when it went green again (first passing build after that streak)
+
+WITH main_builds AS (
+    -- Builds of main started within [startTime, stopTime), classified as
+    -- 1 = success, 0 = failed, -1 = any other state
+    SELECT
+        tupleElement(build, 'number') AS build_number,
+        tupleElement(build, 'started_at') AS build_started_at,
+        tupleElement(build, 'state') AS build_state,
+        if(
+            lowerUTF8(build_state) IN ('passed', 'finished', 'success'),
+            1,
+            if(lowerUTF8(build_state) = 'failed', 0, -1)
+        ) AS is_success
+    FROM vllm.vllm_buildkite_builds
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(pipeline, 'name') = {pipelineName: String }
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(build, 'started_at') IS NOT NULL
+        AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) }
+),
+
+-- Pair every decisive (passed or failed) build with the outcome before it.
+-- build_seq = 1 marks the first build in the window, which has no predecessor.
+build_with_prev AS (
+    SELECT
+        build_started_at,
+        is_success,
+        row_number() OVER (ORDER BY build_started_at) AS build_seq,
+        lagInFrame(is_success) OVER (ORDER BY build_started_at)
+            AS prev_is_success
+    FROM main_builds
+    WHERE is_success IN (0, 1)
+),
+
+-- Carry forward the start of the current red streak: the latest failed build
+-- seen so far that opened the window or directly followed a passing build.
+build_with_break AS (
+    SELECT
+        build_started_at,
+        is_success,
+        build_seq,
+        prev_is_success,
+        maxIf(
+            build_started_at,
+            is_success = 0 AND (build_seq = 1 OR prev_is_success = 1)
+        ) OVER (
+            ORDER BY build_started_at
+            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+        ) AS streak_start
+    FROM build_with_prev
+),
+
+-- Recovery events: the first passing build after a red streak, measured from
+-- the first failed build of that streak rather than the last one.
+recovery_events AS (
+    SELECT
+        streak_start AS break_time,
+        build_started_at AS recovery_time,
+        dateDiff('minute', streak_start, build_started_at)
+        / 60.0 AS recovery_hours
+    FROM build_with_break
+    WHERE
+        is_success = 1
+        AND build_seq > 1
+        AND prev_is_success = 0
+)
+
+SELECT
+    break_time,
+    recovery_time,
+    recovery_hours
+FROM recovery_events
+ORDER BY break_time ASC
diff --git a/torchci/components/metrics/panels/ScalarPanel.tsx b/torchci/components/metrics/panels/ScalarPanel.tsx
index 7fc60ddfc3..02a827e95d 100644
--- a/torchci/components/metrics/panels/ScalarPanel.tsx
+++ b/torchci/components/metrics/panels/ScalarPanel.tsx
@@ -15,11 +15,17 @@ export function ScalarPanelWithValue({
valueRenderer,
// Callback to decide whether the scalar value is "bad" and should be displayed red.
badThreshold,
+ // Optional styles to apply to the Paper
+ paperSx,
+ // Optional styles to apply to the title Typography
+ titleSx,
}: {
title: string;
value: any;
valueRenderer: (_value: any) => string;
badThreshold: (_value: any) => boolean;
+ paperSx?: any;
+ titleSx?: any;
}) {
if (value === undefined) {
return ;
@@ -28,14 +34,25 @@ export function ScalarPanelWithValue({
let fontColor = badThreshold(value) ? "#ee6666" : "inherit";
return (
-
+
-
+
{title}
{
+ const s = params.data?.build_state?.toLowerCase?.();
+ if (s === "failed") return COLOR_ERROR;
+ if (s === "canceled" || s === "cancelled") return COLOR_GRAY;
+ if (s === "passed" || s === "finished" || s === "success")
+ return COLOR_SUCCESS;
+ return COLOR_SUCCESS;
+ },
+ },
+ };
+}
+
+// Builds the two daily-mean line series. Each series reads its points from a
+// chart dataset by index (1 and 2); the array arguments are part of the
+// signature but are not read inside this function.
+function getLineSeries(
+  dailyMeanSuccess: any[],
+  dailyMeanNonCanceled: any[]
+): any[] {
+  const meanLine = (name: string, datasetIndex: number, color: any) => ({
+    name,
+    type: "line",
+    datasetIndex,
+    smooth: true,
+    encode: { x: "day", y: "value" },
+    lineStyle: { color, opacity: 0.7, width: 1 },
+    showSymbol: true,
+    symbolSize: 4,
+  });
+  return [
+    meanLine("Daily mean (success)", 1, COLOR_SUCCESS_LINE),
+    meanLine("Daily mean (success+failed)", 2, COLOR_MIXED_LINE),
+  ];
+}
+
+// Data-less scatter series, one per build outcome, used to give the legend a
+// colored entry for "Success", "Failed" and "Canceled".
+function getLegendScatterSeries(): any[] {
+  const legendEntries: [string, any][] = [
+    ["Success", COLOR_SUCCESS],
+    ["Failed", COLOR_ERROR],
+    ["Canceled", COLOR_GRAY],
+  ];
+  return legendEntries.map(([name, color]) => ({
+    name,
+    type: "scatter",
+    data: [],
+    itemStyle: { color },
+    tooltip: { show: false },
+    silent: true,
+  }));
+}
+
+// Formats the chart tooltip as an HTML string.
+// - Line series points: the day and that day's mean duration.
+// - Other points: start time, pipeline, build number and duration of a build.
+function formatTooltip(params: any): string {
+  if (params.seriesType === "line") {
+    // The value may arrive as an array or on the row object
+    const rawVal = Array.isArray(params.value)
+      ? params.value[1]
+      : params.data?.value;
+    const day = params.data?.day ?? "";
+    // The line series plot daily means, so the label says "mean"
+    return `Day: ${day}<br/>Daily mean: ${rawVal} h`;
+  }
+  const d = params.data ?? {};
+  const when = d.started_at ? dayjs(d.started_at).format("M/D/YY h:mm A") : "";
+  // The tooltip is rendered as HTML, so line breaks must be <br/> tags
+  return (
+    `Started: ${when}<br/>` +
+    `Pipeline: ${d.pipeline_name}<br/>` +
+    `Build #: ${d.build_number}<br/>` +
+    `Duration: ${d.duration_hours} h`
+  );
+}
+
+export default function CiDurationsPanel({
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ const source = (data || []).map((d: any) => ({
+ ...d,
+ started_at: d.started_at ? dayjs(d.started_at).toISOString() : null,
+ duration_hours: Number(d.duration_hours),
+ }));
+ const durations = source
+ .map((s) => s.duration_hours)
+ .filter((x) => Number.isFinite(x));
+ const sorted = [...durations].sort((a, b) => a - b);
+ const quantile = (p: number) =>
+ sorted.length ? sorted[Math.floor((sorted.length - 1) * p)] : undefined;
+ const p10 = quantile(0.1);
+ const p50 = quantile(0.5);
+ const p90 = quantile(0.9);
+
+ const successStates = new Set(["passed", "finished", "success"]);
+ const nonCanceled = source.filter((s: any) => {
+ const st = (s.build_state || "").toLowerCase();
+ return st !== "canceled" && st !== "cancelled";
+ });
+ const successOnly = source.filter((s: any) =>
+ successStates.has((s.build_state || "").toLowerCase())
+ );
+
+ const groupDaily = (rows: any[]) => {
+ const grouped = _.groupBy(rows, (s) =>
+ s.started_at ? (s.started_at as string).slice(0, 10) : ""
+ );
+ return Object.entries(grouped)
+ .filter(([k]) => k !== "")
+ .map(([day, rs]: any) => {
+ const vals = rs
+ .map((r: any) => Number(r.duration_hours))
+ .filter((x: number) => Number.isFinite(x));
+ const value = vals.length ? _.sum(vals) / vals.length : undefined;
+ return {
+ day,
+ value: value !== undefined ? Number(value.toFixed(3)) : undefined,
+ };
+ })
+ .sort((a: any, b: any) => (a.day < b.day ? -1 : 1));
+ };
+
+ let dailyMeanSuccess = groupDaily(successOnly);
+ const dailyMeanNonCanceled = groupDaily(nonCanceled);
+ if (dailyMeanNonCanceled.length > 0 && dailyMeanSuccess.length > 0) {
+ const lastDay = dailyMeanNonCanceled[dailyMeanNonCanceled.length - 1].day;
+ const hasLastDay = dailyMeanSuccess.some((d: any) => d.day === lastDay);
+ if (!hasLastDay) {
+ const lastVal = dailyMeanSuccess[dailyMeanSuccess.length - 1].value;
+ if (lastVal !== undefined) {
+ dailyMeanSuccess = [
+ ...dailyMeanSuccess,
+ { day: lastDay, value: lastVal },
+ ];
+ }
+ }
+ }
+
+ const options: EChartsOption = {
+ title: { text: "CI run duration (hours)", subtext: "Buildkite builds" },
+ legend: {
+ top: 24,
+ data: [
+ { name: "Daily mean (success)" },
+ { name: "Daily mean (success+failed)" },
+ { name: "Success" },
+ { name: "Failed" },
+ { name: "Canceled" },
+ ],
+ selectedMode: false,
+ },
+ grid: { top: 60, right: 8, bottom: 24, left: 64 },
+ dataset: [
+ { source },
+ { source: dailyMeanSuccess },
+ { source: dailyMeanNonCanceled },
+ ],
+ xAxis: { type: "time", axisLabel: { hideOverlap: true } },
+ yAxis: {
+ type: "value",
+ name: "hours",
+ nameLocation: "middle",
+ nameGap: 42,
+ nameRotate: 90,
+ axisLabel: { margin: 8 },
+ },
+ tooltip: {
+ trigger: "item",
+ formatter: formatTooltip,
+ },
+ series: [
+ getMainScatterSeries(),
+ ...getLineSeries(dailyMeanSuccess, dailyMeanNonCanceled),
+ ...getLegendScatterSeries(),
+ ],
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/DurationDistributionPanel.tsx b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx
new file mode 100644
index 0000000000..b7f5912601
--- /dev/null
+++ b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx
@@ -0,0 +1,165 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import {
+ getChartTitle,
+ getReactEChartsProps,
+ GRID_DEFAULT,
+} from "./chartUtils";
+import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants";
+
+// Buckets durations (hours) into consecutive fixed-width bins starting at 0.
+// Bin i covers [i * binSize, (i + 1) * binSize). Returns [] when there is
+// nothing to bin or when binSize is not a positive number.
+function createHistogramBins(
+  durations: number[],
+  binSize: number = 0.5
+): { range: string; count: number; midpoint: number }[] {
+  // A non-positive bin width would never terminate / has no meaning
+  if (!(binSize > 0)) return [];
+  // Negative or non-finite values cannot be placed in a bin starting at 0
+  const usable = durations.filter((d) => Number.isFinite(d) && d >= 0);
+  if (usable.length === 0) return [];
+
+  // reduce instead of Math.max(...arr) so large inputs cannot overflow the
+  // call stack; floor + 1 guarantees the maximum falls inside the last bin
+  // (ceil would drop values sitting exactly on a bin boundary).
+  const maxDuration = usable.reduce((m, d) => (d > m ? d : m), 0);
+  const numBins = Math.floor(maxDuration / binSize) + 1;
+
+  // Single pass: each value increments exactly one bin
+  const counts: number[] = new Array(numBins).fill(0);
+  usable.forEach((d) => {
+    // Clamp guards against floating-point division landing one past the end
+    counts[Math.min(Math.floor(d / binSize), numBins - 1)] += 1;
+  });
+
+  return counts.map((count, i) => {
+    const start = i * binSize;
+    const end = (i + 1) * binSize;
+    return {
+      range: `${start.toFixed(1)}-${end.toFixed(1)}h`,
+      count,
+      midpoint: (start + end) / 2,
+    };
+  });
+}
+
+// Formats the histogram tooltip as an HTML string: the duration range,
+// followed by one line per series that has at least one build in that range.
+function formatDistributionTooltip(params: any): string {
+  // Accept either a single item or an array of items
+  const items: any[] = Array.isArray(params) ? params : [params];
+
+  const range = items[0]?.name || "";
+  const lines = [`Duration: ${range}`];
+  items.forEach((p: any) => {
+    // Series with no builds in this bin are left out of the tooltip
+    if (p.value !== undefined && p.value > 0) {
+      lines.push(`${p.marker} ${p.seriesName}: ${p.value} build(s)`);
+    }
+  });
+
+  // The tooltip is rendered as HTML, so line breaks must be <br/> tags
+  return lines.map((line) => `${line}<br/>`).join("");
+}
+
+// One bar series per build outcome; each bar's height is the bin's count.
+function getDistributionSeries(
+  successBins: any[],
+  failedBins: any[],
+  canceledBins: any[]
+): any[] {
+  const outcomes: [string, any[], any][] = [
+    ["Success", successBins, COLOR_SUCCESS],
+    ["Failed", failedBins, COLOR_ERROR],
+    ["Canceled", canceledBins, COLOR_GRAY],
+  ];
+  return outcomes.map(([name, bins, color]) => ({
+    name,
+    type: "bar",
+    data: bins.map((b) => b.count),
+    itemStyle: { color },
+    emphasis: { focus: "series" },
+  }));
+}
+
+export default function DurationDistributionPanel({
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ // Process data into duration buckets by status
+ const source = (data || []).map((d: any) => ({
+ duration: Number(d.duration_hours),
+ status: (d.build_state || "").toLowerCase(),
+ }));
+
+ const successStates = new Set(["passed", "finished", "success"]);
+ const canceledStates = new Set(["canceled", "cancelled"]);
+
+ const successDurations = source
+ .filter((s) => successStates.has(s.status) && Number.isFinite(s.duration))
+ .map((s) => s.duration);
+
+ const failedDurations = source
+ .filter((s) => s.status === "failed" && Number.isFinite(s.duration))
+ .map((s) => s.duration);
+
+ const canceledDurations = source
+ .filter((s) => canceledStates.has(s.status) && Number.isFinite(s.duration))
+ .map((s) => s.duration);
+
+ // Create histogram bins
+ const binSize = 0.5; // 30 minute bins
+ const successBins = createHistogramBins(successDurations, binSize);
+ const failedBins = createHistogramBins(failedDurations, binSize);
+ const canceledBins = createHistogramBins(canceledDurations, binSize);
+
+ // Use the longest bin range for x-axis categories
+ const allBins = [successBins, failedBins, canceledBins];
+ const categories =
+ allBins
+ .reduce((a, b) => (a.length > b.length ? a : b), [])
+ .map((b) => b.range) || [];
+
+ const options: EChartsOption = {
+ title: getChartTitle(
+ "CI Duration Distribution",
+ "Histogram by build outcome"
+ ),
+ legend: {
+ top: 24,
+ data: ["Success", "Failed", "Canceled"],
+ },
+ grid: GRID_DEFAULT,
+ xAxis: {
+ type: "category",
+ data: categories,
+ name: "Duration Range",
+ nameLocation: "middle",
+ nameGap: 40,
+ axisLabel: {
+ rotate: 45,
+ fontSize: 10,
+ },
+ },
+ yAxis: {
+ type: "value",
+ name: "Count",
+ nameLocation: "middle",
+ nameGap: 40,
+ },
+ series: getDistributionSeries(successBins, failedBins, canceledBins),
+ tooltip: {
+ trigger: "axis",
+ axisPointer: { type: "shadow" },
+ formatter: formatDistributionTooltip,
+ },
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx b/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx
new file mode 100644
index 0000000000..fbbd4b250c
--- /dev/null
+++ b/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx
@@ -0,0 +1,99 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import _ from "lodash";
+import { getReactEChartsProps } from "./chartUtils";
+import { COLOR_BORDER_WHITE, COLOR_ERROR, COLOR_WARNING } from "./constants";
+
+// Formats the pie-slice tooltip as an HTML string: slice name, absolute
+// count, and share of the whole with one decimal place.
+function formatBreakdownTooltip(params: any): string {
+  const { name, value, percent } = params;
+  // The tooltip is rendered as HTML, so line breaks must be <br/> tags
+  return [
+    `${name}`,
+    `Count: ${value}`,
+    `Percentage: ${percent.toFixed(1)}%`,
+  ].join("<br/>");
+}
+
+// Pie series configuration for the force-merge breakdown: a ring (inner
+// radius 40%, outer 70%) with rounded, bordered slices and "name: percent"
+// labels that switch to a larger bold font in the emphasis state.
+function getPieSeries(data: any[]): any {
+  const sliceStyle = {
+    borderRadius: 10,
+    borderColor: COLOR_BORDER_WHITE,
+    borderWidth: 2,
+  };
+  const emphasisLabel = { show: true, fontSize: 16, fontWeight: "bold" };
+
+  return {
+    name: "Force Merge Reason",
+    type: "pie",
+    radius: ["40%", "70%"],
+    avoidLabelOverlap: true,
+    itemStyle: sliceStyle,
+    label: { show: true, formatter: "{b}: {d}%" },
+    emphasis: { label: emphasisLabel },
+    data,
+  };
+}
+
+export default function ForceMergeBreakdownPanel({
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ // Sum up the counts across all time periods
+ const manualMergedFailures =
+ data === undefined || data.length === 0
+ ? 0
+ : _.sumBy(data, "manual_merged_with_failures_count");
+ const manualMergedPending =
+ data === undefined || data.length === 0
+ ? 0
+ : _.sumBy(data, "manual_merged_pending_count");
+
+ const pieData = [
+ {
+ value: manualMergedFailures,
+ name: "CI Failure (failing checks)",
+ itemStyle: { color: COLOR_ERROR },
+ },
+ {
+ value: manualMergedPending,
+ name: "Impatience (checks pending)",
+ itemStyle: { color: COLOR_WARNING },
+ },
+ ];
+
+ const options: EChartsOption = {
+ title: {
+ text: "Force Merge Breakdown",
+ subtext: "Reasons for manual merges",
+ },
+ tooltip: {
+ trigger: "item",
+ formatter: formatBreakdownTooltip,
+ },
+ legend: {
+ orient: "vertical",
+ left: "left",
+ top: "middle",
+ },
+ series: getPieSeries(pieData),
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/JobReliabilityPanel.tsx b/torchci/components/metrics/vllm/JobReliabilityPanel.tsx
new file mode 100644
index 0000000000..fac56e8eb5
--- /dev/null
+++ b/torchci/components/metrics/vllm/JobReliabilityPanel.tsx
@@ -0,0 +1,147 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import { getReactEChartsProps } from "./chartUtils";
+import {
+ COLOR_BG_DARK,
+ COLOR_BORDER_LIGHT,
+ COLOR_ERROR,
+ COLOR_SUCCESS,
+ COLOR_WARNING,
+} from "./constants";
+
+// Renders a data point's fractional value as a percentage label with one decimal place (0.5 -> "50.0%")
+function formatSuccessRateLabel(params: any): string {
+  const percentText = (params.value * 100).toFixed(1);
+  return `${percentText}%`;
+}
+
+// Helper function to format job reliability tooltip for the hovered bar; returns "" when params is empty or no matching row exists in sortedData
+function formatJobReliabilityTooltip(params: any, sortedData: any[]): string {
+  const param = params?.[0];
+  const jobData = sortedData[param?.dataIndex];
+  if (!jobData) return "";
+  // Only a missing rate is "N/A"; a rate of exactly 0 is real data and must render as "0.0%"
+  const successRate = jobData.success_rate != null
+    ? (jobData.success_rate * 100).toFixed(1) + "%"
+    : "N/A";
+  const passed = jobData.passed_count || 0;
+  const failed = jobData.failed_count || 0;
+  const canceled = jobData.canceled_count || 0;
+  const total = jobData.total_count || 0;
+  const nonCanceled = jobData.non_canceled_count || 0;
+
+  return (
+    `${jobData.job_name}
` +
+    `Success Rate: ${successRate}
` +
+    `Passed: ${passed}
` +
+    `Failed: ${failed}
` +
+    `Canceled: ${canceled}
` +
+    `Non-canceled: ${nonCanceled}
` +
+    `Total: ${total}`
+  );
+}
+
+export default function JobReliabilityPanel({ // Horizontal bar chart of per-job success rates, color-coded by reliability band
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ // Sort by success rate (worst first) and prepare data; a missing success_rate is treated as 0
+ const sortedData = [...(data || [])].sort((a, b) => {
+ const rateA = a.success_rate ?? 0;
+ const rateB = b.success_rate ?? 0;
+ return rateA - rateB;
+ });
+
+ const jobNames = sortedData.map((d) => d.job_name);
+ const successRates = sortedData.map((d) => d.success_rate ?? 0);
+
+ // Color code by reliability: red (<70%), yellow (70% to <90%), green (>=90%)
+ const itemColors = successRates.map((rate) => {
+ if (rate < 0.7) return COLOR_ERROR;
+ if (rate < 0.9) return COLOR_WARNING;
+ return COLOR_SUCCESS;
+ });
+
+ const options: EChartsOption = {
+ title: {
+ text: "Per-Job Reliability",
+ subtext: "Success rate by job (min 3 runs)",
+ },
+ grid: {
+ top: 60,
+ right: 60,
+ bottom: 24,
+ left: 40,
+ containLabel: true,
+ },
+ xAxis: {
+ type: "value",
+ name: "Success Rate",
+ min: 0,
+ max: 1,
+ axisLabel: {
+ formatter: (value: number) => (value * 100).toFixed(0) + "%",
+ },
+ },
+ yAxis: {
+ type: "category",
+ data: jobNames,
+ axisLabel: {
+ interval: 0,
+ fontSize: 10,
+ },
+ inverse: false, // Worst jobs at bottom
+ },
+ series: [
+ {
+ name: "Success Rate",
+ type: "bar",
+ data: successRates.map((rate, idx) => ({
+ value: rate,
+ itemStyle: { color: itemColors[idx] },
+ })),
+ label: {
+ show: true,
+ position: "right",
+ formatter: formatSuccessRateLabel,
+ fontSize: 9,
+ },
+ },
+ ],
+ tooltip: {
+ trigger: "axis",
+ axisPointer: {
+ type: "shadow",
+ },
+ formatter: (params: any) =>
+ formatJobReliabilityTooltip(params, sortedData),
+ },
+ dataZoom: [ // with more than 15 jobs the initial window covers the last 15/N of the axis (ending at 100%); NOTE(review): with the ascending sort that looks like the highest-rate jobs - confirm this is the intended default view
+ {
+ type: "slider",
+ yAxisIndex: 0,
+ show: true,
+ right: 10,
+ width: 30,
+ start:
+ jobNames.length > 15
+ ? Math.max(0, 100 - (15 / jobNames.length) * 100)
+ : 0,
+ end: 100,
+ handleSize: "100%",
+ borderColor: darkMode ? COLOR_BG_DARK : COLOR_BORDER_LIGHT,
+ },
+ ],
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/MergesPanel.tsx b/torchci/components/metrics/vllm/MergesPanel.tsx
new file mode 100644
index 0000000000..0e76485a78
--- /dev/null
+++ b/torchci/components/metrics/vllm/MergesPanel.tsx
@@ -0,0 +1,67 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import { getReactEChartsProps } from "./chartUtils";
+import { COLOR_ERROR, COLOR_SUCCESS, COLOR_WARNING } from "./constants";
+
+// Helper function to format merges tooltip: per-category merge counts with their share of the bucket total; an empty bucket reports 0.0% rather than NaN%
+function formatMergesTooltip(params: any): string {
+  const manualMergedFailures = params[0].data.manual_merged_with_failures_count;
+  const manualMerged = params[0].data.manual_merged_count;
+  const autoMerged = params[0].data.auto_merged_count;
+  const total = manualMergedFailures + manualMerged + autoMerged;
+  const pct = (n: number) => (total === 0 ? 0 : (n / total) * 100).toFixed(1) + "%"; // guards the 0/0 case
+  const manualMergedFailuresPct = pct(manualMergedFailures);
+  const manualMergedPct = pct(manualMerged);
+  const autoMergedPct = pct(autoMerged);
+  return (
+    `Force merges (red): ${manualMergedFailures} (${manualMergedFailuresPct})` +
+    `
Manual merges (orange): ${manualMerged} (${manualMergedPct})` +
+    `
Auto merges (green): ${autoMerged} (${autoMergedPct})` +
+    `
Total: ${total}`
+  );
+}
+
+export default function MergesPanel({ data }: { data: any }) { // Stacked bar chart of merged PRs per granularity_bucket, split by merge type
+ const { darkMode } = useDarkMode();
+
+ const options: EChartsOption = {
+ title: { text: "Merged pull requests, by day", subtext: "" },
+ grid: { top: 60, right: 8, bottom: 24, left: 36 },
+ dataset: { source: data },
+ xAxis: { type: "category" },
+ yAxis: { type: "value" },
+ series: [
+ {
+ type: "bar",
+ stack: "all",
+ encode: { x: "granularity_bucket", y: "auto_merged_count" },
+ },
+ {
+ type: "bar",
+ stack: "all",
+ encode: { x: "granularity_bucket", y: "manual_merged_count" },
+ },
+ {
+ type: "bar",
+ stack: "all",
+ encode: {
+ x: "granularity_bucket",
+ y: "manual_merged_with_failures_count",
+ },
+ },
+ ],
+ color: [COLOR_SUCCESS, COLOR_WARNING, COLOR_ERROR], // palette follows series order: auto, manual, manual-with-failures
+ tooltip: {
+ trigger: "axis",
+ formatter: formatMergesTooltip,
+ },
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/ReliabilityPanel.tsx b/torchci/components/metrics/vllm/ReliabilityPanel.tsx
new file mode 100644
index 0000000000..3db9d72b5e
--- /dev/null
+++ b/torchci/components/metrics/vllm/ReliabilityPanel.tsx
@@ -0,0 +1,109 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import {
+ getCrosshairTooltipConfig,
+ getReactEChartsProps,
+ GRID_DEFAULT,
+} from "./chartUtils";
+import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants";
+
+// Helper function to generate stacked bar series for reliability data: Passed/Failed/Canceled bars sharing the "builds" stack, each reading its *_count dataset column against granularity_bucket
+function getReliabilityBarSeries(): any[] {
+ return [
+ {
+ name: "Passed",
+ type: "bar",
+ stack: "builds",
+ encode: { x: "granularity_bucket", y: "passed_count" },
+ itemStyle: { color: COLOR_SUCCESS },
+ emphasis: {
+ focus: "series",
+ },
+ },
+ {
+ name: "Failed",
+ type: "bar",
+ stack: "builds",
+ encode: { x: "granularity_bucket", y: "failed_count" },
+ itemStyle: { color: COLOR_ERROR },
+ emphasis: {
+ focus: "series",
+ },
+ },
+ {
+ name: "Canceled",
+ type: "bar",
+ stack: "builds",
+ encode: { x: "granularity_bucket", y: "canceled_count" },
+ itemStyle: { color: COLOR_GRAY },
+ emphasis: {
+ focus: "series",
+ },
+ },
+ ];
+}
+
+// Helper function to format reliability tooltip from the hovered bucket's dataset row; returns "" when no row is attached
+function formatReliabilityTooltip(params: any): string {
+  const data = params[0]?.data;
+  if (!data) return "";
+  // Only a missing rate is "N/A"; a rate of exactly 0 is real data and must render as "0.0%"
+  const successRate = data.success_rate != null
+    ? (data.success_rate * 100).toFixed(1) + "%"
+    : "N/A";
+  const passed = data.passed_count || 0;
+  const failed = data.failed_count || 0;
+  const canceled = data.canceled_count || 0;
+  const total = data.total_count || 0;
+  const nonCanceled = data.non_canceled_count || 0;
+
+  return (
+    `${data.granularity_bucket}
` +
+    `Success Rate: ${successRate}
` +
+    `Passed: ${passed}
` +
+    `Failed: ${failed}
` +
+    `Canceled: ${canceled}
` +
+    `Non-canceled: ${nonCanceled}
` +
+    `Total: ${total}`
+  );
+}
+
+export default function ReliabilityPanel({ // Stacked bar chart of passed/failed/canceled build counts per bucket; undefined data is charted as an empty dataset
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ const options: EChartsOption = {
+ title: {
+ text: "CI Build Counts",
+ subtext: "Daily build breakdown",
+ },
+ legend: {
+ top: 24,
+ data: ["Passed", "Failed", "Canceled"],
+ },
+ grid: { ...GRID_DEFAULT, bottom: 24 },
+ dataset: { source: data || [] },
+ xAxis: { type: "category" },
+ yAxis: {
+ type: "value",
+ name: "Count",
+ position: "left",
+ axisLabel: {
+ formatter: "{value}",
+ },
+ },
+ series: getReliabilityBarSeries(),
+ tooltip: getCrosshairTooltipConfig(darkMode, formatReliabilityTooltip),
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx b/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx
new file mode 100644
index 0000000000..dc5068523a
--- /dev/null
+++ b/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx
@@ -0,0 +1,115 @@
+import { Paper } from "@mui/material";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import {
+ getCrosshairTooltipConfig,
+ getReactEChartsProps,
+ GRID_DEFAULT,
+} from "./chartUtils";
+import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants";
+
+// Helper function to generate line series for reliability trends: smoothed Passed/Failed/Canceled lines, each reading its *_count dataset column against granularity_bucket
+function getReliabilityLineSeries(): any[] {
+ return [
+ {
+ name: "Passed",
+ type: "line",
+ encode: { x: "granularity_bucket", y: "passed_count" },
+ smooth: true,
+ lineStyle: { color: COLOR_SUCCESS, width: 2 },
+ itemStyle: { color: COLOR_SUCCESS },
+ symbolSize: 6,
+ emphasis: {
+ focus: "series",
+ },
+ },
+ {
+ name: "Failed",
+ type: "line",
+ encode: { x: "granularity_bucket", y: "failed_count" },
+ smooth: true,
+ lineStyle: { color: COLOR_ERROR, width: 2 },
+ itemStyle: { color: COLOR_ERROR },
+ symbolSize: 6,
+ emphasis: {
+ focus: "series",
+ },
+ },
+ {
+ name: "Canceled",
+ type: "line",
+ encode: { x: "granularity_bucket", y: "canceled_count" },
+ smooth: true,
+ lineStyle: { color: COLOR_GRAY, width: 2 },
+ itemStyle: { color: COLOR_GRAY },
+ symbolSize: 6,
+ emphasis: {
+ focus: "series",
+ },
+ },
+ ];
+}
+
+// Helper function to format reliability trend tooltip from the hovered bucket's dataset row; returns "" when no row is attached
+function formatReliabilityTrendTooltip(params: any): string {
+  const data = params[0]?.data;
+  if (!data) return "";
+
+  const passed = data.passed_count || 0;
+  const failed = data.failed_count || 0;
+  const canceled = data.canceled_count || 0;
+  const total = data.total_count || 0;
+  const nonCanceled = data.non_canceled_count || 0;
+  const successRate = data.success_rate != null // only a missing rate is "N/A"; exactly 0 must render as "0.0%"
+    ? (data.success_rate * 100).toFixed(1) + "%"
+    : "N/A";
+
+  return (
+    `${data.granularity_bucket}
` +
+    `Passed: ${passed}
` +
+    `Failed: ${failed}
` +
+    `Canceled: ${canceled}
` +
+    `Non-canceled: ${nonCanceled}
` +
+    `Total: ${total}
` +
+    `Success Rate: ${successRate}`
+  );
+}
+
+export default function ReliabilityTrendPanel({ // Line chart of passed/failed/canceled build counts per bucket; undefined data is charted as an empty dataset
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ const options: EChartsOption = {
+ title: {
+ text: "CI Reliability Trends",
+ subtext: "Daily success rate over time", // NOTE(review): the series and y-axis plot counts, not a success rate - confirm the wording
+ },
+ legend: {
+ top: 24,
+ data: ["Passed", "Failed", "Canceled"],
+ },
+ grid: { ...GRID_DEFAULT, bottom: 24 },
+ dataset: { source: data || [] },
+ xAxis: { type: "category" },
+ yAxis: {
+ type: "value",
+ name: "Count",
+ position: "left",
+ axisLabel: {
+ formatter: "{value}",
+ },
+ },
+ series: getReliabilityLineSeries(),
+ tooltip: getCrosshairTooltipConfig(darkMode, formatReliabilityTrendTooltip),
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/TrunkHealthPanel.tsx b/torchci/components/metrics/vllm/TrunkHealthPanel.tsx
new file mode 100644
index 0000000000..d58cb834e9
--- /dev/null
+++ b/torchci/components/metrics/vllm/TrunkHealthPanel.tsx
@@ -0,0 +1,138 @@
+import { Paper } from "@mui/material";
+import dayjs from "dayjs";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import {
+ getChartTitle,
+ getReactEChartsProps,
+ GRID_LEFT_WIDE,
+} from "./chartUtils";
+import { COLOR_BORDER_DARK, COLOR_ERROR, COLOR_SUCCESS } from "./constants";
+
+// Helper function to handle heatmap cell click: opens the clicked build's Buildkite page in a new tab; ignores events that are not series clicks or carry no build number
+function handleTrunkHealthClick(params: any) {
+  if (params?.componentType === "series") {
+    const buildNumber = params?.data?.[3]; // 4th element is build number
+    if (buildNumber !== undefined && buildNumber !== null) {
+      const url = `https://buildkite.com/vllm/ci/builds/${buildNumber}/`;
+      if (typeof window !== "undefined") {
+        window.open(url, "_blank", "noopener,noreferrer"); // the new tab gets no window.opener handle back to this page
+      }
+    }
+  }
+}
+
+// Helper function to format trunk health tooltip from a heatmap tuple [date, hour, status, buildNumber]; status 1 is reported as green, anything else as red
+function formatTrunkHealthTooltip(params: any): string {
+ const data = params.data;
+ if (!data) return "";
+
+ const date = data[0];
+ const hour = data[1];
+ const isGreen = data[2] === 1;
+ const status = isGreen ? "Green ✓" : "Red ✗";
+ const buildNumber = data[3];
+
+ return (
+ `${date} ${hour}:00
` +
+ `Status: ${status}
` +
+ `Build #${buildNumber}`
+ );
+}
+
+// Helper function to get trunk health series: a "Trunk Status" heatmap over the given tuples, with cell labels hidden and a shadow on hover
+function getTrunkHealthSeries(processedData: any[]): any {
+ return {
+ name: "Trunk Status",
+ type: "heatmap",
+ data: processedData,
+ label: {
+ show: false,
+ },
+ emphasis: {
+ itemStyle: {
+ shadowBlur: 10,
+ shadowColor: "rgba(0, 0, 0, 0.5)",
+ },
+ },
+ itemStyle: {
+ borderWidth: 1,
+ borderColor: COLOR_BORDER_DARK,
+ },
+ };
+}
+
+export default function TrunkHealthPanel({ // Date-by-hour heatmap of build status, colored via visualMap on the status value
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ // Process data into heatmap format: [date, hour, status, buildNumber]
+ // NOTE(review): date and hour come from dayjs(build_started_at) without an explicit time zone - confirm which zone the buckets should use
+ const processedData = (data || []).map((d: any) => {
+ const timestamp = dayjs(d.build_started_at);
+ const date = timestamp.format("YYYY-MM-DD");
+ const hour = timestamp.hour();
+ return [date, hour, d.is_green, d.build_number];
+ });
+
+ // Get unique dates and hours for the grid: dates seen in the data (sorted) and a fixed 0-23 hour list
+ const uniqueDates = [...new Set(processedData.map((d) => d[0]))].sort();
+ const hours = Array.from({ length: 24 }, (_, i) => i);
+
+ const options: EChartsOption = {
+ title: getChartTitle("Main Branch Health", "Build status heatmap"),
+ grid: GRID_LEFT_WIDE,
+ xAxis: {
+ type: "category",
+ data: uniqueDates,
+ name: "Date",
+ nameLocation: "middle",
+ nameGap: 40,
+ axisLabel: {
+ rotate: 45,
+ fontSize: 9,
+ },
+ },
+ yAxis: {
+ type: "category",
+ data: hours,
+ name: "Hour",
+ nameLocation: "middle",
+ nameGap: 50,
+ nameRotate: 90,
+ axisLabel: {
+ formatter: (value: any) => `${value}:00`,
+ fontSize: 9,
+ },
+ },
+ visualMap: {
+ show: false,
+ min: 0,
+ max: 1,
+ dimension: 2,
+ inRange: {
+ color: [COLOR_ERROR, COLOR_SUCCESS],
+ },
+ },
+ series: getTrunkHealthSeries(processedData),
+ tooltip: {
+ position: "top",
+ formatter: formatTrunkHealthTooltip,
+ },
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx b/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx
new file mode 100644
index 0000000000..f5d1a718c1
--- /dev/null
+++ b/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx
@@ -0,0 +1,90 @@
+import { Paper } from "@mui/material";
+import dayjs from "dayjs";
+import { EChartsOption } from "echarts";
+import ReactECharts from "echarts-for-react";
+import { useDarkMode } from "lib/DarkModeContext";
+import {
+ getChartTitle,
+ getReactEChartsProps,
+ GRID_DEFAULT,
+} from "./chartUtils";
+import { COLOR_ERROR } from "./constants";
+
+// Helper function to format recovery tooltip from a [break_time, recovery_hours] point; returns "" when the point carries no data
+function formatRecoveryTooltip(params: any): string {
+ const data = params.data;
+ if (!data) return "";
+
+ const breakTime = dayjs(data[0]).format("M/D/YY h:mm A");
+ const hours = data[1];
+
+ return (
+ `Trunk Breakage
` +
+ `When: ${breakTime}
` +
+ `Recovery time: ${hours.toFixed(1)} hours`
+ );
+}
+
+// Helper function to get recovery time series: an unsmoothed "Recovery Time" line over the given points, drawn in COLOR_ERROR
+function getRecoveryTimeSeries(processedData: any[]): any {
+ return {
+ name: "Recovery Time",
+ type: "line",
+ data: processedData,
+ smooth: false,
+ lineStyle: {
+ color: COLOR_ERROR,
+ width: 2,
+ },
+ itemStyle: {
+ color: COLOR_ERROR,
+ },
+ symbolSize: 8,
+ };
+}
+
+export default function TrunkRecoveryPanel({ // Time-axis line chart of recovery hours plotted at each break time
+ data,
+}: {
+ data: any[] | undefined;
+}) {
+ const { darkMode } = useDarkMode();
+
+ // Process data: [break_time, recovery_hours], with break_time converted to a Date and recovery_hours coerced to a number
+ const processedData = (data || []).map((d: any) => [
+ dayjs(d.break_time).toDate(),
+ Number(d.recovery_hours),
+ ]);
+
+ const options: EChartsOption = {
+ title: getChartTitle("Main Branch Recovery Time", "Time to fix over time"),
+ grid: GRID_DEFAULT,
+ xAxis: {
+ type: "time",
+ name: "When Main Broke",
+ nameLocation: "middle",
+ nameGap: 40,
+ axisLabel: {
+ hideOverlap: true,
+ formatter: (value: number) => dayjs(value).format("MMM D"),
+ },
+ },
+ yAxis: {
+ type: "value",
+ name: "Recovery Time (hours)",
+ nameLocation: "middle",
+ nameGap: 40,
+ },
+ series: getRecoveryTimeSeries(processedData),
+ tooltip: {
+ trigger: "item",
+ formatter: formatRecoveryTooltip,
+ },
+ };
+
+ return (
+
+
+
+ );
+}
diff --git a/torchci/components/metrics/vllm/chartUtils.ts b/torchci/components/metrics/vllm/chartUtils.ts
new file mode 100644
index 0000000000..e5f49c8d8b
--- /dev/null
+++ b/torchci/components/metrics/vllm/chartUtils.ts
@@ -0,0 +1,60 @@
+// Shared utility functions and configurations for vLLM chart components
+import {
+ COLOR_BG_DARK,
+ COLOR_BG_LIGHT,
+ COLOR_CROSSHAIR_DARK,
+ COLOR_CROSSHAIR_LIGHT,
+ COLOR_TEXT_DARK,
+ COLOR_TEXT_LIGHT,
+} from "./constants";
+
+// Builds the shared ECharts title option: the given text/subtext with a 14px title font
+export function getChartTitle(text: string, subtext: string) {
+  const textStyle = { fontSize: 14 };
+
+  return {
+    text: text,
+    subtext: subtext,
+    textStyle: textStyle,
+  };
+}
+
+// Common grid configuration: top/right/bottom/left offsets passed as the ECharts `grid` option by the panels
+export const GRID_DEFAULT = { top: 80, right: 60, bottom: 60, left: 60 };
+export const GRID_COMPACT = { top: 60, right: 8, bottom: 24, left: 64 };
+export const GRID_LEFT_WIDE = { top: 70, right: 40, bottom: 60, left: 75 };
+
+// Common crosshair tooltip configuration: axis-triggered tooltip with a "cross" pointer whose colors follow darkMode; `formatter` is passed through unchanged
+export function getCrosshairTooltipConfig(darkMode: boolean, formatter: any) {
+ const crosshairColor = darkMode
+ ? COLOR_CROSSHAIR_DARK
+ : COLOR_CROSSHAIR_LIGHT;
+
+ return {
+ trigger: "axis" as const,
+ axisPointer: {
+ type: "cross" as const,
+ crossStyle: {
+ color: crosshairColor,
+ opacity: 0.5,
+ },
+ lineStyle: {
+ color: crosshairColor,
+ opacity: 0.5,
+ },
+ label: {
+ backgroundColor: darkMode ? COLOR_BG_DARK : COLOR_BG_LIGHT,
+ color: darkMode ? COLOR_TEXT_DARK : COLOR_TEXT_LIGHT,
+ },
+ },
+ formatter,
+ };
+}
+
+// Shared ReactECharts props: the "dark-hud" theme when darkMode is on (otherwise no theme) and a style that fills the parent
+export function getReactEChartsProps(darkMode: boolean) {
+  const theme = darkMode ? "dark-hud" : undefined;
+  const style = { height: "100%", width: "100%" };
+
+  return { theme, style };
+}
diff --git a/torchci/components/metrics/vllm/constants.ts b/torchci/components/metrics/vllm/constants.ts
new file mode 100644
index 0000000000..d19f970b92
--- /dev/null
+++ b/torchci/components/metrics/vllm/constants.ts
@@ -0,0 +1,24 @@
+// Shared color constants (CSS hex strings) for vLLM metrics charts
+
+// Data visualization colors
+export const COLOR_SUCCESS = "#3ba272"; // Green - for successful/passing states
+export const COLOR_ERROR = "#ee6666"; // Red - for failures/errors
+export const COLOR_WARNING = "#fc9403"; // Orange - for warnings/manual actions
+export const COLOR_GRAY = "#9e9e9e"; // Gray - for canceled/neutral states
+export const COLOR_SUCCESS_LINE = "#00E676"; // Bright green - for success trend lines
+export const COLOR_MIXED_LINE = "#FF4081"; // Pink - for mixed success+failed trend lines
+
+// UI element colors (light mode)
+export const COLOR_CROSSHAIR_LIGHT = "#000000";
+export const COLOR_BG_LIGHT = "#f5f5f5";
+export const COLOR_TEXT_LIGHT = "#333";
+export const COLOR_BORDER_LIGHT = "#ddd";
+
+// UI element colors (dark mode)
+export const COLOR_CROSSHAIR_DARK = "#ffffff";
+export const COLOR_BG_DARK = "#555";
+export const COLOR_TEXT_DARK = "#fff";
+
+// Border colors (COLOR_BORDER_LIGHT is declared with the light-mode UI colors above)
+export const COLOR_BORDER_DARK = "#222";
+export const COLOR_BORDER_WHITE = "#fff";
diff --git a/torchci/pages/metrics/vllm.tsx b/torchci/pages/metrics/vllm.tsx
index 3d81bbcac5..3b7a09d337 100644
--- a/torchci/pages/metrics/vllm.tsx
+++ b/torchci/pages/metrics/vllm.tsx
@@ -1,94 +1,117 @@
-import { Grid, Paper, Skeleton, Stack, Typography } from "@mui/material";
+import { Box, Divider, Grid, Skeleton, Stack, Typography } from "@mui/material";
import { ScalarPanelWithValue } from "components/metrics/panels/ScalarPanel";
+import CiDurationsPanel from "components/metrics/vllm/CiDurationsPanel";
+import DurationDistributionPanel from "components/metrics/vllm/DurationDistributionPanel";
+import ForceMergeBreakdownPanel from "components/metrics/vllm/ForceMergeBreakdownPanel";
+import JobReliabilityPanel from "components/metrics/vllm/JobReliabilityPanel";
+import MergesPanel from "components/metrics/vllm/MergesPanel";
+import ReliabilityPanel from "components/metrics/vllm/ReliabilityPanel";
+import ReliabilityTrendPanel from "components/metrics/vllm/ReliabilityTrendPanel";
+import TrunkHealthPanel from "components/metrics/vllm/TrunkHealthPanel";
+import TrunkRecoveryPanel from "components/metrics/vllm/TrunkRecoveryPanel";
import dayjs from "dayjs";
-import { EChartsOption } from "echarts";
-import ReactECharts from "echarts-for-react";
import { useDarkMode } from "lib/DarkModeContext";
import { useClickHouseAPIImmutable } from "lib/GeneralUtils";
import _ from "lodash";
-import { useState } from "react";
+import React, { useState } from "react";
import { TimeRangePicker } from "../metrics";
const ROW_HEIGHT = 375;
-function MergesPanel({ data }: { data: any }) {
- // Use the dark mode context to determine whether to use the dark theme
- const { darkMode } = useDarkMode();
+// moved MergesPanel and CiDurationsPanel to components
- const options: EChartsOption = {
- title: {
- text: "Merged pull requests, by day",
- subtext: "",
- },
- grid: { top: 60, right: 8, bottom: 24, left: 36 },
- dataset: { source: data },
- xAxis: { type: "category" },
- yAxis: {
- type: "value",
- },
- series: [
- {
- type: "bar",
- stack: "all",
- encode: {
- x: "granularity_bucket",
- y: "auto_merged_count",
- },
- },
- {
- type: "bar",
- stack: "all",
- encode: {
- x: "granularity_bucket",
- y: "manual_merged_count",
- },
- },
- {
- type: "bar",
- stack: "all",
- encode: {
- x: "granularity_bucket",
- y: "manual_merged_with_failures_count",
- },
- },
- ],
- color: ["#3ba272", "#fc9403", "#ee6666"],
- tooltip: {
- trigger: "axis",
- formatter: (params: any) => {
- const manualMergedFailures =
- params[0].data.manual_merged_with_failures_count;
- const manualMerged = params[0].data.manual_merged_count;
- const autoMerged = params[0].data.auto_merged_count;
- const total = manualMergedFailures + manualMerged + autoMerged;
-
- const manualMergedFailuresPct =
- ((manualMergedFailures / total) * 100).toFixed(1) + "%";
- const manualMergedPct = ((manualMerged / total) * 100).toFixed(1) + "%";
- const autoMergedPct = ((autoMerged / total) * 100).toFixed(1) + "%";
- return `Force merges (red): ${manualMergedFailures} (${manualMergedFailuresPct})
-
- Manual merges (orange): ${manualMerged} (${manualMergedPct})
-
- Auto merges (green): ${autoMerged} (${autoMergedPct})
-
- Total: ${total}`;
- },
- },
- };
+// Reads `field` from the first PR-cycle row: undefined while data is undefined, null when the row or field is missing
+function getPrCycleValue(
+  data: any[] | undefined,
+  field: string
+): number | null | undefined {
+  const firstRow = data?.[0];
+  return data === undefined ? undefined : firstRow?.[field] ?? null;
+}
+
+// Formats an hour value with two decimals; null/undefined render as "-"
+function formatHours(v: number | null | undefined): string {
+  return v == null ? "-" : Number(v).toFixed(2);
+}
+
+// Formats an hour value with two decimals and an "h" suffix; null/undefined render as "-"
+function formatHoursWithUnit(v: number | null | undefined): string {
+  return v == null ? "-" : `${Number(v).toFixed(2)}h`;
+}
+
+// Formats a fraction as a percentage with one decimal (0.5 -> "50.0%"); null/undefined render as "-"
+function formatPercentage(v: number | null | undefined): string {
+  return v == null ? "-" : `${(v * 100).toFixed(1)}%`;
+}
+
+// Formats a count as its plain string form; null/undefined render as "-"
+function formatCount(v: number | null | undefined): string {
+  return v == null ? "-" : String(v);
+}
+
+// Type for metric configuration: one entry of the list passed to MetricStack/MetricColumn (title, value, value formatter, bad-value predicate, optional paper styling)
+interface MetricConfig {
+ title: string;
+ value: number | null | undefined;
+ valueRenderer: (v: number | null | undefined) => string;
+ badThreshold: (v: number | null | undefined) => boolean;
+ paperSx?: any;
+}
+
+// Helper component to render a stack of metric panels from config: maps over `metrics` in order inside a fragment
+function MetricStack({ metrics }: { metrics: MetricConfig[] }) {
+ return (
+ <>
+ {metrics.map((metric, index) => (
+
+ ))}
+ >
+ );
+}
+
+// Helper component for a metrics column; `size` defaults to { xs: 12, md: 3, lg: 2 } and `height` is optional
+function MetricColumn({
+ metrics,
+ height,
+ size = { xs: 12, md: 3, lg: 2 },
+}: {
+ metrics: MetricConfig[];
+ height?: string | number;
+ size?: { xs: number; md: number; lg?: number };
+}) {
+ return (
+
+
+
+
+
+ );
+}
+// Helper component for a dashboard row with consistent spacing: wraps `children`; `spacing` defaults to 2
+function DashboardRow({
+ children,
+ spacing = 2,
+}: {
+ children: React.ReactNode;
+ spacing?: number;
+}) {
return (
-
-
-
+
+ {children}
+
);
}
export default function Page() {
+ const { darkMode } = useDarkMode();
const [startTime, setStartTime] = useState(dayjs().subtract(1, "week"));
const [stopTime, setStopTime] = useState(dayjs());
const [timeRange, setTimeRange] = useState(7);
@@ -107,6 +130,92 @@ export default function Page() {
}
);
+ const { data: ciDurations } = useClickHouseAPIImmutable(
+ "vllm/ci_run_duration",
+ {
+ ...timeParams,
+ // Buildkite uses full repo URL with .git in vLLM dataset
+ repo: "https://github.com/vllm-project/vllm.git",
+ pipelineName: "CI",
+ }
+ );
+
+ // Compute CI P50/P90 from returned rows
+ const points = (ciDurations || []) as any[];
+ const successStatesSet = new Set(["passed", "finished", "success"]);
+ const successDurations = points
+ .filter((d: any) =>
+ successStatesSet.has(String(d.build_state || "").toLowerCase())
+ )
+ .map((d: any) => Number(d.duration_hours))
+ .filter((x: number) => Number.isFinite(x))
+ .sort((a: number, b: number) => a - b);
+ const nonCanceledDurations = points
+ .filter((d: any) => {
+ const s = String(d.build_state || "").toLowerCase();
+ return s !== "canceled" && s !== "cancelled";
+ })
+ .map((d: any) => Number(d.duration_hours))
+ .filter((x: number) => Number.isFinite(x))
+ .sort((a: number, b: number) => a - b);
+ const qFrom = (arr: number[], p: number) =>
+ arr.length ? arr[Math.floor((arr.length - 1) * p)] : null;
+ const ciSuccP50 =
+ ciDurations === undefined ? undefined : qFrom(successDurations, 0.5);
+ const ciSuccP90 =
+ ciDurations === undefined ? undefined : qFrom(successDurations, 0.9);
+ const ciNCancP50 =
+ ciDurations === undefined ? undefined : qFrom(nonCanceledDurations, 0.5);
+ const ciNCancP90 =
+ ciDurations === undefined ? undefined : qFrom(nonCanceledDurations, 0.9);
+
+ const { data: prCycleData } = useClickHouseAPIImmutable(
+ "vllm/pr_cycle_time_breakdown",
+ {
+ ...timeParams,
+ repo: "vllm-project/vllm",
+ }
+ );
+
+ const { data: reliabilityData } = useClickHouseAPIImmutable(
+ "vllm/ci_reliability",
+ {
+ ...timeParams,
+ granularity: "day",
+ repo: "https://github.com/vllm-project/vllm.git",
+ pipelineName: "CI",
+ }
+ );
+
+ const { data: jobReliabilityData } = useClickHouseAPIImmutable(
+ "vllm/job_reliability",
+ {
+ ...timeParams,
+ repo: "https://github.com/vllm-project/vllm.git",
+ pipelineName: "CI",
+ minRuns: 3,
+ }
+ );
+
+ const { data: trunkHealthData } = useClickHouseAPIImmutable(
+ "vllm/trunk_health",
+ {
+ ...timeParams,
+ granularity: "day",
+ repo: "https://github.com/vllm-project/vllm.git",
+ pipelineName: "CI",
+ }
+ );
+
+ const { data: trunkRecoveryData } = useClickHouseAPIImmutable(
+ "vllm/trunk_recovery_time",
+ {
+ ...timeParams,
+ repo: "https://github.com/vllm-project/vllm.git",
+ pipelineName: "CI",
+ }
+ );
+
if (data === undefined) {
return ;
}
@@ -115,6 +224,10 @@ export default function Page() {
data === undefined || data.length === 0
? 0
: _.sumBy(data, "manual_merged_with_failures_count");
+ const manualMergedPending =
+ data === undefined || data.length === 0
+ ? 0
+ : _.sumBy(data, "manual_merged_pending_count");
const manualMerged =
data === undefined || data.length === 0
? 0
@@ -130,53 +243,278 @@ export default function Page() {
total === 0 ? 0 : manualMergedFailures / total;
const manualMergedPct = total == 0 ? 0 : manualMerged / total;
+ // Force merge breakdown percentages
+ // Total force merges = failures + pending (the two reasons for force merge)
+ const totalForceMerges = manualMergedFailures + manualMergedPending;
+ const forceMergeDueToFailurePct =
+ totalForceMerges === 0 ? 0 : manualMergedFailures / totalForceMerges;
+ const forceMergeDueToImpatiencePct =
+ totalForceMerges === 0 ? 0 : manualMergedPending / totalForceMerges;
+
+ // Compute overall reliability metrics
+ const reliabilityPoints = (reliabilityData || []) as any[];
+ const totalPassed = _.sumBy(reliabilityPoints, "passed_count");
+ const totalFailed = _.sumBy(reliabilityPoints, "failed_count");
+ const totalCanceled = _.sumBy(reliabilityPoints, "canceled_count");
+ const totalNonCanceled = totalPassed + totalFailed;
+ const overallSuccessRate =
+ reliabilityData === undefined
+ ? undefined
+ : totalNonCanceled === 0
+ ? null
+ : totalPassed / totalNonCanceled;
+
+ // Compute trunk health metrics
+ // Data now contains individual builds, group by day to get daily status
+ const trunkHealthPoints = (trunkHealthData || []) as any[];
+ const buildsByDay = _.groupBy(trunkHealthPoints, (d) =>
+ d.build_started_at ? d.build_started_at.slice(0, 10) : ""
+ );
+ const dailyStatus = Object.entries(buildsByDay).map(([day, builds]) => {
+ // Day is green if the most recent build was green
+ const sortedBuilds = _.sortBy(builds, "build_started_at");
+ const mostRecent = sortedBuilds[sortedBuilds.length - 1];
+ return { day, isGreen: mostRecent?.is_green === 1 };
+ });
+ const greenDays = dailyStatus.filter((d) => d.isGreen).length;
+ const totalDays = dailyStatus.length;
+ const trunkHealthPct =
+ trunkHealthData === undefined
+ ? undefined
+ : totalDays === 0
+ ? null
+ : greenDays / totalDays;
+
+ // Compute average recovery time
+ const recoveryTimes = (trunkRecoveryData || []) as any[];
+ const avgRecoveryTime =
+ trunkRecoveryData === undefined
+ ? undefined
+ : recoveryTimes.length === 0
+ ? null
+ : _.meanBy(recoveryTimes, "recovery_hours");
+
return (
-
-
-
+
+
+
vLLM CI Metrics
-
+
+
+
-
+ {/* Section 1: Key Metrics Summary Cards */}
+
+
+ Key Metrics Overview
+
+
+
+ (v ?? 0) > 0.2,
+ },
+ {
+ title: "% force merge: CI failure",
+ value: forceMergeDueToFailurePct,
+ valueRenderer: formatPercentage,
+ badThreshold: (v) => (v ?? 0) > 0.5,
+ },
+ ]}
+ />
+ (v ?? 0) > 0.5,
+ },
+ {
+ title: "% force merge: impatience",
+ value: forceMergeDueToImpatiencePct,
+ valueRenderer: formatPercentage,
+ badThreshold: (v) => (v ?? 0) > 0.3,
+ },
+ ]}
+ />
+ (v ?? 1) < 0.85,
+ },
+ {
+ title: "Main branch health %",
+ value: trunkHealthPct,
+ valueRenderer: formatPercentage,
+ badThreshold: (v) => (v ?? 1) < 0.9,
+ },
+ ]}
+ />
+ (v ?? 0) > 12,
+ },
+ {
+ title: "Total Failed Builds",
+ value: reliabilityData === undefined ? undefined : totalFailed,
+ valueRenderer: formatCount,
+ badThreshold: (v) => (v ?? 0) > 10,
+ },
+ ]}
+ />
+ (v ?? 0) > 2,
+ },
+ {
+ title: "CI Time to green P90",
+ value: ciSuccP90,
+ valueRenderer: formatHoursWithUnit,
+ badThreshold: (v) => (v ?? 0) > 6,
+ },
+ ]}
+ />
+ (v ?? 0) > 24,
+ },
+ ]}
+ />
+ (v ?? 0) > 48,
+ },
+ ]}
+ />
+ (v ?? 0) > 24,
+ },
+ ]}
+ />
+ (v ?? 0) > 72,
+ },
+ ]}
+ />
+
+
+ {/* Section 2: CI Reliability */}
+
+
+ CI Reliability
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
- (value * 100).toFixed(1) + "%"}
- badThreshold={(value) => value > 0.2}
- />
- (value * 100).toFixed(1) + "%"}
- badThreshold={(value) => value > 0.5}
- />
-
+ {/* Section 3: CI Duration Analysis */}
+
+
+ CI Duration Analysis
+
+
+
+
+
+
+
+
+
+
+
+ {/* Section 4: PR Cycle Metrics */}
+
+
+ PR Cycle Metrics
+
+
+
+
+
+
+
+
-
+
);
}