diff --git a/torchci/clickhouse_queries/vllm/ci_reliability/params.json b/torchci/clickhouse_queries/vllm/ci_reliability/params.json new file mode 100644 index 0000000000..8bf3432c59 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/ci_reliability/params.json @@ -0,0 +1,18 @@ +{ + "params": { + "granularity": "String", + "repo": "String", + "pipelineName": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [ + { + "granularity": "day", + "repo": "https://github.com/vllm-project/vllm.git", + "pipelineName": "CI", + "startTime": "2025-09-26T00:00:00.000", + "stopTime": "2025-10-03T00:00:00.000" + } + ] +} diff --git a/torchci/clickhouse_queries/vllm/ci_reliability/query.sql b/torchci/clickhouse_queries/vllm/ci_reliability/query.sql new file mode 100644 index 0000000000..b6a681e885 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/ci_reliability/query.sql @@ -0,0 +1,66 @@ +-- vLLM CI reliability metrics +-- Computes CI success rate, failure rate over time for Buildkite builds +-- Daily breakdown of build states (passed, failed, canceled) +-- Overall success rate and job-level reliability + +WITH builds AS ( + SELECT + tupleElement(pipeline, 'repository') AS repository, + tupleElement(pipeline, 'name') AS pipeline_name, + toUInt32(tupleElement(build, 'number')) AS build_number, + tupleElement(build, 'started_at') AS build_started_at, + tupleElement(build, 'finished_at') AS build_finished_at, + tupleElement(build, 'state') AS build_state, + formatDateTime( + DATE_TRUNC( + {granularity: String }, + tupleElement(build, 'started_at') + ), + '%Y-%m-%d' + ) AS bucket + FROM vllm.vllm_buildkite_jobs + WHERE + tupleElement(pipeline, 'repository') = {repo: String } + AND tupleElement(pipeline, 'name') = {pipelineName: String } + AND tupleElement(build, 'started_at') IS NOT NULL + AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) } + AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) } + GROUP BY + 
repository, + pipeline_name, + build_number, + build_started_at, + build_finished_at, + build_state, + bucket +), + +daily_stats AS ( + SELECT + bucket, + countIf(lowerUTF8(build_state) IN ('passed', 'finished', 'success')) + AS passed_count, + countIf(lowerUTF8(build_state) = 'failed') AS failed_count, + countIf(lowerUTF8(build_state) IN ('canceled', 'cancelled')) + AS canceled_count, + passed_count + failed_count + canceled_count AS total_count, + passed_count + failed_count AS non_canceled_count, + if( + non_canceled_count > 0, + round(passed_count / non_canceled_count, 4), + NULL + ) AS success_rate + FROM builds + GROUP BY bucket +) + +SELECT + bucket AS granularity_bucket, + passed_count, + failed_count, + canceled_count, + total_count, + non_canceled_count, + success_rate +FROM daily_stats +ORDER BY granularity_bucket ASC diff --git a/torchci/clickhouse_queries/vllm/ci_run_duration/params.json b/torchci/clickhouse_queries/vllm/ci_run_duration/params.json new file mode 100644 index 0000000000..b01ca3591d --- /dev/null +++ b/torchci/clickhouse_queries/vllm/ci_run_duration/params.json @@ -0,0 +1,16 @@ +{ + "params": { + "repo": "String", + "pipelineName": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [ + { + "repo": "https://github.com/vllm-project/vllm.git", + "pipelineName": "CI", + "startTime": "2025-09-26T00:00:00.000", + "stopTime": "2025-10-03T00:00:00.000" + } + ] +} diff --git a/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql b/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql new file mode 100644 index 0000000000..7b022413df --- /dev/null +++ b/torchci/clickhouse_queries/vllm/ci_run_duration/query.sql @@ -0,0 +1,32 @@ +-- vLLM CI run durations (Buildkite builds) +-- Lists per-build durations based on build.started_at and build.finished_at + +WITH b AS ( + SELECT + tupleElement(pipeline, 'repository') AS repository, + tupleElement(pipeline, 'name') AS pipeline_name, + toUInt32(tupleElement(build, 'number')) AS 
build_number, + tupleElement(build, 'started_at') AS build_started_at, + tupleElement(build, 'finished_at') AS build_finished_at, + tupleElement(build, 'state') AS build_state + FROM vllm.vllm_buildkite_jobs + WHERE + tupleElement(pipeline, 'repository') = {repo: String } + AND tupleElement(pipeline, 'name') = {pipelineName: String } + AND tupleElement(build, 'started_at') IS NOT NULL + AND tupleElement(build, 'finished_at') IS NOT NULL + AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) } + AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) } +) + +SELECT + pipeline_name, + build_number, + max(build_started_at) AS started_at, + max(build_finished_at) AS finished_at, + any(build_state) AS build_state, + dateDiff('second', started_at, finished_at) AS duration_seconds, + round(duration_seconds / 3600.0, 3) AS duration_hours +FROM b +GROUP BY pipeline_name, build_number +ORDER BY started_at ASC diff --git a/torchci/clickhouse_queries/vllm/job_reliability/params.json b/torchci/clickhouse_queries/vllm/job_reliability/params.json new file mode 100644 index 0000000000..201745489e --- /dev/null +++ b/torchci/clickhouse_queries/vllm/job_reliability/params.json @@ -0,0 +1,18 @@ +{ + "params": { + "repo": "String", + "pipelineName": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)", + "minRuns": "UInt32" + }, + "tests": [ + { + "repo": "https://github.com/vllm-project/vllm.git", + "pipelineName": "CI", + "startTime": "2025-09-26T00:00:00.000", + "stopTime": "2025-10-03T00:00:00.000", + "minRuns": 3 + } + ] +} diff --git a/torchci/clickhouse_queries/vllm/job_reliability/query.sql b/torchci/clickhouse_queries/vllm/job_reliability/query.sql new file mode 100644 index 0000000000..40004dc54b --- /dev/null +++ b/torchci/clickhouse_queries/vllm/job_reliability/query.sql @@ -0,0 +1,56 @@ +-- vLLM per-job reliability metrics +-- Computes success rate for each individual job in the CI pipeline +-- Shows which jobs are most/least 
reliable + +WITH jobs AS ( + SELECT + tupleElement(pipeline, 'repository') AS repository, + tupleElement(pipeline, 'name') AS pipeline_name, + toUInt32(tupleElement(build, 'number')) AS build_number, + tupleElement(job, 'name') AS job_name, + tupleElement(job, 'state') AS job_state, + tupleElement(job, 'soft_failed') AS soft_failed, + tupleElement(job, 'finished_at') AS job_finished_at + FROM vllm.vllm_buildkite_jobs + WHERE + tupleElement(pipeline, 'repository') = {repo: String } + AND tupleElement(pipeline, 'name') = {pipelineName: String } + AND tupleElement(job, 'finished_at') IS NOT NULL + AND tupleElement(job, 'finished_at') >= {startTime: DateTime64(3) } + AND tupleElement(job, 'finished_at') < {stopTime: DateTime64(3) } + -- Exclude soft-failed jobs from reliability calculation + AND tupleElement(job, 'soft_failed') = 'false' +), + +job_stats AS ( + SELECT + job_name, + countIf(lowerUTF8(job_state) IN ('passed', 'finished', 'success')) + AS passed_count, + countIf(lowerUTF8(job_state) = 'failed') AS failed_count, + countIf(lowerUTF8(job_state) IN ('canceled', 'cancelled')) + AS canceled_count, + passed_count + failed_count + canceled_count AS total_count, + passed_count + failed_count AS non_canceled_count, + if( + non_canceled_count > 0, + round(passed_count / non_canceled_count, 4), + NULL + ) AS success_rate + FROM jobs + GROUP BY job_name + HAVING non_canceled_count >= {minRuns: UInt32} +) + +SELECT + job_name, + passed_count, + failed_count, + canceled_count, + total_count, + non_canceled_count, + success_rate +FROM job_stats +ORDER BY + success_rate ASC, + non_canceled_count DESC diff --git a/torchci/clickhouse_queries/vllm/merges_percentage/query.sql b/torchci/clickhouse_queries/vllm/merges_percentage/query.sql index dbb6b11fe6..9459fa2d30 100644 --- a/torchci/clickhouse_queries/vllm/merges_percentage/query.sql +++ b/torchci/clickhouse_queries/vllm/merges_percentage/query.sql @@ -108,7 +108,7 @@ manual_merged_prs AS ( manual_merged_prs_with_failures 
AS ( SELECT bucket, - count(number) AS manual_merged_with_failures_count + count(DISTINCT number) AS manual_merged_with_failures_count FROM merged_prs LEFT JOIN latest_buildkite_jobs ON toString(merged_prs.number) = latest_buildkite_jobs.number @@ -118,6 +118,19 @@ manual_merged_prs_with_failures AS ( GROUP BY bucket ), +manual_merged_prs_pending AS ( + SELECT + bucket, + count(DISTINCT number) AS manual_merged_pending_count + FROM + merged_prs + LEFT JOIN latest_buildkite_jobs ON toString(merged_prs.number) = latest_buildkite_jobs.number + WHERE + tupleElement(auto_merge, 'merge_method') = '' + AND job_state IN ('running', 'pending', 'scheduled') + GROUP BY + bucket +), auto_merged_prs AS ( SELECT bucket, @@ -137,7 +150,8 @@ results AS ( abandon_count, auto_merged_count, manual_merged_count, - manual_merged_with_failures_count + manual_merged_with_failures_count, + manual_merged_pending_count FROM total_prs LEFT JOIN open_prs ON total_prs.bucket = open_prs.bucket @@ -145,6 +159,7 @@ results AS ( LEFT JOIN auto_merged_prs ON total_prs.bucket = auto_merged_prs.bucket LEFT JOIN manual_merged_prs ON total_prs.bucket = manual_merged_prs.bucket LEFT JOIN manual_merged_prs_with_failures ON total_prs.bucket = manual_merged_prs_with_failures.bucket + LEFT JOIN manual_merged_prs_pending ON total_prs.bucket = manual_merged_prs_pending.bucket ) SELECT * diff --git a/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json new file mode 100644 index 0000000000..da509ee387 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/params.json @@ -0,0 +1,14 @@ +{ + "params": { + "repo": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [ + { + "repo": "vllm-project/vllm", + "startTime": "2025-09-22T00:00:00.000", + "stopTime": "2025-09-29T00:00:00.000" + } + ] +} diff --git a/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql 
b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql new file mode 100644 index 0000000000..57456c7944 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/pr_cycle_time_breakdown/query.sql @@ -0,0 +1,182 @@ +-- vLLM PR cycle time breakdown +-- Computes P50 and P90 (hours) for: +-- 1) Time to first (human) review: PR ready -> first human review +-- 2) Time to approval: first human review -> first approval +-- 3) Time in merge queue: first approval -> merge time +-- Notes: +-- - "Ready" is derived from the first time the 'ready' label was applied. +-- - Reviews are excluded if state = 'DISMISSED' or if the reviewer looks like a bot. +-- - Human review is approximated via author_association in an allowed set and reviewer != PR author. +-- - Metrics consider PRs closed within the window [startTime, stopTime). NOTE(review): the prs CTE filters on state = 'closed' and uses closed_at as the merge time, so closed-but-unmerged PRs are not excluded here - confirm whether a merged filter is needed. + +WITH prs AS ( + SELECT + number AS pr_number, + user.login AS author, + parseDateTimeBestEffort(created_at) AS created_at_ts, + parseDateTimeBestEffort(closed_at) AS merged_at_ts + FROM default.pull_request + WHERE + dynamoKey LIKE concat({repo: String }, '%') + AND state = 'closed' + AND closed_at != '' + AND parseDateTimeBestEffort(closed_at) >= {startTime: DateTime64(3) } + AND parseDateTimeBestEffort(closed_at) < {stopTime: DateTime64(3) } +), + +ready_events AS ( + SELECT + ple.pr_number, + minIf( + ple.event_time, + lowerUTF8(ple.label_name) = 'ready' AND ple.action = 'labeled' + ) AS first_ready_ts + FROM default.pull_label_event ple + WHERE + ple.repo_name = {repo: String } + GROUP BY ple.pr_number +), + +reviews_raw AS ( + SELECT + toUInt32( + extractGroups(review.'pull_request_url', 'pulls/([0-9]+)')[1] + ) AS pr_number, + review.'user'.'login' AS reviewer, + review.'state' AS state, + review.'author_association' AS author_association, + review.'submitted_at' AS submitted_at_ts + FROM default.pull_request_review + WHERE + dynamoKey LIKE concat({repo: String }, '%') + AND review.'submitted_at' IS NOT NULL +), + +-- Filter to human reviews 
and exclude dismissed ones and bot reviewers +human_reviews AS ( + SELECT + r.pr_number, + r.reviewer, + r.state, + r.author_association, + r.submitted_at_ts + FROM reviews_raw r + WHERE + lowerUTF8(r.state) != 'dismissed' + AND r.author_association IN ( + 'MEMBER', 'OWNER', 'COLLABORATOR', 'CONTRIBUTOR' + ) + AND r.reviewer NOT LIKE '%[bot]' + AND lowerUTF8(r.reviewer) NOT LIKE '%bot%' +), + +first_human_review AS ( + SELECT + pr.pr_number, + -- Define "first review" as first non-approved human review (commented/changes_requested) + minIf( + hr.submitted_at_ts, + hr.reviewer != pr.author + AND lowerUTF8(hr.state) IN ('commented', 'changes_requested') + ) AS first_review_ts + FROM prs pr + LEFT JOIN human_reviews hr ON pr.pr_number = hr.pr_number + GROUP BY pr.pr_number +), + +first_approval AS ( + SELECT + pr.pr_number, + -- Only count approvals from maintainers (exclude contributor approvals) + minIf( + hr.submitted_at_ts, + lowerUTF8(hr.state) = 'approved' + AND hr.reviewer != pr.author + AND hr.author_association IN ('MEMBER', 'OWNER', 'COLLABORATOR') + ) AS first_approval_ts + FROM prs pr + LEFT JOIN human_reviews hr ON pr.pr_number = hr.pr_number + GROUP BY pr.pr_number +), + +durations AS ( + SELECT + pr.pr_number, + coalesce(re.first_ready_ts, pr.created_at_ts) AS ready_ts, + fr.first_review_ts, + fa.first_approval_ts, + pr.merged_at_ts, + -- Durations in hours + if( + fr.first_review_ts IS NULL + OR fr.first_review_ts + < coalesce(re.first_ready_ts, pr.created_at_ts), + NULL, + dateDiff( + 'second', + coalesce(re.first_ready_ts, pr.created_at_ts), + fr.first_review_ts + ) + / 3600.0 + ) AS time_to_first_review_hours, + + if( + fa.first_approval_ts IS NULL + OR fr.first_review_ts IS NULL + OR fa.first_approval_ts < fr.first_review_ts, + NULL, + dateDiff('second', fr.first_review_ts, fa.first_approval_ts) + / 3600.0 + ) AS time_to_approval_hours, + + if( + fa.first_approval_ts IS NULL + OR pr.merged_at_ts < fa.first_approval_ts, + NULL, + dateDiff('second', 
fa.first_approval_ts, pr.merged_at_ts) / 3600.0 + ) AS time_in_merge_queue_hours + FROM prs pr + LEFT JOIN ready_events re ON pr.pr_number = re.pr_number + LEFT JOIN first_human_review fr ON pr.pr_number = fr.pr_number + LEFT JOIN first_approval fa ON pr.pr_number = fa.pr_number +), + +filtered AS ( + SELECT * + FROM durations + WHERE + ( + time_to_first_review_hours IS NULL + OR ( + time_to_first_review_hours >= 0 + AND time_to_first_review_hours < 24 * 30 + ) + ) + AND ( + time_to_approval_hours IS NULL + OR ( + time_to_approval_hours >= 0 AND time_to_approval_hours < 24 * 30 + ) + ) + AND ( + time_in_merge_queue_hours IS NULL + OR ( + time_in_merge_queue_hours >= 0 + AND time_in_merge_queue_hours < 24 * 30 + ) + ) +) + +SELECT + round(quantile(0.5) (time_to_first_review_hours), 2) + AS time_to_first_review_p50, + round(quantile(0.9) (time_to_first_review_hours), 2) + AS time_to_first_review_p90, + round(quantile(0.5) (time_to_approval_hours), 2) AS time_to_approval_p50, + round(quantile(0.9) (time_to_approval_hours), 2) AS time_to_approval_p90, + round(quantile(0.5) (time_in_merge_queue_hours), 2) + AS time_in_merge_queue_p50, + round(quantile(0.9) (time_in_merge_queue_hours), 2) + AS time_in_merge_queue_p90 +FROM filtered +-- Quantiles ignore NULLs implicitly; if a column is entirely NULL in window, result will be NULL diff --git a/torchci/clickhouse_queries/vllm/trunk_health/params.json b/torchci/clickhouse_queries/vllm/trunk_health/params.json new file mode 100644 index 0000000000..3bf90eef34 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/trunk_health/params.json @@ -0,0 +1,18 @@ +{ + "params": { + "granularity": "String", + "repo": "String", + "pipelineName": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [ + { + "granularity": "day", + "repo": "https://github.com/vllm-project/vllm.git", + "pipelineName": "CI", + "startTime": "2025-09-22T00:00:00.000", + "stopTime": "2025-09-29T00:00:00.000" + } + ] +} diff --git 
a/torchci/clickhouse_queries/vllm/trunk_health/query.sql b/torchci/clickhouse_queries/vllm/trunk_health/query.sql new file mode 100644 index 0000000000..46bc09435e --- /dev/null +++ b/torchci/clickhouse_queries/vllm/trunk_health/query.sql @@ -0,0 +1,23 @@ +-- vLLM trunk health history +-- Returns individual main branch builds with timestamps for hourly visualization + +SELECT + tupleElement(build, 'number') AS build_number, + tupleElement(build, 'started_at') AS build_started_at, + tupleElement(build, 'state') AS build_state, + if( + lowerUTF8(tupleElement(build, 'state')) IN ( + 'passed', 'finished', 'success' + ), + 1, + 0 + ) AS is_green +FROM vllm.vllm_buildkite_builds +WHERE + tupleElement(pipeline, 'repository') = {repo: String } + AND tupleElement(pipeline, 'name') = {pipelineName: String } + AND tupleElement(build, 'branch') = 'main' + AND tupleElement(build, 'started_at') IS NOT NULL + AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) } + AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) } +ORDER BY build_started_at ASC diff --git a/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json b/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json new file mode 100644 index 0000000000..3dde93989b --- /dev/null +++ b/torchci/clickhouse_queries/vllm/trunk_recovery_time/params.json @@ -0,0 +1,16 @@ +{ + "params": { + "repo": "String", + "pipelineName": "String", + "startTime": "DateTime64(3)", + "stopTime": "DateTime64(3)" + }, + "tests": [ + { + "repo": "https://github.com/vllm-project/vllm.git", + "pipelineName": "CI", + "startTime": "2025-09-22T00:00:00.000", + "stopTime": "2025-09-29T00:00:00.000" + } + ] +} diff --git a/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql b/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql new file mode 100644 index 0000000000..06a53ece82 --- /dev/null +++ b/torchci/clickhouse_queries/vllm/trunk_recovery_time/query.sql @@ -0,0 +1,64 @@ +-- vLLM trunk 
recovery time +-- Tracks how long it takes to recover when main breaks +-- Shows time between the failed build immediately preceding a recovery and the first green build after it. NOTE(review): for a red streak of several failed builds, break_time is the start of the LAST failed build, not the build where main first went red. + +WITH main_builds AS ( + SELECT + tupleElement(build, 'number') AS build_number, + tupleElement(build, 'started_at') AS build_started_at, + tupleElement(build, 'state') AS build_state, + if( + lowerUTF8(build_state) IN ('passed', 'finished', 'success'), + 1, + if(lowerUTF8(build_state) = 'failed', 0, -1) + ) AS is_success + FROM vllm.vllm_buildkite_builds + WHERE + tupleElement(pipeline, 'repository') = {repo: String } + AND tupleElement(pipeline, 'name') = {pipelineName: String } + AND tupleElement(build, 'branch') = 'main' + AND tupleElement(build, 'started_at') IS NOT NULL + AND tupleElement(build, 'started_at') >= {startTime: DateTime64(3) } + AND tupleElement(build, 'started_at') < {stopTime: DateTime64(3) } +), + +-- Attach the previous passed/failed build's result and start time to each build; builds in any other state (is_success = -1) are ignored +build_with_prev AS ( + SELECT + build_number, + build_started_at, + is_success, + lagInFrame(is_success) + OVER ( + ORDER BY build_started_at + ) + AS prev_is_success, + lagInFrame(build_started_at) + OVER ( + ORDER BY build_started_at + ) + AS prev_build_time + FROM main_builds + WHERE is_success IN (0, 1) +), + +-- Find recovery events (failed -> success transitions) +recovery_events AS ( + SELECT + prev_build_time AS break_time, + build_started_at AS recovery_time, + dateDiff('minute', prev_build_time, build_started_at) + / 60.0 AS recovery_hours + FROM build_with_prev + WHERE + is_success = 1 + AND prev_is_success = 0 + AND prev_build_time IS NOT NULL +) + +SELECT + break_time, + recovery_time, + recovery_hours +FROM recovery_events +ORDER BY break_time ASC diff --git a/torchci/components/metrics/panels/ScalarPanel.tsx b/torchci/components/metrics/panels/ScalarPanel.tsx index 7fc60ddfc3..02a827e95d 100644 --- a/torchci/components/metrics/panels/ScalarPanel.tsx +++ b/torchci/components/metrics/panels/ScalarPanel.tsx @@ -15,11 +15,17 @@ export function ScalarPanelWithValue({ 
valueRenderer, // Callback to decide whether the scalar value is "bad" and should be displayed red. badThreshold, + // Optional styles to apply to the Paper + paperSx, + // Optional styles to apply to the title Typography + titleSx, }: { title: string; value: any; valueRenderer: (_value: any) => string; badThreshold: (_value: any) => boolean; + paperSx?: any; + titleSx?: any; }) { if (value === undefined) { return ; @@ -28,14 +34,25 @@ export function ScalarPanelWithValue({ let fontColor = badThreshold(value) ? "#ee6666" : "inherit"; return ( - + - + {title} { + const s = params.data?.build_state?.toLowerCase?.(); + if (s === "failed") return COLOR_ERROR; + if (s === "canceled" || s === "cancelled") return COLOR_GRAY; + if (s === "passed" || s === "finished" || s === "success") + return COLOR_SUCCESS; + return COLOR_SUCCESS; + }, + }, + }; +} + +// Helper function to generate line series for daily averages +function getLineSeries( + dailyMeanSuccess: any[], + dailyMeanNonCanceled: any[] +): any[] { + return [ + { + name: "Daily mean (success)", + type: "line", + datasetIndex: 1, + smooth: true, + encode: { x: "day", y: "value" }, + lineStyle: { color: COLOR_SUCCESS_LINE, opacity: 0.7, width: 1 }, + showSymbol: true, + symbolSize: 4, + }, + { + name: "Daily mean (success+failed)", + type: "line", + datasetIndex: 2, + smooth: true, + encode: { x: "day", y: "value" }, + lineStyle: { color: COLOR_MIXED_LINE, opacity: 0.7, width: 1 }, + showSymbol: true, + symbolSize: 4, + }, + ]; +} + +// Helper function to generate scatter series for legend +function getLegendScatterSeries(): any[] { + return [ + { + name: "Success", + type: "scatter", + data: [], + itemStyle: { color: COLOR_SUCCESS }, + tooltip: { show: false }, + silent: true, + }, + { + name: "Failed", + type: "scatter", + data: [], + itemStyle: { color: COLOR_ERROR }, + tooltip: { show: false }, + silent: true, + }, + { + name: "Canceled", + type: "scatter", + data: [], + itemStyle: { color: COLOR_GRAY }, + 
tooltip: { show: false }, + silent: true, + }, + ]; +} + +// Helper function to format tooltip content +function formatTooltip(params: any): string { + if (params.seriesType === "line") { + const rawVal = Array.isArray(params.value) + ? params.value[1] + : params.data?.value; + return `Day: ${params.data.day}
Daily mean: ${rawVal} h`; + } + const d = params.data; + const when = d.started_at ? dayjs(d.started_at).format("M/D/YY h:mm A") : ""; + return `Started: ${when}
Pipeline: ${d.pipeline_name}
Build #: ${d.build_number}
Duration: ${d.duration_hours} h`; +} + +export default function CiDurationsPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + const source = (data || []).map((d: any) => ({ + ...d, + started_at: d.started_at ? dayjs(d.started_at).toISOString() : null, + duration_hours: Number(d.duration_hours), + })); + const durations = source + .map((s) => s.duration_hours) + .filter((x) => Number.isFinite(x)); + const sorted = [...durations].sort((a, b) => a - b); + const quantile = (p: number) => + sorted.length ? sorted[Math.floor((sorted.length - 1) * p)] : undefined; + const p10 = quantile(0.1); + const p50 = quantile(0.5); + const p90 = quantile(0.9); + + const successStates = new Set(["passed", "finished", "success"]); + const nonCanceled = source.filter((s: any) => { + const st = (s.build_state || "").toLowerCase(); + return st !== "canceled" && st !== "cancelled"; + }); + const successOnly = source.filter((s: any) => + successStates.has((s.build_state || "").toLowerCase()) + ); + + const groupDaily = (rows: any[]) => { + const grouped = _.groupBy(rows, (s) => + s.started_at ? (s.started_at as string).slice(0, 10) : "" + ); + return Object.entries(grouped) + .filter(([k]) => k !== "") + .map(([day, rs]: any) => { + const vals = rs + .map((r: any) => Number(r.duration_hours)) + .filter((x: number) => Number.isFinite(x)); + const value = vals.length ? _.sum(vals) / vals.length : undefined; + return { + day, + value: value !== undefined ? Number(value.toFixed(3)) : undefined, + }; + }) + .sort((a: any, b: any) => (a.day < b.day ? 
-1 : 1)); + }; + + let dailyMeanSuccess = groupDaily(successOnly); + const dailyMeanNonCanceled = groupDaily(nonCanceled); + if (dailyMeanNonCanceled.length > 0 && dailyMeanSuccess.length > 0) { + const lastDay = dailyMeanNonCanceled[dailyMeanNonCanceled.length - 1].day; + const hasLastDay = dailyMeanSuccess.some((d: any) => d.day === lastDay); + if (!hasLastDay) { + const lastVal = dailyMeanSuccess[dailyMeanSuccess.length - 1].value; + if (lastVal !== undefined) { + dailyMeanSuccess = [ + ...dailyMeanSuccess, + { day: lastDay, value: lastVal }, + ]; + } + } + } + + const options: EChartsOption = { + title: { text: "CI run duration (hours)", subtext: "Buildkite builds" }, + legend: { + top: 24, + data: [ + { name: "Daily mean (success)" }, + { name: "Daily mean (success+failed)" }, + { name: "Success" }, + { name: "Failed" }, + { name: "Canceled" }, + ], + selectedMode: false, + }, + grid: { top: 60, right: 8, bottom: 24, left: 64 }, + dataset: [ + { source }, + { source: dailyMeanSuccess }, + { source: dailyMeanNonCanceled }, + ], + xAxis: { type: "time", axisLabel: { hideOverlap: true } }, + yAxis: { + type: "value", + name: "hours", + nameLocation: "middle", + nameGap: 42, + nameRotate: 90, + axisLabel: { margin: 8 }, + }, + tooltip: { + trigger: "item", + formatter: formatTooltip, + }, + series: [ + getMainScatterSeries(), + ...getLineSeries(dailyMeanSuccess, dailyMeanNonCanceled), + ...getLegendScatterSeries(), + ], + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/DurationDistributionPanel.tsx b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx new file mode 100644 index 0000000000..b7f5912601 --- /dev/null +++ b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx @@ -0,0 +1,165 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { + getChartTitle, + getReactEChartsProps, + 
GRID_DEFAULT, +} from "./chartUtils"; +import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants"; + +// Helper function to create histogram bins of width binSize starting at 0; uses floor(max / binSize) + 1 bins so a duration exactly on a bin edge (including the maximum, or a maximum of 0) is still counted +function createHistogramBins( + durations: number[], + binSize: number = 0.5 +): { range: string; count: number; midpoint: number }[] { + if (durations.length === 0) return []; + + const maxDuration = Math.max(...durations); + const numBins = Math.floor(maxDuration / binSize) + 1; + const bins: { range: string; count: number; midpoint: number }[] = []; + + for (let i = 0; i < numBins; i++) { + const start = i * binSize; + const end = (i + 1) * binSize; + const count = durations.filter((d) => d >= start && d < end).length; + bins.push({ + range: `${start.toFixed(1)}-${end.toFixed(1)}h`, + count, + midpoint: (start + end) / 2, + }); + } + + return bins; +} + +// Helper function to format distribution tooltip +function formatDistributionTooltip(params: any): string { + if (!Array.isArray(params)) params = [params]; + + const range = params[0]?.name || ""; + let result = `Duration: ${range}
`; + + params.forEach((p: any) => { + if (p.value !== undefined && p.value > 0) { + result += `${p.marker} ${p.seriesName}: ${p.value} build(s)
`; + } + }); + + return result; +} + +// Helper function to get distribution series +function getDistributionSeries( + successBins: any[], + failedBins: any[], + canceledBins: any[] +): any[] { + return [ + { + name: "Success", + type: "bar", + data: successBins.map((b) => b.count), + itemStyle: { color: COLOR_SUCCESS }, + emphasis: { focus: "series" }, + }, + { + name: "Failed", + type: "bar", + data: failedBins.map((b) => b.count), + itemStyle: { color: COLOR_ERROR }, + emphasis: { focus: "series" }, + }, + { + name: "Canceled", + type: "bar", + data: canceledBins.map((b) => b.count), + itemStyle: { color: COLOR_GRAY }, + emphasis: { focus: "series" }, + }, + ]; +} + +export default function DurationDistributionPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Process data into duration buckets by status + const source = (data || []).map((d: any) => ({ + duration: Number(d.duration_hours), + status: (d.build_state || "").toLowerCase(), + })); + + const successStates = new Set(["passed", "finished", "success"]); + const canceledStates = new Set(["canceled", "cancelled"]); + + const successDurations = source + .filter((s) => successStates.has(s.status) && Number.isFinite(s.duration)) + .map((s) => s.duration); + + const failedDurations = source + .filter((s) => s.status === "failed" && Number.isFinite(s.duration)) + .map((s) => s.duration); + + const canceledDurations = source + .filter((s) => canceledStates.has(s.status) && Number.isFinite(s.duration)) + .map((s) => s.duration); + + // Create histogram bins + const binSize = 0.5; // 30 minute bins + const successBins = createHistogramBins(successDurations, binSize); + const failedBins = createHistogramBins(failedDurations, binSize); + const canceledBins = createHistogramBins(canceledDurations, binSize); + + // Use the longest bin range for x-axis categories + const allBins = [successBins, failedBins, canceledBins]; + const categories = + allBins + .reduce((a, b) => 
(a.length > b.length ? a : b), []) + .map((b) => b.range) || []; + + const options: EChartsOption = { + title: getChartTitle( + "CI Duration Distribution", + "Histogram by build outcome" + ), + legend: { + top: 24, + data: ["Success", "Failed", "Canceled"], + }, + grid: GRID_DEFAULT, + xAxis: { + type: "category", + data: categories, + name: "Duration Range", + nameLocation: "middle", + nameGap: 40, + axisLabel: { + rotate: 45, + fontSize: 10, + }, + }, + yAxis: { + type: "value", + name: "Count", + nameLocation: "middle", + nameGap: 40, + }, + series: getDistributionSeries(successBins, failedBins, canceledBins), + tooltip: { + trigger: "axis", + axisPointer: { type: "shadow" }, + formatter: formatDistributionTooltip, + }, + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx b/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx new file mode 100644 index 0000000000..fbbd4b250c --- /dev/null +++ b/torchci/components/metrics/vllm/ForceMergeBreakdownPanel.tsx @@ -0,0 +1,99 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import _ from "lodash"; +import { getReactEChartsProps } from "./chartUtils"; +import { COLOR_BORDER_WHITE, COLOR_ERROR, COLOR_WARNING } from "./constants"; + +// Helper function to format breakdown tooltip +function formatBreakdownTooltip(params: any): string { + const name = params.name; + const value = params.value; + const percent = params.percent; + + return `${name}
Count: ${value}
Percentage: ${percent.toFixed( + 1 + )}%`; +} + +// Helper function to get pie series +function getPieSeries(data: any[]): any { + return { + name: "Force Merge Reason", + type: "pie", + radius: ["40%", "70%"], + avoidLabelOverlap: true, + itemStyle: { + borderRadius: 10, + borderColor: COLOR_BORDER_WHITE, + borderWidth: 2, + }, + label: { + show: true, + formatter: "{b}: {d}%", + }, + emphasis: { + label: { + show: true, + fontSize: 16, + fontWeight: "bold", + }, + }, + data: data, + }; +} + +export default function ForceMergeBreakdownPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Sum up the counts across all time periods + const manualMergedFailures = + data === undefined || data.length === 0 + ? 0 + : _.sumBy(data, "manual_merged_with_failures_count"); + const manualMergedPending = + data === undefined || data.length === 0 + ? 0 + : _.sumBy(data, "manual_merged_pending_count"); + + const pieData = [ + { + value: manualMergedFailures, + name: "CI Failure (failing checks)", + itemStyle: { color: COLOR_ERROR }, + }, + { + value: manualMergedPending, + name: "Impatience (checks pending)", + itemStyle: { color: COLOR_WARNING }, + }, + ]; + + const options: EChartsOption = { + title: { + text: "Force Merge Breakdown", + subtext: "Reasons for manual merges", + }, + tooltip: { + trigger: "item", + formatter: formatBreakdownTooltip, + }, + legend: { + orient: "vertical", + left: "left", + top: "middle", + }, + series: getPieSeries(pieData), + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/JobReliabilityPanel.tsx b/torchci/components/metrics/vllm/JobReliabilityPanel.tsx new file mode 100644 index 0000000000..fac56e8eb5 --- /dev/null +++ b/torchci/components/metrics/vllm/JobReliabilityPanel.tsx @@ -0,0 +1,147 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; 
+import { getReactEChartsProps } from "./chartUtils"; +import { + COLOR_BG_DARK, + COLOR_BORDER_LIGHT, + COLOR_ERROR, + COLOR_SUCCESS, + COLOR_WARNING, +} from "./constants"; + +// Helper function to format success rate label +function formatSuccessRateLabel(params: any): string { + const rate = params.value * 100; + return rate.toFixed(1) + "%"; +} + +// Helper function to format job reliability tooltip +function formatJobReliabilityTooltip(params: any, sortedData: any[]): string { + const param = params[0]; + const jobData = sortedData[param.dataIndex]; + if (!jobData) return ""; + + const successRate = jobData.success_rate + ? (jobData.success_rate * 100).toFixed(1) + "%" + : "N/A"; + const passed = jobData.passed_count || 0; + const failed = jobData.failed_count || 0; + const canceled = jobData.canceled_count || 0; + const total = jobData.total_count || 0; + const nonCanceled = jobData.non_canceled_count || 0; + + return ( + `${jobData.job_name}
` + + `Success Rate: ${successRate}
` + + `Passed: ${passed}
` + + `Failed: ${failed}
` + + `Canceled: ${canceled}
` + + `Non-canceled: ${nonCanceled}
` + + `Total: ${total}` + ); +} + +export default function JobReliabilityPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Sort by success rate (worst first) and prepare data + const sortedData = [...(data || [])].sort((a, b) => { + const rateA = a.success_rate ?? 0; + const rateB = b.success_rate ?? 0; + return rateA - rateB; + }); + + const jobNames = sortedData.map((d) => d.job_name); + const successRates = sortedData.map((d) => d.success_rate ?? 0); + + // Color code by reliability: red (<70%), yellow (70-90%), green (>90%) + const itemColors = successRates.map((rate) => { + if (rate < 0.7) return COLOR_ERROR; + if (rate < 0.9) return COLOR_WARNING; + return COLOR_SUCCESS; + }); + + const options: EChartsOption = { + title: { + text: "Per-Job Reliability", + subtext: "Success rate by job (min 3 runs)", + }, + grid: { + top: 60, + right: 60, + bottom: 24, + left: 40, + containLabel: true, + }, + xAxis: { + type: "value", + name: "Success Rate", + min: 0, + max: 1, + axisLabel: { + formatter: (value: number) => (value * 100).toFixed(0) + "%", + }, + }, + yAxis: { + type: "category", + data: jobNames, + axisLabel: { + interval: 0, + fontSize: 10, + }, + inverse: false, // Worst jobs at bottom + }, + series: [ + { + name: "Success Rate", + type: "bar", + data: successRates.map((rate, idx) => ({ + value: rate, + itemStyle: { color: itemColors[idx] }, + })), + label: { + show: true, + position: "right", + formatter: formatSuccessRateLabel, + fontSize: 9, + }, + }, + ], + tooltip: { + trigger: "axis", + axisPointer: { + type: "shadow", + }, + formatter: (params: any) => + formatJobReliabilityTooltip(params, sortedData), + }, + dataZoom: [ + { + type: "slider", + yAxisIndex: 0, + show: true, + right: 10, + width: 30, + start: + jobNames.length > 15 + ? Math.max(0, 100 - (15 / jobNames.length) * 100) + : 0, + end: 100, + handleSize: "100%", + borderColor: darkMode ? 
COLOR_BG_DARK : COLOR_BORDER_LIGHT, + }, + ], + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/MergesPanel.tsx b/torchci/components/metrics/vllm/MergesPanel.tsx new file mode 100644 index 0000000000..0e76485a78 --- /dev/null +++ b/torchci/components/metrics/vllm/MergesPanel.tsx @@ -0,0 +1,67 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { getReactEChartsProps } from "./chartUtils"; +import { COLOR_ERROR, COLOR_SUCCESS, COLOR_WARNING } from "./constants"; + +// Helper function to format merges tooltip +function formatMergesTooltip(params: any): string { + const manualMergedFailures = params[0].data.manual_merged_with_failures_count; + const manualMerged = params[0].data.manual_merged_count; + const autoMerged = params[0].data.auto_merged_count; + const total = manualMergedFailures + manualMerged + autoMerged; + const manualMergedFailuresPct = + ((manualMergedFailures / total) * 100).toFixed(1) + "%"; + const manualMergedPct = ((manualMerged / total) * 100).toFixed(1) + "%"; + const autoMergedPct = ((autoMerged / total) * 100).toFixed(1) + "%"; + return ( + `Force merges (red): ${manualMergedFailures} (${manualMergedFailuresPct})` + + `
Manual merges (orange): ${manualMerged} (${manualMergedPct})` + + `
Auto merges (green): ${autoMerged} (${autoMergedPct})` + + `
Total: ${total}` + ); +} + +export default function MergesPanel({ data }: { data: any }) { + const { darkMode } = useDarkMode(); + + const options: EChartsOption = { + title: { text: "Merged pull requests, by day", subtext: "" }, + grid: { top: 60, right: 8, bottom: 24, left: 36 }, + dataset: { source: data }, + xAxis: { type: "category" }, + yAxis: { type: "value" }, + series: [ + { + type: "bar", + stack: "all", + encode: { x: "granularity_bucket", y: "auto_merged_count" }, + }, + { + type: "bar", + stack: "all", + encode: { x: "granularity_bucket", y: "manual_merged_count" }, + }, + { + type: "bar", + stack: "all", + encode: { + x: "granularity_bucket", + y: "manual_merged_with_failures_count", + }, + }, + ], + color: [COLOR_SUCCESS, COLOR_WARNING, COLOR_ERROR], + tooltip: { + trigger: "axis", + formatter: formatMergesTooltip, + }, + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/ReliabilityPanel.tsx b/torchci/components/metrics/vllm/ReliabilityPanel.tsx new file mode 100644 index 0000000000..3db9d72b5e --- /dev/null +++ b/torchci/components/metrics/vllm/ReliabilityPanel.tsx @@ -0,0 +1,109 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { + getCrosshairTooltipConfig, + getReactEChartsProps, + GRID_DEFAULT, +} from "./chartUtils"; +import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants"; + +// Helper function to generate stacked bar series for reliability data +function getReliabilityBarSeries(): any[] { + return [ + { + name: "Passed", + type: "bar", + stack: "builds", + encode: { x: "granularity_bucket", y: "passed_count" }, + itemStyle: { color: COLOR_SUCCESS }, + emphasis: { + focus: "series", + }, + }, + { + name: "Failed", + type: "bar", + stack: "builds", + encode: { x: "granularity_bucket", y: "failed_count" }, + itemStyle: { color: COLOR_ERROR }, + emphasis: { + focus: 
"series", + }, + }, + { + name: "Canceled", + type: "bar", + stack: "builds", + encode: { x: "granularity_bucket", y: "canceled_count" }, + itemStyle: { color: COLOR_GRAY }, + emphasis: { + focus: "series", + }, + }, + ]; +} + +// Helper function to format reliability tooltip +function formatReliabilityTooltip(params: any): string { + const data = params[0]?.data; + if (!data) return ""; + + const successRate = data.success_rate + ? (data.success_rate * 100).toFixed(1) + "%" + : "N/A"; + const passed = data.passed_count || 0; + const failed = data.failed_count || 0; + const canceled = data.canceled_count || 0; + const total = data.total_count || 0; + const nonCanceled = data.non_canceled_count || 0; + + return ( + `${data.granularity_bucket}
` + + `Success Rate: ${successRate}
` + + `Passed: ${passed}
` + + `Failed: ${failed}
` + + `Canceled: ${canceled}
` + + `Non-canceled: ${nonCanceled}
` + + `Total: ${total}` + ); +} + +export default function ReliabilityPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + const options: EChartsOption = { + title: { + text: "CI Build Counts", + subtext: "Daily build breakdown", + }, + legend: { + top: 24, + data: ["Passed", "Failed", "Canceled"], + }, + grid: { ...GRID_DEFAULT, bottom: 24 }, + dataset: { source: data || [] }, + xAxis: { type: "category" }, + yAxis: { + type: "value", + name: "Count", + position: "left", + axisLabel: { + formatter: "{value}", + }, + }, + series: getReliabilityBarSeries(), + tooltip: getCrosshairTooltipConfig(darkMode, formatReliabilityTooltip), + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx b/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx new file mode 100644 index 0000000000..dc5068523a --- /dev/null +++ b/torchci/components/metrics/vllm/ReliabilityTrendPanel.tsx @@ -0,0 +1,115 @@ +import { Paper } from "@mui/material"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { + getCrosshairTooltipConfig, + getReactEChartsProps, + GRID_DEFAULT, +} from "./chartUtils"; +import { COLOR_ERROR, COLOR_GRAY, COLOR_SUCCESS } from "./constants"; + +// Helper function to generate line series for reliability trends +function getReliabilityLineSeries(): any[] { + return [ + { + name: "Passed", + type: "line", + encode: { x: "granularity_bucket", y: "passed_count" }, + smooth: true, + lineStyle: { color: COLOR_SUCCESS, width: 2 }, + itemStyle: { color: COLOR_SUCCESS }, + symbolSize: 6, + emphasis: { + focus: "series", + }, + }, + { + name: "Failed", + type: "line", + encode: { x: "granularity_bucket", y: "failed_count" }, + smooth: true, + lineStyle: { color: COLOR_ERROR, width: 2 }, + itemStyle: { color: COLOR_ERROR }, + symbolSize: 6, + emphasis: { + focus: "series", + }, + }, + { + name: 
"Canceled", + type: "line", + encode: { x: "granularity_bucket", y: "canceled_count" }, + smooth: true, + lineStyle: { color: COLOR_GRAY, width: 2 }, + itemStyle: { color: COLOR_GRAY }, + symbolSize: 6, + emphasis: { + focus: "series", + }, + }, + ]; +} + +// Helper function to format reliability trend tooltip +function formatReliabilityTrendTooltip(params: any): string { + const data = params[0]?.data; + if (!data) return ""; + + const passed = data.passed_count || 0; + const failed = data.failed_count || 0; + const canceled = data.canceled_count || 0; + const total = data.total_count || 0; + const nonCanceled = data.non_canceled_count || 0; + const successRate = data.success_rate + ? (data.success_rate * 100).toFixed(1) + "%" + : "N/A"; + + return ( + `${data.granularity_bucket}
` + + `Passed: ${passed}
` + + `Failed: ${failed}
` + + `Canceled: ${canceled}
` + + `Non-canceled: ${nonCanceled}
` + + `Total: ${total}
` + + `Success Rate: ${successRate}` + ); +} + +export default function ReliabilityTrendPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + const options: EChartsOption = { + title: { + text: "CI Reliability Trends", + subtext: "Daily success rate over time", + }, + legend: { + top: 24, + data: ["Passed", "Failed", "Canceled"], + }, + grid: { ...GRID_DEFAULT, bottom: 24 }, + dataset: { source: data || [] }, + xAxis: { type: "category" }, + yAxis: { + type: "value", + name: "Count", + position: "left", + axisLabel: { + formatter: "{value}", + }, + }, + series: getReliabilityLineSeries(), + tooltip: getCrosshairTooltipConfig(darkMode, formatReliabilityTrendTooltip), + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/TrunkHealthPanel.tsx b/torchci/components/metrics/vllm/TrunkHealthPanel.tsx new file mode 100644 index 0000000000..d58cb834e9 --- /dev/null +++ b/torchci/components/metrics/vllm/TrunkHealthPanel.tsx @@ -0,0 +1,138 @@ +import { Paper } from "@mui/material"; +import dayjs from "dayjs"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { + getChartTitle, + getReactEChartsProps, + GRID_LEFT_WIDE, +} from "./chartUtils"; +import { COLOR_BORDER_DARK, COLOR_ERROR, COLOR_SUCCESS } from "./constants"; + +// Helper function to handle heatmap cell click +function handleTrunkHealthClick(params: any) { + if (params?.componentType === "series") { + const buildNumber = params?.data?.[3]; // 4th element is build number + if (buildNumber !== undefined && buildNumber !== null) { + const url = `https://buildkite.com/vllm/ci/builds/${buildNumber}/`; + if (typeof window !== "undefined") { + window.open(url, "_blank"); + } + } + } +} + +// Helper function to format trunk health tooltip +function formatTrunkHealthTooltip(params: any): string { + const data = params.data; + if (!data) return ""; + + const date = 
data[0]; + const hour = data[1]; + const isGreen = data[2] === 1; + const status = isGreen ? "Green ✓" : "Red ✗"; + const buildNumber = data[3]; + + return ( + `${date} ${hour}:00
` + + `Status: ${status}
` + + `Build #${buildNumber}` + ); +} + +// Helper function to get trunk health series +function getTrunkHealthSeries(processedData: any[]): any { + return { + name: "Trunk Status", + type: "heatmap", + data: processedData, + label: { + show: false, + }, + emphasis: { + itemStyle: { + shadowBlur: 10, + shadowColor: "rgba(0, 0, 0, 0.5)", + }, + }, + itemStyle: { + borderWidth: 1, + borderColor: COLOR_BORDER_DARK, + }, + }; +} + +export default function TrunkHealthPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Process data into heatmap format: [date, hour, status, buildNumber] + const processedData = (data || []).map((d: any) => { + const timestamp = dayjs(d.build_started_at); + const date = timestamp.format("YYYY-MM-DD"); + const hour = timestamp.hour(); + return [date, hour, d.is_green, d.build_number]; + }); + + // Get unique dates and hours for the grid + const uniqueDates = [...new Set(processedData.map((d) => d[0]))].sort(); + const hours = Array.from({ length: 24 }, (_, i) => i); + + const options: EChartsOption = { + title: getChartTitle("Main Branch Health", "Build status heatmap"), + grid: GRID_LEFT_WIDE, + xAxis: { + type: "category", + data: uniqueDates, + name: "Date", + nameLocation: "middle", + nameGap: 40, + axisLabel: { + rotate: 45, + fontSize: 9, + }, + }, + yAxis: { + type: "category", + data: hours, + name: "Hour", + nameLocation: "middle", + nameGap: 50, + nameRotate: 90, + axisLabel: { + formatter: (value: any) => `${value}:00`, + fontSize: 9, + }, + }, + visualMap: { + show: false, + min: 0, + max: 1, + dimension: 2, + inRange: { + color: [COLOR_ERROR, COLOR_SUCCESS], + }, + }, + series: getTrunkHealthSeries(processedData), + tooltip: { + position: "top", + formatter: formatTrunkHealthTooltip, + }, + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx b/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx new file mode 100644 index 
0000000000..f5d1a718c1 --- /dev/null +++ b/torchci/components/metrics/vllm/TrunkRecoveryPanel.tsx @@ -0,0 +1,90 @@ +import { Paper } from "@mui/material"; +import dayjs from "dayjs"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import { + getChartTitle, + getReactEChartsProps, + GRID_DEFAULT, +} from "./chartUtils"; +import { COLOR_ERROR } from "./constants"; + +// Helper function to format recovery tooltip +function formatRecoveryTooltip(params: any): string { + const data = params.data; + if (!data) return ""; + + const breakTime = dayjs(data[0]).format("M/D/YY h:mm A"); + const hours = data[1]; + + return ( + `Trunk Breakage
` + + `When: ${breakTime}
` + + `Recovery time: ${hours.toFixed(1)} hours` + ); +} + +// Helper function to get recovery time series +function getRecoveryTimeSeries(processedData: any[]): any { + return { + name: "Recovery Time", + type: "line", + data: processedData, + smooth: false, + lineStyle: { + color: COLOR_ERROR, + width: 2, + }, + itemStyle: { + color: COLOR_ERROR, + }, + symbolSize: 8, + }; +} + +export default function TrunkRecoveryPanel({ + data, +}: { + data: any[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Process data: [break_time, recovery_hours] + const processedData = (data || []).map((d: any) => [ + dayjs(d.break_time).toDate(), + Number(d.recovery_hours), + ]); + + const options: EChartsOption = { + title: getChartTitle("Main Branch Recovery Time", "Time to fix over time"), + grid: GRID_DEFAULT, + xAxis: { + type: "time", + name: "When Main Broke", + nameLocation: "middle", + nameGap: 40, + axisLabel: { + hideOverlap: true, + formatter: (value: number) => dayjs(value).format("MMM D"), + }, + }, + yAxis: { + type: "value", + name: "Recovery Time (hours)", + nameLocation: "middle", + nameGap: 40, + }, + series: getRecoveryTimeSeries(processedData), + tooltip: { + trigger: "item", + formatter: formatRecoveryTooltip, + }, + }; + + return ( + + + + ); +} diff --git a/torchci/components/metrics/vllm/chartUtils.ts b/torchci/components/metrics/vllm/chartUtils.ts new file mode 100644 index 0000000000..e5f49c8d8b --- /dev/null +++ b/torchci/components/metrics/vllm/chartUtils.ts @@ -0,0 +1,60 @@ +// Shared utility functions and configurations for vLLM chart components +import { + COLOR_BG_DARK, + COLOR_BG_LIGHT, + COLOR_CROSSHAIR_DARK, + COLOR_CROSSHAIR_LIGHT, + COLOR_TEXT_DARK, + COLOR_TEXT_LIGHT, +} from "./constants"; + +// Common title configuration with smaller font +export function getChartTitle(text: string, subtext: string) { + return { + text, + subtext, + textStyle: { + fontSize: 14, + }, + }; +} + +// Common grid configuration +export const 
GRID_DEFAULT = { top: 80, right: 60, bottom: 60, left: 60 }; +export const GRID_COMPACT = { top: 60, right: 8, bottom: 24, left: 64 }; +export const GRID_LEFT_WIDE = { top: 70, right: 40, bottom: 60, left: 75 }; + +// Common crosshair tooltip configuration +export function getCrosshairTooltipConfig(darkMode: boolean, formatter: any) { + const crosshairColor = darkMode + ? COLOR_CROSSHAIR_DARK + : COLOR_CROSSHAIR_LIGHT; + + return { + trigger: "axis" as const, + axisPointer: { + type: "cross" as const, + crossStyle: { + color: crosshairColor, + opacity: 0.5, + }, + lineStyle: { + color: crosshairColor, + opacity: 0.5, + }, + label: { + backgroundColor: darkMode ? COLOR_BG_DARK : COLOR_BG_LIGHT, + color: darkMode ? COLOR_TEXT_DARK : COLOR_TEXT_LIGHT, + }, + }, + formatter, + }; +} + +// Common ReactECharts wrapper props +export function getReactEChartsProps(darkMode: boolean) { + return { + theme: darkMode ? "dark-hud" : undefined, + style: { height: "100%", width: "100%" }, + }; +} diff --git a/torchci/components/metrics/vllm/constants.ts b/torchci/components/metrics/vllm/constants.ts new file mode 100644 index 0000000000..d19f970b92 --- /dev/null +++ b/torchci/components/metrics/vllm/constants.ts @@ -0,0 +1,24 @@ +// Shared color constants for vLLM metrics charts + +// Data visualization colors +export const COLOR_SUCCESS = "#3ba272"; // Green - for successful/passing states +export const COLOR_ERROR = "#ee6666"; // Red - for failures/errors +export const COLOR_WARNING = "#fc9403"; // Orange - for warnings/manual actions +export const COLOR_GRAY = "#9e9e9e"; // Gray - for canceled/neutral states +export const COLOR_SUCCESS_LINE = "#00E676"; // Bright green - for success trend lines +export const COLOR_MIXED_LINE = "#FF4081"; // Pink - for mixed success+failed trend lines + +// UI element colors (light mode) +export const COLOR_CROSSHAIR_LIGHT = "#000000"; +export const COLOR_BG_LIGHT = "#f5f5f5"; +export const COLOR_TEXT_LIGHT = "#333"; +export const 
COLOR_BORDER_LIGHT = "#ddd"; + +// UI element colors (dark mode) +export const COLOR_CROSSHAIR_DARK = "#ffffff"; +export const COLOR_BG_DARK = "#555"; +export const COLOR_TEXT_DARK = "#fff"; + +// Border colors +export const COLOR_BORDER_DARK = "#222"; +export const COLOR_BORDER_WHITE = "#fff"; diff --git a/torchci/pages/metrics/vllm.tsx b/torchci/pages/metrics/vllm.tsx index 3d81bbcac5..3b7a09d337 100644 --- a/torchci/pages/metrics/vllm.tsx +++ b/torchci/pages/metrics/vllm.tsx @@ -1,94 +1,117 @@ -import { Grid, Paper, Skeleton, Stack, Typography } from "@mui/material"; +import { Box, Divider, Grid, Skeleton, Stack, Typography } from "@mui/material"; import { ScalarPanelWithValue } from "components/metrics/panels/ScalarPanel"; +import CiDurationsPanel from "components/metrics/vllm/CiDurationsPanel"; +import DurationDistributionPanel from "components/metrics/vllm/DurationDistributionPanel"; +import ForceMergeBreakdownPanel from "components/metrics/vllm/ForceMergeBreakdownPanel"; +import JobReliabilityPanel from "components/metrics/vllm/JobReliabilityPanel"; +import MergesPanel from "components/metrics/vllm/MergesPanel"; +import ReliabilityPanel from "components/metrics/vllm/ReliabilityPanel"; +import ReliabilityTrendPanel from "components/metrics/vllm/ReliabilityTrendPanel"; +import TrunkHealthPanel from "components/metrics/vllm/TrunkHealthPanel"; +import TrunkRecoveryPanel from "components/metrics/vllm/TrunkRecoveryPanel"; import dayjs from "dayjs"; -import { EChartsOption } from "echarts"; -import ReactECharts from "echarts-for-react"; import { useDarkMode } from "lib/DarkModeContext"; import { useClickHouseAPIImmutable } from "lib/GeneralUtils"; import _ from "lodash"; -import { useState } from "react"; +import React, { useState } from "react"; import { TimeRangePicker } from "../metrics"; const ROW_HEIGHT = 375; -function MergesPanel({ data }: { data: any }) { - // Use the dark mode context to determine whether to use the dark theme - const { darkMode } = 
useDarkMode(); +// moved MergesPanel and CiDurationsPanel to components - const options: EChartsOption = { - title: { - text: "Merged pull requests, by day", - subtext: "", - }, - grid: { top: 60, right: 8, bottom: 24, left: 36 }, - dataset: { source: data }, - xAxis: { type: "category" }, - yAxis: { - type: "value", - }, - series: [ - { - type: "bar", - stack: "all", - encode: { - x: "granularity_bucket", - y: "auto_merged_count", - }, - }, - { - type: "bar", - stack: "all", - encode: { - x: "granularity_bucket", - y: "manual_merged_count", - }, - }, - { - type: "bar", - stack: "all", - encode: { - x: "granularity_bucket", - y: "manual_merged_with_failures_count", - }, - }, - ], - color: ["#3ba272", "#fc9403", "#ee6666"], - tooltip: { - trigger: "axis", - formatter: (params: any) => { - const manualMergedFailures = - params[0].data.manual_merged_with_failures_count; - const manualMerged = params[0].data.manual_merged_count; - const autoMerged = params[0].data.auto_merged_count; - const total = manualMergedFailures + manualMerged + autoMerged; - - const manualMergedFailuresPct = - ((manualMergedFailures / total) * 100).toFixed(1) + "%"; - const manualMergedPct = ((manualMerged / total) * 100).toFixed(1) + "%"; - const autoMergedPct = ((autoMerged / total) * 100).toFixed(1) + "%"; - return `Force merges (red): ${manualMergedFailures} (${manualMergedFailuresPct}) -
- Manual merges (orange): ${manualMerged} (${manualMergedPct}) -
- Auto merges (green): ${autoMerged} (${autoMergedPct}) -
- Total: ${total}`; - }, - }, - }; +// Helper function to safely extract PR cycle data values +function getPrCycleValue( + data: any[] | undefined, + field: string +): number | null | undefined { + if (data === undefined) return undefined; + return data?.[0]?.[field] ?? null; +} + +// Helper function to format hour values +function formatHours(v: number | null | undefined): string { + return v === null || v === undefined ? "-" : Number(v).toFixed(2); +} + +// Helper function to format hour values with unit +function formatHoursWithUnit(v: number | null | undefined): string { + return v === null || v === undefined ? "-" : Number(v).toFixed(2) + "h"; +} + +// Helper function to format percentage values +function formatPercentage(v: number | null | undefined): string { + return v === null || v === undefined ? "-" : (v * 100).toFixed(1) + "%"; +} + +// Helper function to format count values +function formatCount(v: number | null | undefined): string { + return v === null || v === undefined ? 
"-" : v.toString(); +} + +// Type for metric configuration +interface MetricConfig { + title: string; + value: number | null | undefined; + valueRenderer: (v: number | null | undefined) => string; + badThreshold: (v: number | null | undefined) => boolean; + paperSx?: any; +} + +// Helper component to render a stack of metric panels from config +function MetricStack({ metrics }: { metrics: MetricConfig[] }) { + return ( + <> + {metrics.map((metric, index) => ( + + ))} + + ); +} + +// Helper component for a metrics column +function MetricColumn({ + metrics, + height, + size = { xs: 12, md: 3, lg: 2 }, +}: { + metrics: MetricConfig[]; + height?: string | number; + size?: { xs: number; md: number; lg?: number }; +}) { + return ( + + + + + + ); +} +// Helper component for a dashboard row with consistent spacing +function DashboardRow({ + children, + spacing = 2, +}: { + children: React.ReactNode; + spacing?: number; +}) { return ( - - - + + {children} + ); } export default function Page() { + const { darkMode } = useDarkMode(); const [startTime, setStartTime] = useState(dayjs().subtract(1, "week")); const [stopTime, setStopTime] = useState(dayjs()); const [timeRange, setTimeRange] = useState(7); @@ -107,6 +130,92 @@ export default function Page() { } ); + const { data: ciDurations } = useClickHouseAPIImmutable( + "vllm/ci_run_duration", + { + ...timeParams, + // Buildkite uses full repo URL with .git in vLLM dataset + repo: "https://github.com/vllm-project/vllm.git", + pipelineName: "CI", + } + ); + + // Compute CI P50/P90 from returned rows + const points = (ciDurations || []) as any[]; + const successStatesSet = new Set(["passed", "finished", "success"]); + const successDurations = points + .filter((d: any) => + successStatesSet.has(String(d.build_state || "").toLowerCase()) + ) + .map((d: any) => Number(d.duration_hours)) + .filter((x: number) => Number.isFinite(x)) + .sort((a: number, b: number) => a - b); + const nonCanceledDurations = points + .filter((d: any) => 
{ + const s = String(d.build_state || "").toLowerCase(); + return s !== "canceled" && s !== "cancelled"; + }) + .map((d: any) => Number(d.duration_hours)) + .filter((x: number) => Number.isFinite(x)) + .sort((a: number, b: number) => a - b); + const qFrom = (arr: number[], p: number) => + arr.length ? arr[Math.floor((arr.length - 1) * p)] : null; + const ciSuccP50 = + ciDurations === undefined ? undefined : qFrom(successDurations, 0.5); + const ciSuccP90 = + ciDurations === undefined ? undefined : qFrom(successDurations, 0.9); + const ciNCancP50 = + ciDurations === undefined ? undefined : qFrom(nonCanceledDurations, 0.5); + const ciNCancP90 = + ciDurations === undefined ? undefined : qFrom(nonCanceledDurations, 0.9); + + const { data: prCycleData } = useClickHouseAPIImmutable( + "vllm/pr_cycle_time_breakdown", + { + ...timeParams, + repo: "vllm-project/vllm", + } + ); + + const { data: reliabilityData } = useClickHouseAPIImmutable( + "vllm/ci_reliability", + { + ...timeParams, + granularity: "day", + repo: "https://github.com/vllm-project/vllm.git", + pipelineName: "CI", + } + ); + + const { data: jobReliabilityData } = useClickHouseAPIImmutable( + "vllm/job_reliability", + { + ...timeParams, + repo: "https://github.com/vllm-project/vllm.git", + pipelineName: "CI", + minRuns: 3, + } + ); + + const { data: trunkHealthData } = useClickHouseAPIImmutable( + "vllm/trunk_health", + { + ...timeParams, + granularity: "day", + repo: "https://github.com/vllm-project/vllm.git", + pipelineName: "CI", + } + ); + + const { data: trunkRecoveryData } = useClickHouseAPIImmutable( + "vllm/trunk_recovery_time", + { + ...timeParams, + repo: "https://github.com/vllm-project/vllm.git", + pipelineName: "CI", + } + ); + if (data === undefined) { return ; } @@ -115,6 +224,10 @@ export default function Page() { data === undefined || data.length === 0 ? 0 : _.sumBy(data, "manual_merged_with_failures_count"); + const manualMergedPending = + data === undefined || data.length === 0 + ? 
0 + : _.sumBy(data, "manual_merged_pending_count"); const manualMerged = data === undefined || data.length === 0 ? 0 @@ -130,53 +243,278 @@ export default function Page() { total === 0 ? 0 : manualMergedFailures / total; const manualMergedPct = total == 0 ? 0 : manualMerged / total; + // Force merge breakdown percentages + // Total force merges = failures + pending (the two reasons for force merge) + const totalForceMerges = manualMergedFailures + manualMergedPending; + const forceMergeDueToFailurePct = + totalForceMerges === 0 ? 0 : manualMergedFailures / totalForceMerges; + const forceMergeDueToImpatiencePct = + totalForceMerges === 0 ? 0 : manualMergedPending / totalForceMerges; + + // Compute overall reliability metrics + const reliabilityPoints = (reliabilityData || []) as any[]; + const totalPassed = _.sumBy(reliabilityPoints, "passed_count"); + const totalFailed = _.sumBy(reliabilityPoints, "failed_count"); + const totalCanceled = _.sumBy(reliabilityPoints, "canceled_count"); + const totalNonCanceled = totalPassed + totalFailed; + const overallSuccessRate = + reliabilityData === undefined + ? undefined + : totalNonCanceled === 0 + ? null + : totalPassed / totalNonCanceled; + + // Compute trunk health metrics + // Data now contains individual builds, group by day to get daily status + const trunkHealthPoints = (trunkHealthData || []) as any[]; + const buildsByDay = _.groupBy(trunkHealthPoints, (d) => + d.build_started_at ? d.build_started_at.slice(0, 10) : "" + ); + const dailyStatus = Object.entries(buildsByDay).map(([day, builds]) => { + // Day is green if the most recent build was green + const sortedBuilds = _.sortBy(builds, "build_started_at"); + const mostRecent = sortedBuilds[sortedBuilds.length - 1]; + return { day, isGreen: mostRecent?.is_green === 1 }; + }); + const greenDays = dailyStatus.filter((d) => d.isGreen).length; + const totalDays = dailyStatus.length; + const trunkHealthPct = + trunkHealthData === undefined + ? 
undefined + : totalDays === 0 + ? null + : greenDays / totalDays; + + // Compute average recovery time + const recoveryTimes = (trunkRecoveryData || []) as any[]; + const avgRecoveryTime = + trunkRecoveryData === undefined + ? undefined + : recoveryTimes.length === 0 + ? null + : _.meanBy(recoveryTimes, "recovery_hours"); + return ( -
- - +
+ + vLLM CI Metrics - + + + - + {/* Section 1: Key Metrics Summary Cards */} + + + Key Metrics Overview + + + + (v ?? 0) > 0.2, + }, + { + title: "% force merge: CI failure", + value: forceMergeDueToFailurePct, + valueRenderer: formatPercentage, + badThreshold: (v) => (v ?? 0) > 0.5, + }, + ]} + /> + (v ?? 0) > 0.5, + }, + { + title: "% force merge: impatience", + value: forceMergeDueToImpatiencePct, + valueRenderer: formatPercentage, + badThreshold: (v) => (v ?? 0) > 0.3, + }, + ]} + /> + (v ?? 1) < 0.85, + }, + { + title: "Main branch health %", + value: trunkHealthPct, + valueRenderer: formatPercentage, + badThreshold: (v) => (v ?? 1) < 0.9, + }, + ]} + /> + (v ?? 0) > 12, + }, + { + title: "Total Failed Builds", + value: reliabilityData === undefined ? undefined : totalFailed, + valueRenderer: formatCount, + badThreshold: (v) => (v ?? 0) > 10, + }, + ]} + /> + (v ?? 0) > 2, + }, + { + title: "CI Time to green P90", + value: ciSuccP90, + valueRenderer: formatHoursWithUnit, + badThreshold: (v) => (v ?? 0) > 6, + }, + ]} + /> + (v ?? 0) > 24, + }, + ]} + /> + (v ?? 0) > 48, + }, + ]} + /> + (v ?? 0) > 24, + }, + ]} + /> + (v ?? 0) > 72, + }, + ]} + /> + + + {/* Section 2: CI Reliability */} + + + CI Reliability + + + - + + + + + + + + + + + + + + + + + + - - - (value * 100).toFixed(1) + "%"} - badThreshold={(value) => value > 0.2} - /> - (value * 100).toFixed(1) + "%"} - badThreshold={(value) => value > 0.5} - /> - + {/* Section 3: CI Duration Analysis */} + + + CI Duration Analysis + + + + + + + + + + + + {/* Section 4: PR Cycle Metrics */} + + + PR Cycle Metrics + + + + + + + + - +
); }