From 4d9b6478b9d84e80045d920749746dbfec18b52c Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 10:30:04 -0800 Subject: [PATCH 1/6] hidemetadata --- .../common/config.py | 4 ++-- .../benchmark_v3/configs/teams/compilers/config.ts | 12 ++++++------ .../backend/compilers/helpers/precompute.ts | 4 +++- torchci/lib/benchmark/compilerUtils.ts | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index afc0f790e9..0b60017ba5 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -228,8 +228,8 @@ threshold=0.9, baseline_aggregation="max", ), - "geomean": RegressionPolicy( - name="geomean", + "geomean_speedup": RegressionPolicy( + name="geomean_speedup", condition="greater_equal", threshold=0.95, baseline_aggregation="max", diff --git a/torchci/components/benchmark_v3/configs/teams/compilers/config.ts b/torchci/components/benchmark_v3/configs/teams/compilers/config.ts index 318072d430..02d0789df2 100644 --- a/torchci/components/benchmark_v3/configs/teams/compilers/config.ts +++ b/torchci/components/benchmark_v3/configs/teams/compilers/config.ts @@ -32,7 +32,7 @@ const PASSRATE_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = { }, }; const GEOMEAN_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = { - target: "geomean", + target: "geomean_speedup", type: "ratio", ratioPolicy: { badRatio: 0.95, @@ -107,7 +107,7 @@ const RENDER_MAPPING_BOOK = { scale: 100, }, }, - geomean: { + geomean_speedup: { unit: { unit: "x", }, @@ -418,7 +418,7 @@ export const CompilerPrecomputeBenchmarkUIConfig: BenchmarkUIConfig = { passrate: { text: "Passrate", }, - geomean: { + geomean_speedup: { text: "Geometric mean speedup", }, compilation_latency: { @@ -447,7 +447,7 @@ export const CompilerPrecomputeBenchmarkUIConfig: BenchmarkUIConfig = { filterByFieldValues: { metric: [ "passrate", - "geomean", + "geomean_speedup", "compilation_latency", "compression_ratio", ], @@ -465,7 +465,7 @@ export const CompilerPrecomputeBenchmarkUIConfig: BenchmarkUIConfig = { targetField: "metric", comparisonPolicy: { passrate: PASSRATE_COMPARISON_POLICY, - geomean: GEOMEAN_COMPARISON_POLICY, + geomean_speedup: GEOMEAN_COMPARISON_POLICY, compilation_latency: COMPILATION_LATENCY_COMPARISON_POLICY, compression_ratio: COMPRESSION_RATIO_POLICY, }, @@ -474,7 +474,7 @@ export const CompilerPrecomputeBenchmarkUIConfig: BenchmarkUIConfig = { passrate: { text: "Passrate (threshold: 95%)", }, - geomean: { + geomean_speedup: { text: "Geometric mean speedup (threshold = 0.95x)", }, compilation_latency: { diff --git a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts index 59f5da3700..9bfdde4636 100644 --- a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts +++ b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts @@ -67,6 +67,8 @@ export function toPrecomputeCompilerData( const executionTime = computeExecutionTime(data, models); const peakMemoryUsage = computePeakMemoryUsage(data, models); + // filter out export for compiler since it's always 0 + let all_data = [ passrate, geomean, @@ -75,7 +77,7 @@ export function toPrecomputeCompilerData( executionTime, peakMemoryUsage, ].flat(); - + all_data = all_data.filter((row) => (row.compiler == "export" && row.metric != "passrate") ? false : true ); all_data = [...all_data].sort( (a, b) => Date.parse(a.granularity_bucket) - Date.parse(b.granularity_bucket) diff --git a/torchci/lib/benchmark/compilerUtils.ts b/torchci/lib/benchmark/compilerUtils.ts index 4545af7921..fb8b12ab3b 100644 --- a/torchci/lib/benchmark/compilerUtils.ts +++ b/torchci/lib/benchmark/compilerUtils.ts @@ -166,7 +166,7 @@ export function computeGeomean( const [bucket, workflowId, suite, compiler] = key.split("+"); returnedGeomean.push({ - metric: "geomean", + metric: "geomean_speedup", value: Number(gm), granularity_bucket: bucket, workflow_id: workflowId, From 9fbece69298ce1a8672c1fafd9b099c47824483b Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 10:30:31 -0800 Subject: [PATCH 2/6] hidemetadata --- .../api_helper/backend/compilers/helpers/precompute.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts index 9bfdde4636..373c7f1e46 100644 --- a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts +++ b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts @@ -77,7 +77,11 @@ export function toPrecomputeCompilerData( executionTime, peakMemoryUsage, ].flat(); - all_data = all_data.filter((row) => (row.compiler == "export" && row.metric != "passrate") ? false : true ); + + // only show export for passrate + all_data = all_data.filter((row) => + row.compiler == "export" && row.metric != "passrate" ? false : true + ); all_data = [...all_data].sort( (a, b) => Date.parse(a.granularity_bucket) - Date.parse(b.granularity_bucket) From 14162ecffd2673f76ad27d8b1e72eaf810ff5285 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 13:47:05 -0800 Subject: [PATCH 3/6] hidemetadata --- .../teams/compilers/CompilerPrecomputeConfirmDialogContent.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchci/components/benchmark_v3/configs/teams/compilers/CompilerPrecomputeConfirmDialogContent.tsx b/torchci/components/benchmark_v3/configs/teams/compilers/CompilerPrecomputeConfirmDialogContent.tsx index 6f7110f5b3..c4de4f113c 100644 --- a/torchci/components/benchmark_v3/configs/teams/compilers/CompilerPrecomputeConfirmDialogContent.tsx +++ b/torchci/components/benchmark_v3/configs/teams/compilers/CompilerPrecomputeConfirmDialogContent.tsx @@ -62,10 +62,9 @@ export const CompilerPrecomputeConfirmDialogContent: React.FC< triggerUpdate(); return; } - const cell = await navigateToDataGrid( tableId, - [`${left?.compiler}`], + [`${left?.compiler}|`], `${left?.suite}`, toggleSectonId ); From cfdd74c7c9041310d2655d2a8e2d6d5eeb288e2f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 14:13:11 -0800 Subject: [PATCH 4/6] hidemetadata --- .../ComparisonTableColumnRendering.tsx | 18 ++++++++++++++++++ .../components/benchmarkTimeSeries/helper.tsx | 1 + .../configs/helpers/RegressionPolicy.ts | 7 ++++++- .../configs/teams/compilers/config.ts | 4 +++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx index eff25676d4..65b68c707f 100644 --- a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx +++ b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx @@ -157,6 +157,7 @@ export function getComparisionTableConlumnRendering( const VIOLATE_RULE_COLOR = "#ffebee"; // red[50] const IMPROVEMENT_COLOR = "#e8f5e9"; // green[50] const WARNING_COLOR = "#fff9c4"; // yellow[50] +const MISSING_DATA_COLOR = "#F5F5F5"; // ~ MUI grey[300] export function ComparisonTablePrimaryFieldValueCell({ params, @@ -222,6 +223,9 @@ export function ComparisonTableColumnFieldValueCell({ case "warning": bgColor = WARNING_COLOR; break; + case "missing": + bgColor = MISSING_DATA_COLOR; + break; case "neutral": default: break; @@ -370,6 +374,19 @@ export function getComparisonResult( result.reason = "detect failure"; } + if (config?.renderOptions?.renderMissing) { + if (ldata == null && rdata == null) { + result.verdict = "missing"; + result.reason = "both missing"; + } else if (ldata == null) { + result.verdict = "missing"; + result.reason = "left missing"; + } else if (rdata == null) { + result.verdict = "missing"; + result.reason = "right missing"; + } + } + const text = getFieldRender( targetVal, L, @@ -382,6 +399,7 @@ export function getComparisonResult( missingText, bothMissingText ); + return { result, text, diff --git a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx index 8de941241f..f1b966eec9 100644 --- a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx +++ b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/helper.tsx @@ -68,6 +68,7 @@ export interface BenchmarkComparisonTableRenderingOptions { }; missingText?: string; bothMissingText?: string; + renderMissing?: boolean; } export interface BenchmarkComparisonTablePrimaryColumnConfig { diff --git a/torchci/components/benchmark_v3/configs/helpers/RegressionPolicy.ts b/torchci/components/benchmark_v3/configs/helpers/RegressionPolicy.ts index ef90b52a3e..8b59ae8153 100644 --- a/torchci/components/benchmark_v3/configs/helpers/RegressionPolicy.ts +++ b/torchci/components/benchmark_v3/configs/helpers/RegressionPolicy.ts @@ -1,6 +1,11 @@ import { asNumber } from "components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableHelpers"; -export type ComparisonVerdict = "good" | "neutral" | "regression" | "warning"; +export type ComparisonVerdict = + | "good" + | "neutral" + | "regression" + | "warning" + | "missing"; export type ComparisonPolicyType = "ratio" | "status" | "threshold"; export const DEFAULT_TYPE = "ratio"; export const DEFAULT_BAD_RATIO = 0.9; diff --git a/torchci/components/benchmark_v3/configs/teams/compilers/config.ts b/torchci/components/benchmark_v3/configs/teams/compilers/config.ts index 02d0789df2..dea1cc5606 100644 --- a/torchci/components/benchmark_v3/configs/teams/compilers/config.ts +++ b/torchci/components/benchmark_v3/configs/teams/compilers/config.ts @@ -323,6 +323,7 @@ export const CompilerDashboardBenchmarkUIConfig: BenchmarkUIConfig = { extraMetadata: DASHBOARD_COMPARISON_TABLE_METADATA_COLUMNS, renderOptions: { tableRenderingBook: DashboardRenderBook, + renderMissing: true, flex: { primary: 2, }, @@ -478,13 +479,14 @@ export const CompilerPrecomputeBenchmarkUIConfig: BenchmarkUIConfig = { text: "Geometric mean speedup (threshold = 0.95x)", }, compilation_latency: { - text: "compilation time (seconds)", + text: "Compilation time (seconds)", }, compression_ratio: { text: "Peak memory footprint compression ratio (threshold = 0.95x)", }, }, tableRenderingBook: RENDER_MAPPING_BOOK, + renderMissing: true, }, }, }, From 61354002d7bc94b4c9d2a6bd60a69eb293ffb119 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 14:16:12 -0800 Subject: [PATCH 5/6] hidemetadata --- .../ComparisonTableColumnRendering.tsx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx index 65b68c707f..ed7c719bf2 100644 --- a/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx +++ b/torchci/components/benchmark_v3/components/dataRender/components/benchmarkTimeSeries/components/BenchmarkTimeSeriesComparisonSection/BenchmarkTimeSeriesComparisonTable/ComparisonTableColumnRendering.tsx @@ -369,11 +369,7 @@ export function getComparisonResult( ? missingText : config?.renderOptions?.bothMissingText; - if (ldata?.is_failure || rdata?.is_failure) { - result.verdict = "warning"; - result.reason = "detect failure"; - } - + // if either side missing, mark as missing if (config?.renderOptions?.renderMissing) { if (ldata == null && rdata == null) { result.verdict = "missing"; @@ -387,6 +383,12 @@ export function getComparisonResult( } } + // if either side failed, mark as failure, failure is higher priority than missing + if (ldata?.is_failure || rdata?.is_failure) { + result.verdict = "warning"; + result.reason = "detect failure"; + } + const text = getFieldRender( targetVal, L, From 3a41f19e4c43e08a657abdee16abdf2b74283d41 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Nov 2025 15:29:46 -0800 Subject: [PATCH 6/6] hidemetadata --- .../common/config.py | 11 +-- .../backend/compilers/helpers/precompute.ts | 98 +++++++++++-------- torchci/lib/benchmark/compilerUtils.ts | 11 ++- torchci/lib/types.ts | 8 ++ 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index 0b60017ba5..ea3d8f9e07 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -187,7 +187,7 @@ name="Compiler Benchmark Regression", id="compiler_regression", source=BenchmarkApiSource( - api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series", + api_query_url="http://localhost:3000/api/benchmark/get_time_series", type="benchmark_time_series_api", # currently we only detect the regression for h100 with dtype bfloat16, and mode inference # we can extend this to other devices, dtypes and mode in the future @@ -197,12 +197,11 @@ "response_formats":["time_series"], "query_params": { "commits": [], - "compilers": [], - "arch": "h100", - "device": "cuda", - "dtype": "bfloat16", + "arches": ["b200","h100"], + "devices": ["cuda"], + "dtypes": ["bfloat16","amp","float16"], "granularity": "hour", - "mode": "inference", + "modes": ["training","inference"], "startTime": "{{ startTime }}", "stopTime": "{{ stopTime }}", "suites": ["torchbench", "huggingface", "timm_models"], diff --git a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts index 373c7f1e46..165d2f8d4c 100644 --- a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts +++ b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts @@ -43,18 +43,7 @@ const COMPILER_PRECOMPUTE_TABLE_GROUP_KEY = [ ]; const COMPILER_PRECOMPUTE_TABLE_SUB_GROUP_KEY = ["suite"]; -export function toPrecomputeCompilerData( - rawData: any[], - formats: string[] = ["time_series"] -) { - const metadata = { - dtype: rawData[0].dtype, - arch: toApiArch(rawData[0].device, rawData[0].arch), - mode: rawData[0].mode, - device: rawData[0].device, - }; - - // get CompilerPerformanceData +function toPrecomputeCompilerDataPerGroup(rawData: any[], metadata: any) { const data = convertToCompilerPerformanceData(rawData); const commit_map = toWorkflowIdMap(data); @@ -67,9 +56,7 @@ export function toPrecomputeCompilerData( const executionTime = computeExecutionTime(data, models); const peakMemoryUsage = computePeakMemoryUsage(data, models); - // filter out export for compiler since it's always 0 - - let all_data = [ + let processed = [ passrate, geomean, peakMemory, @@ -78,39 +65,60 @@ export function toPrecomputeCompilerData( peakMemoryUsage, ].flat(); + addMetadata(processed, commit_map, metadata); + // only show export for passrate - all_data = all_data.filter((row) => + processed = processed.filter((row) => row.compiler == "export" && row.metric != "passrate" ? false : true ); - all_data = [...all_data].sort( + return processed; +} + +export function toPrecomputeCompilerData( + rawData: any[], + formats: string[] = ["time_series"] +) { + const { groups, metadataMapping } = groupByBenchmark(rawData); + + let all_data: any[] = []; + for (const [key, items] of Object.entries(groups)) { + console.log("Per group info:", key); + const meta = metadataMapping[key]; + const dataPerGroup = toPrecomputeCompilerDataPerGroup(items, meta); + all_data = [...all_data, ...dataPerGroup]; + } + const data = [...all_data].sort( (a, b) => Date.parse(a.granularity_bucket) - Date.parse(b.granularity_bucket) ); - if (!data || data.length === 0) { return emptyTimeSeriesResponse(); } // post process data to get start_ts and end_ts, and add commit metadata - const { start_ts, end_ts } = postFetchProcess(all_data, commit_map, metadata); - + const { start_ts, end_ts } = postFetchProcess(all_data); let res: any = {}; formats.forEach((format) => { const f = getFormat(all_data, format); res[format] = f; }); - return toTimeSeriesResponse(res, rawData.length, start_ts, end_ts); } -function postFetchProcess( - data: any[], - commit_map: Map, - metadata: any -) { +function addMetadata(data: any[], commit_map: Map, metadata: any) { + data.map((row) => { + row["commit"] = commit_map.get(row.workflow_id)?.commit; + row["branch"] = commit_map.get(row.workflow_id)?.branch; + row["dtype"] = metadata["dtype"]; + row["arch"] = metadata["arch"]; + row["device"] = metadata["device"]; + row["mode"] = metadata["mode"]; + }); +} + +function postFetchProcess(data: any[]) { let start_ts = new Date(data[0]?.granularity_bucket).getTime(); let end_ts = new Date(data[data.length - 1]?.granularity_bucket).getTime(); - // Handle invalid dates (NaN from getTime) if (isNaN(start_ts) || isNaN(end_ts)) { console.warn( @@ -120,22 +128,10 @@ function postFetchProcess( `(postFetchProcess)Invalid granularity_bucket values detected peek first data: ${data[0]}` ); } - // Swap if needed if (end_ts < start_ts) { [start_ts, end_ts] = [end_ts, start_ts]; } - - data.map((row) => { - row["commit"] = commit_map.get(row.workflow_id)?.commit; - row["branch"] = commit_map.get(row.workflow_id)?.branch; - - row["dtype"] = metadata["dtype"]; - row["arch"] = metadata["arch"]; - row["device"] = metadata["device"]; - row["mode"] = metadata["mode"]; - }); - return { start_ts, end_ts, @@ -165,3 +161,27 @@ function getFormat(data: any, format: string) { throw new Error("Invalid type"); } } + +export function groupByBenchmark(rawData: any[]) { + const groups: Record = {}; + const metadataMapping: Record = {}; + for (const item of rawData) { + const apiArch = toApiArch(item.device, item.arch); + // composite grouping key + const key = `${apiArch}_${item.device}_${item.dtype}_${item.mode}`; + if (!metadataMapping[key]) { + metadataMapping[key] = { + dtype: item.dtype, + arch: apiArch, + mode: item.mode, + device: item.device, + }; + } + if (!groups[key]) { + groups[key] = []; + } + groups[key].push(item); + } + + return { groups, metadataMapping }; +} diff --git a/torchci/lib/benchmark/compilerUtils.ts b/torchci/lib/benchmark/compilerUtils.ts index fb8b12ab3b..9e2e6b7981 100644 --- a/torchci/lib/benchmark/compilerUtils.ts +++ b/torchci/lib/benchmark/compilerUtils.ts @@ -64,6 +64,11 @@ export function computePassrate( const model = record.name; const accuracy = record.accuracy; + const mode = record.mode; + const device = record.device; + const arch = record.arch; + const dtype = record.dtype; + // Use clear compiler name to avoid confusion about what they do const compiler = COMPILER_NAMES_TO_DISPLAY_NAMES[record.compiler] ?? record.compiler; @@ -71,7 +76,7 @@ export function computePassrate( return; } - const key = `${bucket}+${workflowId}+${suite}+${compiler}`; + const key = `${bucket}+${workflowId}+${suite}+${compiler}+${mode}+${device}+${arch}+${dtype}`; if (!(key in totalCount)) { totalCount[key] = 0; passCount[key] = 0; @@ -446,6 +451,10 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) { job_id: r.job_id, branch: r.branch, commit: r.commit, + arch: r.arch, + device: r.device, + dtype: r.dtype, + mode: r.mode, }; } diff --git a/torchci/lib/types.ts b/torchci/lib/types.ts index ced8e65339..ab2d9e47f9 100644 --- a/torchci/lib/types.ts +++ b/torchci/lib/types.ts @@ -210,6 +210,10 @@ export interface CompilerPerformanceData { job_id?: number; branch?: string; commit?: string; + device?: string; + dtype?: string; + mode?: string; + arch?: string; } export interface TritonBenchPerformanceData { @@ -239,6 +243,10 @@ export interface BenchmarkData { workflow_id: number; commit?: string; branch?: string; + device?: string; + dtype?: string; + mode?: string; + arch?: string; } export interface RepoBranchAndCommit {