diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index 0b60017ba5..ea3d8f9e07 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -187,7 +187,7 @@ name="Compiler Benchmark Regression", id="compiler_regression", source=BenchmarkApiSource( - api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series", + api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series", type="benchmark_time_series_api", # currently we only detect the regression for h100 with dtype bfloat16, and mode inference # we can extend this to other devices, dtypes and mode in the future @@ -197,12 +197,11 @@ "response_formats":["time_series"], "query_params": { "commits": [], - "compilers": [], - "arch": "h100", - "device": "cuda", - "dtype": "bfloat16", + "arches": ["b200","h100"], + "devices": ["cuda"], + "dtypes": ["bfloat16","amp","float16"], "granularity": "hour", - "mode": "inference", + "modes": ["training","inference"], "startTime": "{{ startTime }}", "stopTime": "{{ stopTime }}", "suites": ["torchbench", "huggingface", "timm_models"], diff --git a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts index 373c7f1e46..165d2f8d4c 100644 --- a/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts +++ b/torchci/lib/benchmark/api_helper/backend/compilers/helpers/precompute.ts @@ -43,18 +43,7 @@ const COMPILER_PRECOMPUTE_TABLE_GROUP_KEY = [ ]; const COMPILER_PRECOMPUTE_TABLE_SUB_GROUP_KEY = ["suite"]; -export function toPrecomputeCompilerData( - rawData: any[], - formats: string[] = ["time_series"] -) { - const metadata = { - dtype: rawData[0].dtype, - arch: toApiArch(rawData[0].device, rawData[0].arch), - mode: rawData[0].mode, - device: rawData[0].device, - }; - - // get 
CompilerPerformanceData +function toPrecomputeCompilerDataPerGroup(rawData: any[], metadata: any) { const data = convertToCompilerPerformanceData(rawData); const commit_map = toWorkflowIdMap(data); @@ -67,9 +56,7 @@ export function toPrecomputeCompilerData( const executionTime = computeExecutionTime(data, models); const peakMemoryUsage = computePeakMemoryUsage(data, models); - // filter out export for compiler since it's always 0 - - let all_data = [ + let processed = [ passrate, geomean, peakMemory, @@ -78,39 +65,60 @@ export function toPrecomputeCompilerData( peakMemoryUsage, ].flat(); + addMetadata(processed, commit_map, metadata); + // only show export for passrate - all_data = all_data.filter((row) => + processed = processed.filter((row) => row.compiler == "export" && row.metric != "passrate" ? false : true ); - all_data = [...all_data].sort( + return processed; +} + +export function toPrecomputeCompilerData( + rawData: any[], + formats: string[] = ["time_series"] +) { + const { groups, metadataMapping } = groupByBenchmark(rawData); + + let all_data: any[] = []; + for (const [key, items] of Object.entries(groups)) { + console.log("Per group info:", key); + const meta = metadataMapping[key]; + const dataPerGroup = toPrecomputeCompilerDataPerGroup(items, meta); + all_data = [...all_data, ...dataPerGroup]; + } + const data = [...all_data].sort( (a, b) => Date.parse(a.granularity_bucket) - Date.parse(b.granularity_bucket) ); - if (!data || data.length === 0) { return emptyTimeSeriesResponse(); } // post process data to get start_ts and end_ts, and add commit metadata - const { start_ts, end_ts } = postFetchProcess(all_data, commit_map, metadata); - + const { start_ts, end_ts } = postFetchProcess(all_data); let res: any = {}; formats.forEach((format) => { const f = getFormat(all_data, format); res[format] = f; }); - return toTimeSeriesResponse(res, rawData.length, start_ts, end_ts); } -function postFetchProcess( - data: any[], - commit_map: Map, - metadata: any -) 
{ +function addMetadata(data: any[], commit_map: Map, metadata: any) { + data.map((row) => { + row["commit"] = commit_map.get(row.workflow_id)?.commit; + row["branch"] = commit_map.get(row.workflow_id)?.branch; + row["dtype"] = metadata["dtype"]; + row["arch"] = metadata["arch"]; + row["device"] = metadata["device"]; + row["mode"] = metadata["mode"]; + }); +} + +function postFetchProcess(data: any[]) { let start_ts = new Date(data[0]?.granularity_bucket).getTime(); let end_ts = new Date(data[data.length - 1]?.granularity_bucket).getTime(); - // Handle invalid dates (NaN from getTime) if (isNaN(start_ts) || isNaN(end_ts)) { console.warn( @@ -120,22 +128,10 @@ function postFetchProcess( `(postFetchProcess)Invalid granularity_bucket values detected peek first data: ${data[0]}` ); } - // Swap if needed if (end_ts < start_ts) { [start_ts, end_ts] = [end_ts, start_ts]; } - - data.map((row) => { - row["commit"] = commit_map.get(row.workflow_id)?.commit; - row["branch"] = commit_map.get(row.workflow_id)?.branch; - - row["dtype"] = metadata["dtype"]; - row["arch"] = metadata["arch"]; - row["device"] = metadata["device"]; - row["mode"] = metadata["mode"]; - }); - return { start_ts, end_ts, @@ -165,3 +161,27 @@ function getFormat(data: any, format: string) { throw new Error("Invalid type"); } } + +export function groupByBenchmark(rawData: any[]) { + const groups: Record = {}; + const metadataMapping: Record = {}; + for (const item of rawData) { + const apiArch = toApiArch(item.device, item.arch); + // composite grouping key + const key = `${apiArch}_${item.device}_${item.dtype}_${item.mode}`; + if (!metadataMapping[key]) { + metadataMapping[key] = { + dtype: item.dtype, + arch: apiArch, + mode: item.mode, + device: item.device, + }; + } + if (!groups[key]) { + groups[key] = []; + } + groups[key].push(item); + } + + return { groups, metadataMapping }; +} diff --git a/torchci/lib/benchmark/compilerUtils.ts b/torchci/lib/benchmark/compilerUtils.ts index fb8b12ab3b..9e2e6b7981 
100644 --- a/torchci/lib/benchmark/compilerUtils.ts +++ b/torchci/lib/benchmark/compilerUtils.ts @@ -64,6 +64,11 @@ export function computePassrate( const model = record.name; const accuracy = record.accuracy; + const mode = record.mode; + const device = record.device; + const arch = record.arch; + const dtype = record.dtype; + // Use clear compiler name to avoid confusion about what they do const compiler = COMPILER_NAMES_TO_DISPLAY_NAMES[record.compiler] ?? record.compiler; @@ -71,7 +76,7 @@ export function computePassrate( return; } - const key = `${bucket}+${workflowId}+${suite}+${compiler}`; + const key = `${bucket}+${workflowId}+${suite}+${compiler}+${mode}+${device}+${arch}+${dtype}`; if (!(key in totalCount)) { totalCount[key] = 0; passCount[key] = 0; @@ -446,6 +451,10 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) { job_id: r.job_id, branch: r.branch, commit: r.commit, + arch: r.arch, + device: r.device, + dtype: r.dtype, + mode: r.mode, }; } diff --git a/torchci/lib/types.ts b/torchci/lib/types.ts index ced8e65339..ab2d9e47f9 100644 --- a/torchci/lib/types.ts +++ b/torchci/lib/types.ts @@ -210,6 +210,10 @@ export interface CompilerPerformanceData { job_id?: number; branch?: string; commit?: string; + device?: string; + dtype?: string; + mode?: string; + arch?: string; } export interface TritonBenchPerformanceData { @@ -239,6 +243,10 @@ export interface BenchmarkData { workflow_id: number; commit?: string; branch?: string; + device?: string; + dtype?: string; + mode?: string; + arch?: string; } export interface RepoBranchAndCommit {