Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions aws/lambda/benchmark_regression_summary_report/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
name="Compiler Benchmark Regression",
id="compiler_regression",
source=BenchmarkApiSource(
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
            api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
type="benchmark_time_series_api",
# currently we only detect the regression for h100 with dtype bfloat16, and mode inference
# we can extend this to other devices, dtypes and mode in the future
Expand All @@ -197,12 +197,11 @@
"response_formats":["time_series"],
"query_params": {
"commits": [],
"compilers": [],
"arch": "h100",
"device": "cuda",
"dtype": "bfloat16",
"arches": ["b200","h100"],
"devices": ["cuda"],
"dtypes": ["bfloat16","amp","float16"],
"granularity": "hour",
"mode": "inference",
"modes": ["training","inference"],
"startTime": "{{ startTime }}",
"stopTime": "{{ stopTime }}",
"suites": ["torchbench", "huggingface", "timm_models"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,7 @@ const COMPILER_PRECOMPUTE_TABLE_GROUP_KEY = [
];
const COMPILER_PRECOMPUTE_TABLE_SUB_GROUP_KEY = ["suite"];

export function toPrecomputeCompilerData(
rawData: any[],
formats: string[] = ["time_series"]
) {
const metadata = {
dtype: rawData[0].dtype,
arch: toApiArch(rawData[0].device, rawData[0].arch),
mode: rawData[0].mode,
device: rawData[0].device,
};

// get CompilerPerformanceData
function toPrecomputeCompilerDataPerGroup(rawData: any[], metadata: any) {
const data = convertToCompilerPerformanceData(rawData);
const commit_map = toWorkflowIdMap(data);

Expand All @@ -67,9 +56,7 @@ export function toPrecomputeCompilerData(
const executionTime = computeExecutionTime(data, models);
const peakMemoryUsage = computePeakMemoryUsage(data, models);

// filter out export for compiler since it's always 0

let all_data = [
let processed = [
passrate,
geomean,
peakMemory,
Expand All @@ -78,39 +65,60 @@ export function toPrecomputeCompilerData(
peakMemoryUsage,
].flat();

addMetadata(processed, commit_map, metadata);

// only show export for passrate
all_data = all_data.filter((row) =>
processed = processed.filter((row) =>
row.compiler == "export" && row.metric != "passrate" ? false : true
);
all_data = [...all_data].sort(
return processed;
}

/**
 * Builds the precomputed compiler-benchmark time-series response.
 *
 * Rows are first bucketed by (arch, device, dtype, mode) via groupByBenchmark
 * so each group is processed against its own metadata, then the per-group
 * results are merged, sorted chronologically, and rendered into the requested
 * formats.
 *
 * @param rawData raw benchmark rows from the time-series API
 * @param formats output formats to render (defaults to ["time_series"])
 * @returns a time-series response, or an empty response when no rows survive
 */
export function toPrecomputeCompilerData(
  rawData: any[],
  formats: string[] = ["time_series"]
) {
  const { groups, metadataMapping } = groupByBenchmark(rawData);

  let all_data: any[] = [];
  for (const [key, items] of Object.entries(groups)) {
    const meta = metadataMapping[key];
    const dataPerGroup = toPrecomputeCompilerDataPerGroup(items, meta);
    all_data = [...all_data, ...dataPerGroup];
  }

  // Sort across ALL groups: postFetchProcess derives start_ts/end_ts from the
  // first and last elements, which is only correct on chronologically sorted
  // input (the per-group arrays interleave in time once concatenated).
  const data = [...all_data].sort(
    (a, b) =>
      Date.parse(a.granularity_bucket) - Date.parse(b.granularity_bucket)
  );

  if (!data || data.length === 0) {
    return emptyTimeSeriesResponse();
  }

  // Use the sorted array — passing the unsorted concatenation here would
  // yield wrong time bounds and an unsorted series in the output.
  const { start_ts, end_ts } = postFetchProcess(data);
  let res: any = {};
  formats.forEach((format) => {
    res[format] = getFormat(data, format);
  });

  return toTimeSeriesResponse(res, rawData.length, start_ts, end_ts);
}

function postFetchProcess(
data: any[],
commit_map: Map<string, any>,
metadata: any
) {
/**
 * Mutates each row in place: attaches commit/branch resolved from the
 * workflow-id map, plus the group-level benchmark metadata fields
 * (dtype, arch, device, mode).
 *
 * Rows whose workflow_id is absent from commit_map get undefined
 * commit/branch (preserved behavior of the optional-chained lookup).
 *
 * @param data rows to annotate (mutated in place)
 * @param commit_map workflow_id -> { commit, branch } lookup
 * @param metadata group metadata with dtype/arch/device/mode keys
 */
function addMetadata(data: any[], commit_map: Map<string, any>, metadata: any) {
  // for..of instead of .map: we iterate purely for side effects, and the
  // commit_map lookup is hoisted so each row is resolved only once.
  for (const row of data) {
    const info = commit_map.get(row.workflow_id);
    row["commit"] = info?.commit;
    row["branch"] = info?.branch;
    row["dtype"] = metadata["dtype"];
    row["arch"] = metadata["arch"];
    row["device"] = metadata["device"];
    row["mode"] = metadata["mode"];
  }
}

function postFetchProcess(data: any[]) {
let start_ts = new Date(data[0]?.granularity_bucket).getTime();
let end_ts = new Date(data[data.length - 1]?.granularity_bucket).getTime();

// Handle invalid dates (NaN from getTime)
if (isNaN(start_ts) || isNaN(end_ts)) {
console.warn(
Expand All @@ -120,22 +128,10 @@ function postFetchProcess(
`(postFetchProcess)Invalid granularity_bucket values detected peek first data: ${data[0]}`
);
}

// Swap if needed
if (end_ts < start_ts) {
[start_ts, end_ts] = [end_ts, start_ts];
}

data.map((row) => {
row["commit"] = commit_map.get(row.workflow_id)?.commit;
row["branch"] = commit_map.get(row.workflow_id)?.branch;

row["dtype"] = metadata["dtype"];
row["arch"] = metadata["arch"];
row["device"] = metadata["device"];
row["mode"] = metadata["mode"];
});

return {
start_ts,
end_ts,
Expand Down Expand Up @@ -165,3 +161,27 @@ function getFormat(data: any, format: string) {
throw new Error("Invalid type");
}
}

export function groupByBenchmark(rawData: any[]) {
const groups: Record<string, any[]> = {};
const metadataMapping: Record<string, any> = {};
for (const item of rawData) {
const apiArch = toApiArch(item.device, item.arch);
// composite grouping key
const key = `${apiArch}_${item.device}_${item.dtype}_${item.mode}`;
if (!metadataMapping[key]) {
metadataMapping[key] = {
dtype: item.dtype,
arch: apiArch,
mode: item.mode,
device: item.device,
};
}
if (!groups[key]) {
groups[key] = [];
}
groups[key].push(item);
}

return { groups, metadataMapping };
}
11 changes: 10 additions & 1 deletion torchci/lib/benchmark/compilerUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,19 @@ export function computePassrate(
const model = record.name;
const accuracy = record.accuracy;

const mode = record.mode;
const device = record.device;
const arch = record.arch;
const dtype = record.dtype;

// Use clear compiler name to avoid confusion about what they do
const compiler =
COMPILER_NAMES_TO_DISPLAY_NAMES[record.compiler] ?? record.compiler;
if (BLOCKLIST_COMPILERS.includes(compiler)) {
return;
}

const key = `${bucket}+${workflowId}+${suite}+${compiler}`;
const key = `${bucket}+${workflowId}+${suite}+${compiler}+${mode}+${device}+${arch}+${dtype}`;
if (!(key in totalCount)) {
totalCount[key] = 0;
passCount[key] = 0;
Expand Down Expand Up @@ -446,6 +451,10 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {
job_id: r.job_id,
branch: r.branch,
commit: r.commit,
arch: r.arch,
device: r.device,
dtype: r.dtype,
mode: r.mode,
};
}

Expand Down
8 changes: 8 additions & 0 deletions torchci/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ export interface CompilerPerformanceData {
job_id?: number;
branch?: string;
commit?: string;
device?: string;
dtype?: string;
mode?: string;
arch?: string;
}

export interface TritonBenchPerformanceData {
Expand Down Expand Up @@ -239,6 +243,10 @@ export interface BenchmarkData {
workflow_id: number;
commit?: string;
branch?: string;
device?: string;
dtype?: string;
mode?: string;
arch?: string;
}

export interface RepoBranchAndCommit {
Expand Down