Add json file with HW capability

Egor-Krivov · Egor-Krivov · commit 8fbc416281cf · 2025-11-19T15:22:10.000Z
diff --git a/benchmarks/gpu_info.json b/benchmarks/gpu_info.json
@@ -0,0 +1,19 @@
+{
+  "_comment": "GPU -> [BF16/FP16 DPAS TFLOPs , Memory bandwidth GB/s]",
+  "Intel(R) Data Center GPU Max 1100": [
+    355.53,
+    1228.80
+  ],
+  "Intel(R) Data Center GPU Max 1550": [
+    419.43,
+    3276.8
+  ],
+  "Intel(R) Arc(TM) B580 Graphics": [
+    116.74,
+    456.0
+  ],
+  "Intel(R) Arc(TM) B570 Graphics": [
+    103.22,
+    380.0
+  ]
+}
diff --git a/benchmarks/triton_kernels_benchmark/benchmark_testing.py b/benchmarks/triton_kernels_benchmark/benchmark_testing.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+import re
 from typing import Callable, ClassVar, Dict, Optional, List, Tuple, Union, Set
 from collections.abc import Iterable
 from enum import Enum
@@ -13,8 +14,10 @@
 import datetime
 import os
 import time
+from pathlib import Path
 
 import scipy.stats
+import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 
@@ -335,6 +338,30 @@ def filter_providers(
     return supported_providers
 
 
+def get_gpu_info():
+    device_name = torch.xpu.is_available() and torch.xpu.get_device_name()
+    if device_name is None:
+        print("Couldn't read device name.")
+        return None, None
+
+    # benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json
+    current_dir = Path(__file__).parent.resolve()
+    gpu_info_path = current_dir.parent / "gpu_info.json"
+
+    if not gpu_info_path.exists():
+        print(f"Warning: '{gpu_info_path}' not found.")
+        return None, None
+
+    with open(gpu_info_path, "r", encoding="utf-8") as f:
+        gpu_info = json.load(f)
+
+    if device_name not in gpu_info:
+        print(f"Warning: Device '{device_name}' not found in {gpu_info_path}")
+        return None, None
+
+    return gpu_info[device_name]
+
+
 def perf_report(benchmarks):
     """
     Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value.
@@ -351,8 +378,7 @@ class MarkArgs:
     reports: str = ""
     n_runs: int = 1
     brief: bool = False
-    hw_gbps: float = None
-    hw_tflops: float = None
+    eff: bool = False
 
     @staticmethod
     def load_cli_args() -> MarkArgs:
@@ -377,29 +403,44 @@ def load_cli_args() -> MarkArgs:
             help="Print only mean values without min, max, CV.",
         )
         parser.add_argument(
-            "--hw_gbps",
-            type=float,
-            help="Hardware bandwidth in GB/s to calculate efficiency.",
-        )
-        parser.add_argument(
-            "--hw_tflops",
-            type=float,
-            help="Hardware peak performance in TFLOPS to calculate efficiency.",
+            "--eff",
+            "-e",
+            action="store_true",
+            help="Print HW utilization, will use internal database from 'gpu_info.json'.",
         )
         args = parser.parse_args()
-        return MarkArgs(args.reports, args.n_runs, args.brief, args.hw_gbps, args.hw_tflops)
+        return MarkArgs(args.reports, args.n_runs, args.brief, args.eff)
 
 
-def enhance_df(df, mark_args: MarkArgs):
+def enhance_df(df, bench, mark_args: MarkArgs):
+    hw_tflops, hw_gbps = None, None
+    if mark_args.eff:
+        hw_tflops, hw_gbps = get_gpu_info()
+
     df = df.copy()
     if mark_args.brief:
         df = df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]]
 
+    # Find and write down HW efficiency columns
+    tflops_labels = [l for l in bench.ylabel if l.lower().endswith("tflops")]
+    tflops_pattern = "-(" + "|".join(tflops_labels) + ")(-min|-max)?$"
+
+    gbps_labels = [l for l in bench.ylabel if l.lower().replace("/", "p").endswith("gbps")]
+    gbps_pattern = "-(" + "|".join(gbps_labels) + ")(-min|-max)?$"
+
     for col in df.columns:
-        if col.lower().replace("/", "p").endswith("gbps") and mark_args.hw_gbps:
-            df[col + "-eff"] = (df[col] / mark_args.hw_gbps).apply(lambda x: f"{x:.1%}")
-        elif col.lower().endswith("tflops") and mark_args.hw_tflops:
-            df[col + "-eff"] = (df[col] / mark_args.hw_tflops).apply(lambda x: f"{x:.1%}")
+        if re.search(tflops_pattern, col) and hw_tflops:
+            df[re.sub(tflops_pattern, "-ceff", col)] = df[col] / hw_tflops
+        if re.search(gbps_pattern, col) and hw_gbps:
+            df[re.sub(gbps_pattern, "-meff", col)] = df[col] / hw_gbps
+            # df[re.sub(gbps_pattern, "-meff", col)] = (df[col] / mark_args.hw_gbps).apply(lambda x: f"{x:.1%}")
+    # We will only keep resulting efficiency column, we are either compute or memory bound.
+    for provider in bench.line_names:
+        if f"{provider}-ceff" in df.columns and f"{provider}-meff" in df.columns:
+            df[f"{provider}-eff"] = np.maximum(df[f"{provider}-ceff"],
+                                               df[f"{provider}-meff"]).apply(lambda x: f"{x:.2%}")
+            del df[f"{provider}-ceff"]
+            del df[f"{provider}-meff"]
 
     return df
 
@@ -487,7 +528,7 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b
             col0, col1 = df.columns.tolist()
             df["Diff"] = df[col1] - df[col0]
 
-        df = enhance_df(df, mark_args)
+        df = enhance_df(df, bench, mark_args)
         if print_data:
             print(bench.plot_name + ":")
             print(df.to_string())