Skip to content

Commit 8fbc416

Browse files
committed
Add json file with HW capability
1 parent 89ef2d3 commit 8fbc416

File tree

2 files changed

+77
-17
lines changed

2 files changed

+77
-17
lines changed

benchmarks/gpu_info.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"_comment": "GPU -> [BF16/FP16 DPAS TFLOPs, Memory bandwidth GB/s]",
3+
"Intel(R) Data Center GPU Max 1100": [
4+
355.53,
5+
1228.80
6+
],
7+
"Intel(R) Data Center GPU Max 1550": [
8+
419.43,
9+
3276.8
10+
],
11+
"Intel(R) Arc(TM) B580 Graphics": [
12+
116.74,
13+
456.0
14+
],
15+
"Intel(R) Arc(TM) B570 Graphics": [
16+
103.22,
17+
380.0
18+
]
19+
}

benchmarks/triton_kernels_benchmark/benchmark_testing.py

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from abc import ABC, abstractmethod
4+
import re
45
from typing import Callable, ClassVar, Dict, Optional, List, Tuple, Union, Set
56
from collections.abc import Iterable
67
from enum import Enum
@@ -13,8 +14,10 @@
1314
import datetime
1415
import os
1516
import time
17+
from pathlib import Path
1618

1719
import scipy.stats
20+
import numpy as np
1821
import pandas as pd
1922
import matplotlib.pyplot as plt
2023

@@ -335,6 +338,30 @@ def filter_providers(
335338
return supported_providers
336339

337340

341+
def get_gpu_info():
342+
device_name = torch.xpu.is_available() and torch.xpu.get_device_name()
343+
if device_name is None:
344+
print("Couldn't read device name.")
345+
return None, None
346+
347+
# benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json
348+
current_dir = Path(__file__).parent.resolve()
349+
gpu_info_path = current_dir.parent / "gpu_info.json"
350+
351+
if not gpu_info_path.exists():
352+
print(f"Warning: '{gpu_info_path}' not found.")
353+
return None, None
354+
355+
with open(gpu_info_path, "r", encoding="utf-8") as f:
356+
gpu_info = json.load(f)
357+
358+
if device_name not in gpu_info:
359+
print(f"Warning: Device '{device_name}' not found in {gpu_info_path}")
360+
return None, None
361+
362+
return gpu_info[device_name]
363+
364+
338365
def perf_report(benchmarks):
339366
"""
340367
Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value.
@@ -351,8 +378,7 @@ class MarkArgs:
351378
reports: str = ""
352379
n_runs: int = 1
353380
brief: bool = False
354-
hw_gbps: float = None
355-
hw_tflops: float = None
381+
eff: bool = False
356382

357383
@staticmethod
358384
def load_cli_args() -> MarkArgs:
@@ -377,29 +403,44 @@ def load_cli_args() -> MarkArgs:
377403
help="Print only mean values without min, max, CV.",
378404
)
379405
parser.add_argument(
380-
"--hw_gbps",
381-
type=float,
382-
help="Hardware bandwidth in GB/s to calculate efficiency.",
383-
)
384-
parser.add_argument(
385-
"--hw_tflops",
386-
type=float,
387-
help="Hardware peak performance in TFLOPS to calculate efficiency.",
406+
"--eff",
407+
"-e",
408+
action="store_true",
409+
help="Print HW utilization, will use internal database from 'gpu_info.json'.",
388410
)
389411
args = parser.parse_args()
390-
return MarkArgs(args.reports, args.n_runs, args.brief, args.hw_gbps, args.hw_tflops)
412+
return MarkArgs(args.reports, args.n_runs, args.brief, args.eff)
391413

392414

393-
def enhance_df(df, mark_args: MarkArgs):
415+
def enhance_df(df, bench, mark_args: MarkArgs):
    """Return a copy of ``df`` post-processed for printing.

    Args:
        df: Benchmark results; column names are expected to be strings.
        bench: Benchmark description. Only ``bench.ylabel`` (iterable of
            metric label strings) and ``bench.line_names`` (iterable of
            provider names) are read.
        mark_args: CLI options. ``brief`` drops every column whose name ends
            with ``min``, ``max`` or ``CV``; ``eff`` enables the hardware
            efficiency columns, using the peak numbers from ``get_gpu_info()``.

    Returns:
        A new DataFrame; the input is never modified. With ``eff`` set and
        peak numbers available, each ``<name>-<tflops label>`` column yields a
        ``<name>-ceff`` column (value / peak TFLOPs) and each
        ``<name>-<gbps label>`` column yields ``<name>-meff`` (value / peak
        GB/s). For every provider that has both, they are merged into one
        ``<provider>-eff`` column holding the larger ratio formatted as a
        percentage, and the two intermediate columns are removed.
    """
    hw_tflops, hw_gbps = None, None
    if mark_args.eff:
        hw_tflops, hw_gbps = get_gpu_info()

    if mark_args.brief:
        df = df[[c for c in df.columns if not c.endswith(("min", "max", "CV"))]]
    # Copy *after* filtering so the column assignments below always target an
    # independent frame rather than a slice of the caller's data.
    df = df.copy()

    # Find and write down HW efficiency columns.
    # Only the plain (mean) metric columns are matched: also matching the
    # "-min"/"-max" variants would map three source columns onto the same
    # efficiency column, making the result depend on column order.
    tflops_pattern = _label_suffix_pattern(l for l in bench.ylabel if l.lower().endswith("tflops"))
    gbps_pattern = _label_suffix_pattern(
        l for l in bench.ylabel if l.lower().replace("/", "p").endswith("gbps"))

    # Iterate over a snapshot: new columns are added inside the loop.
    for col in list(df.columns):
        if hw_tflops and tflops_pattern and tflops_pattern.search(col):
            df[tflops_pattern.sub("-ceff", col)] = df[col] / hw_tflops
        if hw_gbps and gbps_pattern and gbps_pattern.search(col):
            df[gbps_pattern.sub("-meff", col)] = df[col] / hw_gbps

    # We will only keep resulting efficiency column, we are either compute or memory bound.
    for provider in bench.line_names:
        ceff, meff = f"{provider}-ceff", f"{provider}-meff"
        if ceff in df.columns and meff in df.columns:
            df[f"{provider}-eff"] = np.maximum(df[ceff], df[meff]).apply(lambda x: f"{x:.2%}")
            del df[ceff]
            del df[meff]

    return df


def _label_suffix_pattern(labels):
    """Compile a regex matching ``-<label>`` at the end of a column name.

    Labels are escaped so characters such as ``+`` or ``(`` are matched
    literally. Returns ``None`` when there are no labels, because an empty
    alternation would match column names that merely end with ``-``.
    """
    escaped = [re.escape(label) for label in labels]
    if not escaped:
        return None
    return re.compile("-(?:" + "|".join(escaped) + ")$")
405446

@@ -487,7 +528,7 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b
487528
col0, col1 = df.columns.tolist()
488529
df["Diff"] = df[col1] - df[col0]
489530

490-
df = enhance_df(df, mark_args)
531+
df = enhance_df(df, bench, mark_args)
491532
if print_data:
492533
print(bench.plot_name + ":")
493534
print(df.to_string())

0 commit comments

Comments
 (0)