Skip to content

Commit d59f085

Browse files
authored
[benchmarks] Add options to print SW efficiency (#5493)
When the user passes the new `--eff` (`-e`) argument, the benchmark prints software efficiency and also saves it to the report.
1 parent a3058e2 commit d59f085

File tree

2 files changed

+89
-6
lines changed

2 files changed

+89
-6
lines changed

benchmarks/gpu_info.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"_comment": "GPU -> [BF16/FP16 DPAS TFLOPs , Memory bandwidth GB/s]",
3+
"Intel(R) Data Center GPU Max 1100": [
4+
355.53,
5+
1228.80
6+
],
7+
"Intel(R) Data Center GPU Max 1550": [
8+
419.43,
9+
3276.8
10+
],
11+
"Intel(R) Arc(TM) B580 Graphics": [
12+
116.74,
13+
456.0
14+
],
15+
"Intel(R) Arc(TM) B570 Graphics": [
16+
103.22,
17+
380.0
18+
]
19+
}

benchmarks/triton_kernels_benchmark/benchmark_testing.py

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from abc import ABC, abstractmethod
4+
import re
45
from typing import Callable, ClassVar, Dict, Optional, List, Tuple, Union, Set
56
from collections.abc import Iterable
67
from enum import Enum
@@ -13,8 +14,10 @@
1314
import datetime
1415
import os
1516
import time
17+
from pathlib import Path
1618

1719
import scipy.stats
20+
import numpy as np
1821
import pandas as pd
1922
import matplotlib.pyplot as plt
2023

@@ -155,7 +158,6 @@ def do_bench_upstream_pytorch_profiler(fn, n_warmup=25, n_repeat=100, grad_to_no
155158
fn()
156159
synchronize()
157160
i += 1
158-
print(f"Stopped warmup after {i} iterations")
159161
else:
160162
for _ in range(n_warmup):
161163
fn()
@@ -336,6 +338,30 @@ def filter_providers(
336338
return supported_providers
337339

338340

341+
def get_gpu_info():
342+
device_name = torch.xpu.is_available() and torch.xpu.get_device_name()
343+
if device_name is None:
344+
print("Couldn't read device name.")
345+
return None, None
346+
347+
# benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json
348+
current_dir = Path(__file__).parent.resolve()
349+
gpu_info_path = current_dir.parent / "gpu_info.json"
350+
351+
if not gpu_info_path.exists():
352+
print(f"Warning: '{gpu_info_path}' not found.")
353+
return None, None
354+
355+
with open(gpu_info_path, "r", encoding="utf-8") as f:
356+
gpu_info = json.load(f)
357+
358+
if device_name not in gpu_info:
359+
print(f"Warning: Device '{device_name}' not found in {gpu_info_path}")
360+
return None, None
361+
362+
return gpu_info[device_name]
363+
364+
339365
def perf_report(benchmarks):
340366
"""
341367
Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value.
@@ -352,6 +378,7 @@ class MarkArgs:
352378
reports: str = ""
353379
n_runs: int = 1
354380
brief: bool = False
381+
eff: bool = False
355382

356383
@staticmethod
357384
def load_cli_args() -> MarkArgs:
@@ -375,8 +402,47 @@ def load_cli_args() -> MarkArgs:
375402
action="store_true",
376403
help="Print only mean values without min, max, CV.",
377404
)
405+
parser.add_argument(
406+
"--eff",
407+
"-e",
408+
action="store_true",
409+
help="Print HW utilization, will use internal database from 'gpu_info.json'.",
410+
)
378411
args = parser.parse_args()
379-
return MarkArgs(args.reports, args.n_runs, args.brief)
412+
return MarkArgs(args.reports, args.n_runs, args.brief, args.eff)
413+
414+
415+
def enhance_df(df, bench, mark_args: MarkArgs):
    """Return a copy of ``df`` post-processed for printing and saving.

    Two optional transformations are applied, driven by ``mark_args``:

    * ``mark_args.brief``: columns whose names end in ``min``, ``max`` or
      ``CV`` are dropped.
    * ``mark_args.eff``: the peak values returned by ``get_gpu_info()`` are
      used to add efficiency columns. For every ``<provider>-<label>`` column
      whose label (taken from ``bench.ylabel``) ends in "tflops", a
      ``<provider>-ceff`` column is added with the value divided by the peak
      TFLOPs; likewise ``<provider>-meff`` for labels ending in "gbps" or
      "gb/s", divided by the peak GB/s. When a provider from
      ``bench.line_names`` has both, they are replaced by a single
      ``<provider>-eff`` column holding the larger of the two, formatted as
      a percentage string.

    Only the plain ``<provider>-<label>`` columns feed the efficiency values.
    The ``-min``/``-max`` variants are ignored so that the result is the same
    with and without ``brief`` (previously they overwrote the same
    ``-ceff``/``-meff`` column and the last one silently won).

    Args:
        df: Benchmark results; not modified.
        bench: Benchmark description providing ``ylabel`` and ``line_names``.
        mark_args: Options providing the ``eff`` and ``brief`` flags.

    Returns:
        A new DataFrame with the transformations applied.
    """
    hw_tflops, hw_gbps = None, None
    if mark_args.eff:
        hw_tflops, hw_gbps = get_gpu_info()

    if mark_args.brief:
        df = df[[c for c in df.columns if not c.endswith(("min", "max", "CV"))]]
    # Copy after the column selection so the new columns below are added to
    # an independent frame, never to a slice of the caller's data.
    df = df.copy()

    # Find and write down HW efficiency columns
    tflops_re = _label_suffix_regex(
        [label for label in bench.ylabel if label.lower().endswith("tflops")])
    gbps_re = _label_suffix_regex(
        [label for label in bench.ylabel if label.lower().replace("/", "p").endswith("gbps")])

    # Snapshot the column list: columns are added while iterating.
    for col in list(df.columns):
        if hw_tflops and tflops_re is not None and tflops_re.search(col):
            df[tflops_re.sub("-ceff", col)] = df[col] / hw_tflops
        if hw_gbps and gbps_re is not None and gbps_re.search(col):
            df[gbps_re.sub("-meff", col)] = df[col] / hw_gbps

    # We will only keep resulting efficiency column, we are either compute or memory bound.
    for provider in bench.line_names:
        ceff_col = f"{provider}-ceff"
        meff_col = f"{provider}-meff"
        if ceff_col in df.columns and meff_col in df.columns:
            df[f"{provider}-eff"] = np.maximum(df[ceff_col], df[meff_col]).apply(lambda x: f"{x:.2%}")
            del df[ceff_col]
            del df[meff_col]

    return df


def _label_suffix_regex(labels):
    """Compile a regex matching a column name ending in ``-<label>``, or return None if ``labels`` is empty."""
    if not labels:
        # An empty alternation would produce the degenerate pattern "-()$".
        return None
    # Labels are plain text such as "GB/s"; escape them so they match literally.
    return re.compile("-(" + "|".join(re.escape(label) for label in labels) + ")$")
380446

381447

382448
class Mark:
@@ -462,12 +528,10 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b
462528
col0, col1 = df.columns.tolist()
463529
df["Diff"] = df[col1] - df[col0]
464530

531+
df = enhance_df(df, bench, mark_args)
465532
if print_data:
466533
print(bench.plot_name + ":")
467-
if mark_args.brief:
468-
print(df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]].to_string())
469-
else:
470-
print(df.to_string())
534+
print(df.to_string())
471535

472536
if save_path:
473537
df.to_csv(os.path.join(save_path, f"{filename}.csv"), float_format=f"%.{save_precision}f", index=False)

0 commit comments

Comments
 (0)