11from __future__ import annotations
22
33from abc import ABC , abstractmethod
4+ import re
45from typing import Callable , ClassVar , Dict , Optional , List , Tuple , Union , Set
56from collections .abc import Iterable
67from enum import Enum
1314import datetime
1415import os
1516import time
17+ from pathlib import Path
1618
1719import scipy .stats
20+ import numpy as np
1821import pandas as pd
1922import matplotlib .pyplot as plt
2023
@@ -335,6 +338,30 @@ def filter_providers(
335338 return supported_providers
336339
337340
341+ def get_gpu_info ():
342+ device_name = torch .xpu .is_available () and torch .xpu .get_device_name ()
343+ if device_name is None :
344+ print ("Couldn't read device name." )
345+ return None , None
346+
347+ # benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json
348+ current_dir = Path (__file__ ).parent .resolve ()
349+ gpu_info_path = current_dir .parent / "gpu_info.json"
350+
351+ if not gpu_info_path .exists ():
352+ print (f"Warning: '{ gpu_info_path } ' not found." )
353+ return None , None
354+
355+ with open (gpu_info_path , "r" , encoding = "utf-8" ) as f :
356+ gpu_info = json .load (f )
357+
358+ if device_name not in gpu_info :
359+ print (f"Warning: Device '{ device_name } ' not found in { gpu_info_path } " )
360+ return None , None
361+
362+ return gpu_info [device_name ]
363+
364+
338365def perf_report (benchmarks ):
339366 """
340367 Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value.
@@ -351,8 +378,7 @@ class MarkArgs:
351378 reports : str = ""
352379 n_runs : int = 1
353380 brief : bool = False
354- hw_gbps : float = None
355- hw_tflops : float = None
381+ eff : bool = False
356382
357383 @staticmethod
358384 def load_cli_args () -> MarkArgs :
@@ -377,29 +403,44 @@ def load_cli_args() -> MarkArgs:
377403 help = "Print only mean values without min, max, CV." ,
378404 )
379405 parser .add_argument (
380- "--hw_gbps" ,
381- type = float ,
382- help = "Hardware bandwidth in GB/s to calculate efficiency." ,
383- )
384- parser .add_argument (
385- "--hw_tflops" ,
386- type = float ,
387- help = "Hardware peak performance in TFLOPS to calculate efficiency." ,
406+ "--eff" ,
407+ "-e" ,
408+ action = "store_true" ,
409+ help = "Print HW utilization, will use internal database from 'gpu_info.json'." ,
388410 )
389411 args = parser .parse_args ()
390- return MarkArgs (args .reports , args .n_runs , args .brief , args .hw_gbps , args . hw_tflops )
412+ return MarkArgs (args .reports , args .n_runs , args .brief , args .eff )
391413
392414
def _label_suffix_pattern(labels):
    """Compile a regex matching ``-<label>`` (optionally plus ``-min``/``-max``) at the end of a column name.

    Returns ``None`` when ``labels`` is empty, so callers never search with an
    empty alternation (``-()...``), which would match any name ending in ``-``.
    """
    if not labels:
        return None
    # Labels are literal text (e.g. "GB/s"); escape them so regex metacharacters cannot change the match.
    alternatives = "|".join(re.escape(label) for label in labels)
    return re.compile(f"-({alternatives})(-min|-max)?$")


def enhance_df(df, bench, mark_args: MarkArgs):
    """Return a copy of ``df`` optionally trimmed to brief form and extended with HW-efficiency columns.

    Args:
        df: Benchmark results table. Efficiency handling looks for columns whose
            names end in ``-<label>``, ``-<label>-min`` or ``-<label>-max``,
            where ``<label>`` is one of ``bench.ylabel``.
        bench: Benchmark description; only ``ylabel`` (iterated as a collection
            of label strings) and ``line_names`` (provider names) are read.
        mark_args: ``brief`` drops every column ending in ``min``, ``max`` or
            ``CV``; ``eff`` enables efficiency columns based on the values
            returned by ``get_gpu_info()``.

    Returns:
        A new DataFrame; the input frame is not modified. For each matching
        column a ``<prefix>-ceff`` (TFLOPS / peak TFLOPS) and/or
        ``<prefix>-meff`` (GB/s / peak GB/s) ratio column is added. When a
        provider from ``bench.line_names`` has both, they are replaced by a
        single ``<provider>-eff`` column holding the larger ratio formatted as
        a percentage; otherwise the raw ratio column is left in place.
    """
    hw_tflops, hw_gbps = None, None
    if mark_args.eff:
        hw_tflops, hw_gbps = get_gpu_info()

    if mark_args.brief:
        kept = [c for c in df.columns if not c.endswith(("min", "max", "CV"))]
        df = df[kept].copy()
    else:
        df = df.copy()

    # Find and write down HW efficiency columns
    tflops_pattern = _label_suffix_pattern([label for label in bench.ylabel if label.lower().endswith("tflops")])
    gbps_pattern = _label_suffix_pattern(
        [label for label in bench.ylabel if label.lower().replace("/", "p").endswith("gbps")])

    # NOTE(review): the plain, -min and -max columns of one provider all map to the same
    # "-ceff"/"-meff" name, so the last matching column in `df` wins -- confirm this is intended.
    for col in list(df.columns):  # snapshot: columns are added while iterating
        if hw_tflops and tflops_pattern is not None and tflops_pattern.search(col):
            df[tflops_pattern.sub("-ceff", col)] = df[col] / hw_tflops
        if hw_gbps and gbps_pattern is not None and gbps_pattern.search(col):
            df[gbps_pattern.sub("-meff", col)] = df[col] / hw_gbps

    # We will only keep resulting efficiency column, we are either compute or memory bound.
    for provider in bench.line_names:
        ceff_col = f"{provider}-ceff"
        meff_col = f"{provider}-meff"
        if ceff_col in df.columns and meff_col in df.columns:
            df[f"{provider}-eff"] = np.maximum(df[ceff_col], df[meff_col]).apply(lambda x: f"{x:.2%}")
            df = df.drop(columns=[ceff_col, meff_col])

    return df
405446
@@ -487,7 +528,7 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b
487528 col0 , col1 = df .columns .tolist ()
488529 df ["Diff" ] = df [col1 ] - df [col0 ]
489530
490- df = enhance_df (df , mark_args )
531+ df = enhance_df (df , bench , mark_args )
491532 if print_data :
492533 print (bench .plot_name + ":" )
493534 print (df .to_string ())
0 commit comments