11from __future__ import annotations
22
33from abc import ABC , abstractmethod
4+ import re
45from typing import Callable , ClassVar , Dict , Optional , List , Tuple , Union , Set
56from collections .abc import Iterable
67from enum import Enum
1314import datetime
1415import os
1516import time
17+ from pathlib import Path
1618
1719import scipy .stats
20+ import numpy as np
1821import pandas as pd
1922import matplotlib .pyplot as plt
2023
@@ -155,7 +158,6 @@ def do_bench_upstream_pytorch_profiler(fn, n_warmup=25, n_repeat=100, grad_to_no
155158 fn ()
156159 synchronize ()
157160 i += 1
158- print (f"Stopped warmup after { i } iterations" )
159161 else :
160162 for _ in range (n_warmup ):
161163 fn ()
@@ -336,6 +338,30 @@ def filter_providers(
336338 return supported_providers
337339
338340
341+ def get_gpu_info ():
342+ device_name = torch .xpu .is_available () and torch .xpu .get_device_name ()
343+ if device_name is None :
344+ print ("Couldn't read device name." )
345+ return None , None
346+
347+ # benchmarks/triton_kernels_benchmark/benchmark_testing.py -> benchmarks/gpu_info.json
348+ current_dir = Path (__file__ ).parent .resolve ()
349+ gpu_info_path = current_dir .parent / "gpu_info.json"
350+
351+ if not gpu_info_path .exists ():
352+ print (f"Warning: '{ gpu_info_path } ' not found." )
353+ return None , None
354+
355+ with open (gpu_info_path , "r" , encoding = "utf-8" ) as f :
356+ gpu_info = json .load (f )
357+
358+ if device_name not in gpu_info :
359+ print (f"Warning: Device '{ device_name } ' not found in { gpu_info_path } " )
360+ return None , None
361+
362+ return gpu_info [device_name ]
363+
364+
339365def perf_report (benchmarks ):
340366 """
341367 Mark a function for benchmarking. The benchmark can then be executed by using the :code:`.run` method on the return value.
@@ -352,6 +378,7 @@ class MarkArgs:
352378 reports : str = ""
353379 n_runs : int = 1
354380 brief : bool = False
381+ eff : bool = False
355382
356383 @staticmethod
357384 def load_cli_args () -> MarkArgs :
@@ -375,8 +402,47 @@ def load_cli_args() -> MarkArgs:
375402 action = "store_true" ,
376403 help = "Print only mean values without min, max, CV." ,
377404 )
405+ parser .add_argument (
406+ "--eff" ,
407+ "-e" ,
408+ action = "store_true" ,
409+ help = "Print HW utilization, will use internal database from 'gpu_info.json'." ,
410+ )
378411 args = parser .parse_args ()
379- return MarkArgs (args .reports , args .n_runs , args .brief )
412+ return MarkArgs (args .reports , args .n_runs , args .brief , args .eff )
413+
414+
def enhance_df(df, bench, mark_args: MarkArgs):
    """Post-process a benchmark result frame.

    Applies the ``--brief`` column filter (dropping min/max/CV statistics) and,
    when ``--eff`` is set, appends per-provider hardware-efficiency columns
    derived from the peak values in ``gpu_info.json``.

    Returns a new DataFrame; the input frame is not modified.
    """
    hw_tflops, hw_gbps = None, None
    if mark_args.eff:
        hw_tflops, hw_gbps = get_gpu_info()

    df = df.copy()
    if mark_args.brief:
        df = df[[c for c in df.columns if not any(map(c.endswith, ("min", "max", "CV")))]]

    def _pattern(labels):
        # Labels are escaped so regex metacharacters match literally; an empty
        # label list yields no pattern (the original built "-()(-min|-max)?$",
        # which could spuriously match columns such as "x--min").
        if not labels:
            return None
        return re.compile("-(" + "|".join(re.escape(l) for l in labels) + ")$")

    tflops_re = _pattern([l for l in bench.ylabel if l.lower().endswith("tflops")])
    gbps_re = _pattern([l for l in bench.ylabel if l.lower().replace("/", "p").endswith("gbps")])

    # Compute compute-bound (ceff) / memory-bound (meff) efficiency from the
    # mean columns only. The original also matched the "-min"/"-max" variants,
    # which re.sub collapsed onto the same "-ceff"/"-meff" name and silently
    # overwrote the mean-based value.
    for col in df.columns:
        if hw_tflops and tflops_re and tflops_re.search(col):
            df[tflops_re.sub("-ceff", col)] = df[col] / hw_tflops
        if hw_gbps and gbps_re and gbps_re.search(col):
            df[gbps_re.sub("-meff", col)] = df[col] / hw_gbps

    # We only keep the resulting efficiency column: a kernel is either compute
    # or memory bound, so report the larger of the two as a percentage.
    for provider in bench.line_names:
        ceff, meff = f"{provider}-ceff", f"{provider}-meff"
        if ceff in df.columns and meff in df.columns:
            df[f"{provider}-eff"] = np.maximum(df[ceff], df[meff]).apply(lambda x: f"{x:.2%}")
            df = df.drop(columns=[ceff, meff])

    return df
380446
381447
382448class Mark :
@@ -462,12 +528,10 @@ def _run(self, bench: Benchmark, save_path: str, show_plots: bool, print_data: b
462528 col0 , col1 = df .columns .tolist ()
463529 df ["Diff" ] = df [col1 ] - df [col0 ]
464530
531+ df = enhance_df (df , bench , mark_args )
465532 if print_data :
466533 print (bench .plot_name + ":" )
467- if mark_args .brief :
468- print (df [[c for c in df .columns if not any (map (c .endswith , ("min" , "max" , "CV" )))]].to_string ())
469- else :
470- print (df .to_string ())
534+ print (df .to_string ())
471535
472536 if save_path :
473537 df .to_csv (os .path .join (save_path , f"{ filename } .csv" ), float_format = f"%.{ save_precision } f" , index = False )
0 commit comments