 import sympy
 import torch
 import torch.nn as nn
-import torch.utils.benchmark as benchmark
 import tqdm
 from torch.profiler import ProfilerActivity, profile
 from utils import (
     profiler_output_to_filtered_time_by_kernel_name,
 )

+import torchao
 from torchao.float8 import (
     Float8LinearConfig,
     convert_to_float8_training,
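For context, the two torchao.float8 symbols imported above are the public entry points for float8 training. The sketch below shows how they are typically wired up; it is illustrative only and not part of the diffed file (the toy model, shapes, and dtype are assumptions), and it assumes a CUDA GPU with float8 support and a recent torchao build:

```python
import torch
import torch.nn as nn
from torchao.float8 import Float8LinearConfig, convert_to_float8_training

# toy model standing in for whatever the benchmark script constructs
m = nn.Sequential(
    nn.Linear(2048, 4096),
    nn.ReLU(),
    nn.Linear(4096, 2048),
).cuda().to(torch.bfloat16)

# the default config uses tensorwise scaling, matching the
# float8_recipe_name = "tensorwise" default seen later in the diff
config = Float8LinearConfig()

# swap eligible nn.Linear modules for their float8 training equivalents
convert_to_float8_training(m, config=config)

x = torch.randn(4096, 2048, device="cuda", dtype=torch.bfloat16)
m(x).sum().backward()
```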
@@ -83,20 +83,6 @@ def forward(self, x):
         return x


-# TODO(next): hook this up
-
-
-def benchmark_fn_in_sec(f, *args, **kwargs):
-    # Manual warmup
-    for _ in range(4):
-        f(*args, **kwargs)
-    t0 = benchmark.Timer(
-        stmt="f(*args, **kwargs)", globals={"args": args, "kwargs": kwargs, "f": f}
-    )
-    measurement = t0.blocked_autorange()
-    return measurement.mean
-
-
 def get_gpu_kernel_time(m, x, grad_output):
     # warm up
     for _ in range(2):
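The hunk above drops the unused wall-clock helper built on torch.utils.benchmark (hence the removed import) and keeps the profiler-based path. As a rough illustration of what a kernel-time measurement like get_gpu_kernel_time does with the torch.profiler imports, here is a minimal sketch; the function name is made up, and the real script filters and aggregates kernels via profiler_output_to_filtered_time_by_kernel_name from utils, whose details are not shown in this diff:

```python
import torch
from torch.profiler import ProfilerActivity, profile

def gpu_kernel_time_sec_sketch(m, x, grad_output):
    # warm up, mirroring get_gpu_kernel_time above
    for _ in range(2):
        y = m(x)
        y.backward(grad_output)
    # profile one forward + backward pass and sum GPU kernel self time
    with profile(activities=[ProfilerActivity.CUDA]) as prof:
        y = m(x)
        y.backward(grad_output)
    # profiler times are reported in microseconds; older torch versions
    # expose this field as self_cuda_time_total instead
    total_us = sum(e.self_device_time_total for e in prof.key_averages())
    return total_us / 1e6
```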
@@ -232,6 +218,8 @@ def run(
         float8_recipe_name = "tensorwise"

     print(f"GPU: {torch.cuda.get_device_name(0)}")
+    print(f"torch version: {torch.__version__}")
+    print(f"torchao version: {torchao.__version__}")
     print(f"do_benchmarks: {do_benchmarks}")
     print(f"shape_gen_name: {shape_gen_name}")
     print(f"float8_recipe_name: {float8_recipe_name}")