Commit dc12e82

Removed FP8 eager with torch.compile
Signed-off-by: realAsma <[email protected]>
1 parent: 1ae9302

File tree

1 file changed: +1 −9 lines changed


modelopt/torch/quantization/tensor_quant.py

Lines changed: 1 addition & 9 deletions
@@ -53,17 +53,9 @@ def _fp8_eager(x, amax=None):
         x = x.to(torch.float32) * scale_inv
     return x.to(dtype)
 
-
-def _fp8_triton(x, amax):
-    return torch.compile(dynamic=True)(_fp8_eager)(x, amax)
-
-
 def fp8_eager(x, amax):
     """Eager mode implementation of FP8 quantization."""
-    if triton_kernel.IS_AVAILABLE and not DISABLE_TRITON_KERNEL:
-        return _fp8_triton(x, amax)
-    else:
-        return _fp8_eager(x, amax)
+    return _fp8_eager(x, amax)
 
 
 def scaled_e4m3_impl(
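For context, here is a minimal sketch of what the quantization path might look like after this commit. Only the two dequantize lines and the function signatures are visible in the diff; the rest of the `_fp8_eager` body (the `scale`/`scale_inv` handling and the 448.0 E4M3 max-value constant) is a hypothetical reconstruction, and it assumes a PyTorch build that provides the `torch.float8_e4m3fn` dtype:

import torch

def _fp8_eager(x, amax=None):
    # Assumed shape of the eager FP8 quantize-dequantize helper.
    # Only the last two lines below appear verbatim in the diff context.
    dtype = x.dtype
    if amax is not None:
        # Hypothetical: map [-amax, amax] onto the E4M3 range
        # (448.0 is the largest normal float8_e4m3fn value).
        scale = 448.0 / amax
        scale_inv = 1.0 / scale
        x = x.to(torch.float32) * scale
    else:
        scale_inv = 1.0
    # Round-trip through FP8 to simulate quantization.
    x = x.to(torch.float8_e4m3fn)
    x = x.to(torch.float32) * scale_inv
    return x.to(dtype)

def fp8_eager(x, amax):
    """Eager mode implementation of FP8 quantization."""
    # After this commit, the Triton/torch.compile dispatch is gone
    # and the eager helper is called unconditionally.
    return _fp8_eager(x, amax)

The removed `_fp8_triton` path wrapped this same helper in `torch.compile(dynamic=True)`, so after the commit every call goes through plain eager execution.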
