We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1ae9302 commit dc12e82 — Copy full SHA for dc12e82
modelopt/torch/quantization/tensor_quant.py
@@ -53,17 +53,9 @@ def _fp8_eager(x, amax=None):
53
x = x.to(torch.float32) * scale_inv
54
return x.to(dtype)
55
56
-
57
-def _fp8_triton(x, amax):
58
- return torch.compile(dynamic=True)(_fp8_eager)(x, amax)
59
60
61
def fp8_eager(x, amax):
62
"""Eager mode implementation of FP8 quantization."""
63
- if triton_kernel.IS_AVAILABLE and not DISABLE_TRITON_KERNEL:
64
- return _fp8_triton(x, amax)
65
- else:
66
- return _fp8_eager(x, amax)
+ return _fp8_eager(x, amax)
67
68
69
def scaled_e4m3_impl(
0 commit comments