We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1ae9302 commit dc12e82 — Copy full SHA for dc12e82
modelopt/torch/quantization/tensor_quant.py
@@ -53,17 +53,9 @@ def _fp8_eager(x, amax=None):
53
x = x.to(torch.float32) * scale_inv
54
return x.to(dtype)
55
56
-
57
-def _fp8_triton(x, amax):
58
- return torch.compile(dynamic=True)(_fp8_eager)(x, amax)
59
60
61
def fp8_eager(x, amax):
62
"""Eager mode implementation of FP8 quantization."""
63
- if triton_kernel.IS_AVAILABLE and not DISABLE_TRITON_KERNEL:
64
- return _fp8_triton(x, amax)
65
- else:
66
- return _fp8_eager(x, amax)
+ return _fp8_eager(x, amax)
67
68
69
def scaled_e4m3_impl(
0 commit comments