From b5f912e8e696492cd57f3a9600b73642dabdfe22 Mon Sep 17 00:00:00 2001
From: Riyad Islam
Date: Mon, 8 Sep 2025 17:23:30 -0700
Subject: [PATCH] Avoid autocast at onnx export if fp32 model is desired

Signed-off-by: Riyad Islam
---
 modelopt/torch/_deploy/utils/torch_onnx.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modelopt/torch/_deploy/utils/torch_onnx.py b/modelopt/torch/_deploy/utils/torch_onnx.py
index e18a9d209..dd691d80b 100644
--- a/modelopt/torch/_deploy/utils/torch_onnx.py
+++ b/modelopt/torch/_deploy/utils/torch_onnx.py
@@ -393,7 +393,9 @@ def get_onnx_bytes_and_metadata(
     # during inference.
     input_none_names = list(set(tree_spec_input.names) - set(input_names))

-    use_torch_autocast = not (is_fp4_quantized(model) or is_mxfp8_quantized(model))
+    use_torch_autocast = not (
+        is_fp4_quantized(model) or is_mxfp8_quantized(model) or weights_dtype == "fp32"
+    )
     autocast = torch.autocast("cuda") if use_torch_autocast else nullcontext()

     # Get output once (we export in inference mode - so also using inference mode here!)
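
Note (not part of the patch): a minimal standalone sketch of the context-manager
selection the hunk above changes. The helper name `export_autocast_context` and
the boolean parameters `quantized_fp4` / `quantized_mxfp8` are hypothetical
stand-ins for `is_fp4_quantized(model)`, `is_mxfp8_quantized(model)`, and the
`weights_dtype` argument in torch_onnx.py; the gating expression itself matches
the patched line.

    from contextlib import nullcontext

    import torch

    def export_autocast_context(
        quantized_fp4: bool, quantized_mxfp8: bool, weights_dtype: str
    ):
        # Skip CUDA autocast for FP4/MXFP8-quantized models, and (with this
        # patch) also when an fp32 export is explicitly requested, so the
        # exported ONNX graph keeps fp32 precision in those cases.
        use_torch_autocast = not (
            quantized_fp4 or quantized_mxfp8 or weights_dtype == "fp32"
        )
        return torch.autocast("cuda") if use_torch_autocast else nullcontext()

    # Requesting fp32 weights now yields a no-op context instead of autocast.
    assert isinstance(export_autocast_context(False, False, "fp32"), nullcontext)
    # Other dtypes (e.g. "fp16") still export under CUDA autocast.
    assert isinstance(export_autocast_context(False, False, "fp16"), torch.autocast)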