
Commit 918d081

Integrate autocast for mxfp8

Signed-off-by: ajrasane <[email protected]>

1 parent 17d59a4

File tree (3 files changed, +2 −21 lines):

modelopt/onnx/export/mxfp8_exporter.py
modelopt/onnx/trt_utils.py
modelopt/torch/_deploy/utils/torch_onnx.py


modelopt/onnx/export/mxfp8_exporter.py

Lines changed: 0 additions & 20 deletions
@@ -166,24 +166,4 @@ def post_process(onnx_model: onnx.ModelProto) -> onnx.ModelProto:
                     attr.s = b"tanh"
             logger.debug(f"Updated GELU node {node.name} to use tanh approximation")
 
-    def is_fp32_cast(node: onnx.NodeProto) -> bool:
-        return node.op_type == "Cast" and any(
-            attr.name == "to" and attr.i == onnx.TensorProto.FLOAT for attr in node.attribute
-        )
-
-    # Remove Cast nodes after specific operators
-    nodes_to_remove = []
-    for node in graph.node:
-        if node.op_type in ["Transpose", "Reshape", "Sqrt", "Add", "Gelu"]:
-            child_nodes = [n for n in graph.node if node.output[0] in n.input]
-            if len(child_nodes) == 1 and is_fp32_cast(child_nodes[0]):
-                cast_node = child_nodes[0]
-                node.output.clear()
-                node.output.extend(cast_node.output)
-                nodes_to_remove.append(cast_node.name)
-
-    # Remove unnecessary casts
-    new_nodes = [node for node in graph.node if node.name not in nodes_to_remove]
-    graph.node.extend(new_nodes)
-
     return onnx_model
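With autocast now handling precision conversions for MXFP8, the manual cast-folding pass above is removed. For reference, here is a standalone sketch of what that pass did: fold an FP32 Cast that is the sole consumer of certain ops back into its producer. The name fold_fp32_casts is ours, not the repo's, and the node-list rebuild below replaces the list in place (as shown in the diff, the removed code called graph.node.extend(new_nodes) without clearing first, which would have duplicated every node).

    import onnx


    def fold_fp32_casts(graph: onnx.GraphProto) -> None:
        """Sketch of the removed cleanup: drop FP32 Casts that directly follow
        Transpose/Reshape/Sqrt/Add/Gelu nodes, rewiring outputs around them."""

        def is_fp32_cast(node: onnx.NodeProto) -> bool:
            return node.op_type == "Cast" and any(
                attr.name == "to" and attr.i == onnx.TensorProto.FLOAT for attr in node.attribute
            )

        nodes_to_remove = []
        for node in graph.node:
            if node.op_type in ["Transpose", "Reshape", "Sqrt", "Add", "Gelu"]:
                child_nodes = [n for n in graph.node if node.output[0] in n.input]
                if len(child_nodes) == 1 and is_fp32_cast(child_nodes[0]):
                    cast_node = child_nodes[0]
                    # The producer takes over the Cast's output name, so
                    # downstream consumers keep resolving their inputs.
                    del node.output[:]
                    node.output.extend(cast_node.output)
                    nodes_to_remove.append(cast_node.name)

        # Rebuild the node list without the folded Casts (replace, don't extend).
        kept = [node for node in graph.node if node.name not in nodes_to_remove]
        del graph.node[:]
        graph.node.extend(kept)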

modelopt/onnx/trt_utils.py

Lines changed: 1 addition & 0 deletions
@@ -140,6 +140,7 @@ def _map_trt_to_onnx_type(trt_type: trt.DataType):
         trt.bool: onnx.TensorProto.BOOL,
         trt.fp8: onnx.TensorProto.FLOAT8E4M3FN,
         trt.fp4: onnx.TensorProto.FLOAT4E2M1,
+        trt.e8m0: onnx.TensorProto.UINT8,
     }
     try:
         return trt_to_onnx_dtype_mapping[trt_type]
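The one-line addition maps TensorRT's E8M0 type to UINT8 on the ONNX side; a plausible reading is that E8M0 block scales are pure 8-bit exponents, so their payload travels as unsigned bytes. A minimal sketch of the mapping as it stands after this commit (the standalone wrapper name and error handling are our assumptions, mirroring the visible try/lookup):

    import onnx
    import tensorrt as trt

    # Mapping excerpt after this commit; trt.e8m0 scales are carried as UINT8
    # since no native E8M0 tensor type is targeted on the ONNX side here.
    trt_to_onnx_dtype_mapping = {
        trt.bool: onnx.TensorProto.BOOL,
        trt.fp8: onnx.TensorProto.FLOAT8E4M3FN,
        trt.fp4: onnx.TensorProto.FLOAT4E2M1,
        trt.e8m0: onnx.TensorProto.UINT8,
    }


    def map_trt_to_onnx_type(trt_type: trt.DataType) -> int:
        # Hypothetical standalone version of _map_trt_to_onnx_type's lookup.
        try:
            return trt_to_onnx_dtype_mapping[trt_type]
        except KeyError as err:
            raise ValueError(f"Unsupported TensorRT type: {trt_type}") from err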

modelopt/torch/_deploy/utils/torch_onnx.py

Lines changed: 1 addition & 1 deletion
@@ -576,7 +576,7 @@ def get_onnx_bytes_and_metadata(
     except StopIteration:
         param_dtype = torch.float32
     if weights_dtype in ["fp16", "bf16"] and param_dtype == torch.float32:
-        if is_mxfp8_quantized(model) or is_int4_quantized(model):
+        if is_int4_quantized(model):
             assert weights_dtype == "fp16", "BF16 + MXFP8/INT4 mixed precision is not supported yet"
             onnx_opt_graph = convert_float_to_float16(
                 onnx_opt_graph,
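The effect of this change: MXFP8-quantized models no longer take the manual convert_float_to_float16 path, since autocast now owns that conversion; only INT4 models do. A hedged distillation of the revised condition (the function name is a stand-in; the predicate argument replaces the is_int4_quantized(model) helper visible in the diff):

    import torch


    def takes_manual_fp16_path(weights_dtype: str, param_dtype: torch.dtype,
                               int4_quantized: bool) -> bool:
        """Stand-in for the branch above: after this commit, only INT4 models
        are manually converted to float16; MXFP8 models rely on autocast."""
        if weights_dtype in ["fp16", "bf16"] and param_dtype == torch.float32:
            if int4_quantized:
                # Mirrors the assert in the diff: BF16 + INT4 is unsupported.
                assert weights_dtype == "fp16", "BF16 + INT4 mixed precision is not supported yet"
                return True
        return False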
