Commit c6c9905

# [OMNIML-2244] Create the nvfp4 quant exporter (NVIDIA#636)
## What does this PR do?

**Type of change:** New feature

**Overview:**
- Implemented the NVFP4QuantExporter
- Deprecated fp4qdq_to_2dq
- Updated tests

## Usage

```bash
python torch_quant_to_onnx.py --quantize_mode=nvfp4 \
    --onnx_save_path=vit_base_patch16_224.nvfp4.onnx \
    --calibration_data_size 64 \
    --batch_size 128
```

## Testing

```bash
python evaluate.py --onnx_path=vit_base_patch16_224.nvfp4.onnx \
    --model_name=vit_base_patch16_224 \
    --results_path=./results.txt \
    --batch_size 128
```

Results:

```
The top1 accuracy of the model is 84.39%
The top5 accuracy of the model is 97.312%
Inference latency of the model is 7.22412 ms
```

## Before your PR is "*Ready for review*"

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: No (deprecates fp4qdq_to_2dq)
- **Did you write any new necessary tests?**: No
- **Did you add or update any necessary documentation?**: No
- **Did you update [Changelog](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CHANGELOG.rst)?**: No

Signed-off-by: ajrasane <[email protected]>
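At the API level the change is a one-line swap for downstream callers. A minimal sketch, assuming standard `onnx.load`/`onnx.save` I/O around the new entry point; the file name is borrowed from the usage example above:

```python
import onnx

from modelopt.onnx.export import NVFP4QuantExporter

# Load an ONNX model that already carries NVFP4 Q/DQ nodes
# (e.g. one produced by torch_quant_to_onnx.py --quantize_mode=nvfp4).
onnx_model = onnx.load("vit_base_patch16_224.nvfp4.onnx")

# New entry point; replaces the deprecated fp4qdq_to_2dq(onnx_model).
onnx_model = NVFP4QuantExporter.process_model(onnx_model)

onnx.save(onnx_model, "vit_base_patch16_224.nvfp4.onnx")
```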
1 parent 097037d commit c6c9905

File tree

7 files changed: +394, -287 lines

examples/diffusers/quantization/onnx_utils/export.py

Lines changed: 2 additions & 2 deletions

@@ -47,7 +47,7 @@
 from diffusers.models.unets import UNet2DConditionModel
 from torch.onnx import export as onnx_export
 
-from modelopt.onnx.quantization.qdq_utils import fp4qdq_to_2dq
+from modelopt.onnx.export import NVFP4QuantExporter
 from modelopt.torch.quantization.export_onnx import configure_linear_module_onnx_quantizers
 from modelopt.torch.utils import torch_to
 
@@ -547,6 +547,6 @@ def modelopt_export_sd(backbone, onnx_dir, model_name, precision):
     else:
         flux_convert_rope_weight_type(onnx_model)
     if precision == "fp4":
-        onnx_model = fp4qdq_to_2dq(onnx_model)
+        onnx_model = NVFP4QuantExporter.process_model(onnx_model)
     save_onnx(onnx_model, q_output)
     shutil.rmtree(tmp_subfolder, ignore_errors=True)
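Since the change is not backward compatible, callers that cannot migrate immediately could keep the old name alive locally. A hypothetical shim, not part of this PR; it only forwards to the confirmed `NVFP4QuantExporter.process_model` entry point:

```python
import warnings

from modelopt.onnx.export import NVFP4QuantExporter


def fp4qdq_to_2dq(onnx_model, verbose=False):
    """Hypothetical compatibility shim for code still using the removed helper."""
    warnings.warn(
        "fp4qdq_to_2dq is deprecated; use NVFP4QuantExporter.process_model",
        DeprecationWarning,
        stacklevel=2,
    )
    # The new entry point takes no verbose flag, so the argument is ignored.
    return NVFP4QuantExporter.process_model(onnx_model)
```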

examples/onnx_ptq/llm_export.py

Lines changed: 2 additions & 3 deletions

@@ -30,15 +30,14 @@
 from transformers import AutoConfig, AutoTokenizer
 
 import modelopt
-from modelopt.onnx.export import INT4QuantExporter
+from modelopt.onnx.export import INT4QuantExporter, NVFP4QuantExporter
 from modelopt.onnx.llm_export_utils.export_utils import (
     ModelLoader,
     WrapperModelForCausalLM,
     llm_to_onnx,
 )
 from modelopt.onnx.llm_export_utils.quantization_utils import quantize
 from modelopt.onnx.llm_export_utils.surgeon_utils import fold_fp8_qdq_to_dq
-from modelopt.onnx.quantization.qdq_utils import fp4qdq_to_2dq
 from modelopt.torch.export import export_hf_checkpoint
 from modelopt.torch.quantization.utils import is_quantized_linear
 
@@ -275,7 +274,7 @@ def time_operation(operation_name):
 
     if dtype == "nvfp4":
         with time_operation("quantizing weights to nvfp4"):
-            onnx_model = fp4qdq_to_2dq(onnx_model, verbose=True)
+            onnx_model = NVFP4QuantExporter.process_model(onnx_model)
 
     elif dtype == "int4_awq":
         with time_operation("quantizing weights to int4"):

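Both exporters now come from `modelopt.onnx.export`, so call sites like the branch above can table-dispatch on dtype. A sketch under one assumption: the diff confirms `process_model` only for `NVFP4QuantExporter`, and treating `INT4QuantExporter` symmetrically is a guess. Note that the new NVFP4 call also drops the old `verbose=True` flag.

```python
from modelopt.onnx.export import INT4QuantExporter, NVFP4QuantExporter

# Hypothetical dtype-to-exporter table; only the NVFP4 entry point is
# confirmed by this diff, the INT4 one is assumed symmetrical.
EXPORTERS = {
    "nvfp4": NVFP4QuantExporter,
    "int4_awq": INT4QuantExporter,
}


def export_quantized_weights(onnx_model, dtype: str):
    """Route an in-memory ONNX model to the matching quant exporter."""
    try:
        exporter = EXPORTERS[dtype]
    except KeyError:
        raise ValueError(f"unsupported dtype: {dtype}") from None
    return exporter.process_model(onnx_model)
```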