Copy get_quant_cfg_choices from nemo to export-deploy

chtruong814 · chtruong814 · commit 5703968d2cd3 · 2026-02-13T07:37:30.000Z
Signed-off-by: Charlie Truong &lt;chtruong@nvidia.com&gt;
diff --git a/nemo_export/onnx_llm_exporter.py b/nemo_export/onnx_llm_exporter.py
@@ -60,17 +60,6 @@
     trt = MagicMock()
     HAVE_TENSORRT = False
 
-try:
-    from nemo.collections.llm.modelopt.quantization.quant_cfg_choices import (
-        get_quant_cfg_choices,
-    )
-
-    QUANT_CFG_CHOICES = get_quant_cfg_choices()
-
-    HAVE_NEMO = True
-except (ImportError, ModuleNotFoundError):
-    HAVE_NEMO = False
-
 
 @wrapt.decorator
 def noop_decorator(func):
@@ -491,17 +480,15 @@ def quantize(
             forward_loop (callable): A function that accepts the model as a single parameter
                 and runs sample data through it. This is used for calibration during quantization.
         """
-        if not HAVE_NEMO:
-            raise UnavailableError(MISSING_NEMO_MSG)
-
         if not HAVE_MODELOPT:
             raise UnavailableError(MISSING_MODELOPT_MSG)
 
+        quant_cfg_choices = get_quant_cfg_choices()
         if isinstance(quant_cfg, str):
-            assert quant_cfg in QUANT_CFG_CHOICES, (
-                f"Quantization config {quant_cfg} is not supported. Supported configs: {list(QUANT_CFG_CHOICES)}"
+            assert quant_cfg in quant_cfg_choices, (
+                f"Quantization config {quant_cfg} is not supported. Supported configs: {list(quant_cfg_choices)}"
             )
-            quant_cfg = QUANT_CFG_CHOICES[quant_cfg]
+            quant_cfg = quant_cfg_choices[quant_cfg]
 
         logger.info("Starting quantization...")
         mtq.quantize(self.model, quant_cfg, forward_loop=forward_loop)
@@ -558,3 +545,35 @@ def get_calib_data_iter(
         for j in range(len(batch)):
             batch[j] = batch[j][:max_sequence_length]
         yield batch
+
+
+def get_quant_cfg_choices() -> Dict[str, Dict[str, Any]]:
+    """
+    Retrieve a dictionary of modelopt quantization configuration choices.
+
+    This function checks for the availability of specific quantization configurations defined in
+    the modelopt.torch.quantization (mtq) module and returns a dictionary mapping short names to
+    their corresponding configurations. The function is intended to work for different modelopt
+    library versions that come with variable configuration choices.
+
+    Returns:
+        dict: A dictionary where keys are short names (e.g., "fp8") and values are the
+            corresponding modelopt quantization configuration objects.
+    """
+    quant_cfg_names = [
+        ("int8", "INT8_DEFAULT_CFG"),
+        ("int8_sq", "INT8_SMOOTHQUANT_CFG"),
+        ("fp8", "FP8_DEFAULT_CFG"),
+        ("block_fp8", "FP8_2D_BLOCKWISE_WEIGHT_ONLY_CFG"),
+        ("int4_awq", "INT4_AWQ_CFG"),
+        ("w4a8_awq", "W4A8_AWQ_BETA_CFG"),
+        ("int4", "INT4_BLOCKWISE_WEIGHT_ONLY_CFG"),
+        ("nvfp4", "NVFP4_DEFAULT_CFG"),
+    ]
+
+    quant_cfg_choices = {}
+    for short_name, full_name in quant_cfg_names:
+        if config := getattr(mtq, full_name, None):
+            quant_cfg_choices[short_name] = config
+
+    return quant_cfg_choices