Commit e18f9b6

update alg_ext and add ut (#1064)
* update alg_ext and add ut

Signed-off-by: n1ck-guo <[email protected]>

1 parent b1b60e4 · commit e18f9b6

File tree

5 files changed · +51 −21 lines changed


auto_round/alg_ext.abi3.so

-16.1 KB
Binary file not shown.

auto_round/compressors/base.py

Lines changed: 8 additions & 6 deletions
@@ -311,18 +311,12 @@ def __init__(
         if device_map is None:
             device_map = 0

-        self.enable_torch_compile = enable_torch_compile
-        self._adjust_torch_compile(enable_torch_compile)
-
         self.device_map = device_map
         if isinstance(self.device_map, str):
             self.device_map = self.device_map.replace(" ", "")

         self.device_list = parse_available_devices(device_map)

-        if isinstance(scheme, AutoScheme):
-            self.layer_config = self._gen_auto_scheme(model, scheme, dataset, self.device_map)
-
         # Set device, must place after model loading
         self.device = get_major_device(device_map)
         set_non_auto_device_map(self.model, self.device_map)
@@ -387,10 +381,17 @@ def __init__(
         self.batch_dim = None
         self.infer_bs_coeff = 1

+        # after setting iters
+        self.enable_torch_compile = enable_torch_compile
+        self._adjust_torch_compile(enable_torch_compile)
+
         self.block_forward = compile_func(block_forward, self.device) if self.enable_torch_compile else block_forward
         self._check_configs()
         torch.set_printoptions(precision=3, sci_mode=True)

+        if isinstance(scheme, AutoScheme):
+            self.layer_config = self._gen_auto_scheme(model, scheme, dataset, self.device_map)
+
         if is_hpex_available():
             logger.info("habana_frameworks is available, import htcore explicitly.")
             import habana_frameworks.torch.core as htcore  # pylint: disable=E0401
@@ -632,6 +633,7 @@ def _adjust_torch_compile(self, enable_torch_compile: bool) -> None:
             and not is_debug_mode()
             and "fp8" not in self.data_type
             and "fp8" not in self.act_data_type
+            and self.iters > 0
         ):
             logger.info(
                 "'enable_torch_compile' is set to `False` by default. "

auto_round/data_type/utils.py

Lines changed: 11 additions & 3 deletions
@@ -264,12 +264,20 @@ def _is_mlp_module(module: Module):
     # already fused/treated as one layer
     if hasattr(submodule, "qkv_proj"):
         return
+
+    q_global_scale = getattr(submodule.q_proj, global_scale_name, max_value_tensor)
+    q_global_scale = max_value_tensor if q_global_scale is None else q_global_scale
+    k_global_scale = getattr(submodule.k_proj, global_scale_name, max_value_tensor)
+    k_global_scale = max_value_tensor if k_global_scale is None else k_global_scale
+    v_global_scale = getattr(submodule.v_proj, global_scale_name, max_value_tensor)
+    v_global_scale = max_value_tensor if v_global_scale is None else v_global_scale
+
     global_scale = torch.min(
         torch.cat(
             (
-                getattr(submodule.q_proj, global_scale_name, max_value_tensor).reshape(1),
-                getattr(submodule.k_proj, global_scale_name, max_value_tensor).reshape(1),
-                getattr(submodule.v_proj, global_scale_name, max_value_tensor).reshape(1),
+                q_global_scale.reshape(1),
+                k_global_scale.reshape(1),
+                v_global_scale.reshape(1),
             )
         )
     ).reshape([1])
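
The new temporaries fix a None-handling gap: `getattr(obj, name, default)` returns the default only when the attribute is missing, not when it exists with the value `None`, so the old one-liners could call `.reshape(1)` on `None` and raise `AttributeError`. A minimal sketch of the failure mode and the fix; `Proj`, the attribute name, and the sentinel tensor are illustrative stand-ins for `submodule.q_proj`, `global_scale_name`, and `max_value_tensor`:

```python
import torch

class Proj:
    weight_global_scale = None  # attribute exists, but holds None

# Illustrative stand-in for the max_value_tensor sentinel in the diff.
max_value_tensor = torch.tensor([torch.finfo(torch.float32).max])

scale = getattr(Proj(), "weight_global_scale", max_value_tensor)
print(scale)  # None: the getattr default did NOT apply, so .reshape(1) would fail

# The fix adds an explicit None fallback before reshaping:
scale = max_value_tensor if scale is None else scale
print(scale.reshape(1))  # tensor([3.4028e+38])
```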

test/test_cpu/test_alg_ext.py

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+import copy
+import shutil
+import sys
+import unittest
+
+from parameterized import parameterized
+
+sys.path.insert(0, "../..")
+
+from auto_round import AutoRound
+
+
+class TestAlgExt(unittest.TestCase):
+    def test_alg_ext(self):
+        model_name = "/tf_dataset/auto_round/models/facebook/opt-125m"
+        ar = AutoRound(model_name, scheme="W2A16", iters=1, nsamples=1, enable_alg_ext=True)
+        ar.quantize()
+
+        model_name = "/tf_dataset/auto_round/models/Qwen/Qwen3-0.6B"
+        ar = AutoRound(model_name, scheme="gguf:q4_k_s", iters=1, nsamples=1, enable_alg_ext=True)
+        ar.quantize()
+
+    def test_alg_ext_import(self):
+        from auto_round.alg_ext import wrapper_autoround
+
+    def test_all_support_dtype(self):
+        model_name = "/tf_dataset/auto_round/models/facebook/opt-125m"
+        for scheme in ["MXFP4", "NVFP4", "W2A16G64"]:
+            ar = AutoRound(
+                model_name, scheme=scheme, iters=1, nsamples=1, enable_alg_ext=True, enable_torch_compile=True
+            )
+            ar.quantize()

test/test_cpu/test_autoround.py

Lines changed: 0 additions & 12 deletions
@@ -689,18 +689,6 @@ def test_mixed_bit_setting(self):
         ):
             raise ValueError("mixed bits is not correct")

-    def test_alg_ext(self):
-        model_name = "/tf_dataset/auto_round/models/facebook/opt-125m"
-        ar = AutoRound(model_name, scheme="W2A16", iters=1, nsamples=1, enable_alg_ext=True)
-        ar.quantize()
-
-        model_name = "/tf_dataset/auto_round/models/Qwen/Qwen3-0.6B"
-        ar = AutoRound(model_name, scheme="gguf:q4_k_s", iters=1, nsamples=1, enable_alg_ext=True)
-        ar.quantize()
-
-    def test_alg_ext_import(self):
-        from auto_round.alg_ext import wrapper_autoround
-
     def test_invalid_layer_config(self):
         with self.assertRaises(ValueError):
             layer_config = {"model.decoder.layers.2.self_attnx": {"bits": 2}}
