Changes from all commits (44 commits)
6ffcf60  try to enable auto_scheme API (wenhuach21, Sep 25, 2025)
5d80825  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 25, 2025)
a4ef495  update a little (wenhuach21, Sep 25, 2025)
4173c3e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 25, 2025)
87e9454  update a little (wenhuach21, Sep 25, 2025)
f86eedb  Merge branch 'main' into auto_scheme (wenhuach21, Sep 25, 2025)
242d1ee  try to refine parse layer config code (wenhuach21, Sep 25, 2025)
4fc6b64  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 25, 2025)
63de904  Merge branch 'main' into auto_scheme (wenhuach21, Sep 26, 2025)
bb4d4ca  Merge branch 'main' into auto_scheme (wenhuach21, Sep 26, 2025)
7f76db2  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 26, 2025)
ae8837b  fix (wenhuach21, Sep 26, 2025)
44ca92d  Merge branch 'auto_scheme' of https://github.com/intel/auto-round int… (wenhuach21, Sep 26, 2025)
531224d  fix (wenhuach21, Sep 26, 2025)
c9fa408  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 26, 2025)
6453200  fix (wenhuach21, Sep 26, 2025)
5b2dd60  Merge branch 'auto_scheme' of https://github.com/intel/auto-round int… (wenhuach21, Sep 26, 2025)
3811010  tmp_change (wenhuach21, Sep 26, 2025)
4de7b08  commit (wenhuach21, Sep 26, 2025)
a9f0e44  commit (wenhuach21, Sep 26, 2025)
59a9f5d  update a little (wenhuach21, Sep 26, 2025)
1b7e911  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 26, 2025)
e068049  fix (wenhuach21, Sep 26, 2025)
1b84bf2  Merge branch 'auto_scheme' of https://github.com/intel/auto-round int… (wenhuach21, Sep 26, 2025)
0357c0b  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 26, 2025)
7c034bd  Merge branch 'main' into auto_scheme (wenhuach21, Sep 26, 2025)
602421c  merge autoscheme to scheme (wenhuach21, Sep 26, 2025)
091c5ad  refine layer_config code (wenhuach21, Sep 29, 2025)
90b6fa1  Merge branch 'main' into auto_scheme (wenhuach21, Sep 29, 2025)
f027801  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 29, 2025)
c6b78c6  tiny change (wenhuach21, Sep 29, 2025)
1b9f24e  tiny fix (wenhuach21, Sep 29, 2025)
2c0075a  tmp change (wenhuach21, Sep 29, 2025)
97198f0  tmp change (wenhuach21, Sep 29, 2025)
27b4b4d  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 29, 2025)
2d3095a  update (wenhuach21, Sep 29, 2025)
35a298b  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 29, 2025)
4a594cd  fix (wenhuach21, Sep 29, 2025)
dcd08d6  fix uts, still one left (wenhuach21, Sep 30, 2025)
9172264  fix gguf issue (wenhuach21, Sep 30, 2025)
1d9e593  Merge branch 'main' into auto_scheme (wenhuach21, Sep 30, 2025)
f98092c  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 30, 2025)
033d1f6  update a little (wenhuach21, Sep 30, 2025)
8ae1dfa  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 30, 2025)
auto_round/__init__.py (3 changes: 1 addition & 2 deletions)

@@ -13,9 +13,8 @@
 # limitations under the License.
 from auto_round.autoround import AutoRound

-# support for old api
 from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam
-from auto_round.schemes import QuantizationScheme
+from auto_round.schemes import QuantizationScheme, AutoScheme
 from auto_round.utils import LazyImport

auto_round/__main__.py (11 changes: 10 additions & 1 deletion)

@@ -110,7 +110,7 @@ def __init__(self, *args, **kwargs):

         self.add_argument(
             "--scale_dtype",
-            default="fp16",
+            default=None,
             choices=["fp16", "float16", "bf16", "bfloat16", "fp32", "float32"],
             help="scale data type to use for quantization",
         )
@@ -470,6 +470,14 @@ def tune(args):
     extra_config.scheme_config = scheme_config
     extra_config.mllm_config = mllm_config

+    layer_config = {}
+    # from auto_round.auto_schemes.haha import get_mixed_config_layer_config
+    # layer_config = {}
+    # best_path = get_mixed_config_layer_config(model_name, target_bits=3)
+    # for item in best_path:
+    #     layer_config[item[0]] = {}
+    #     layer_config[item[0]]["bits"] = item[1]
+
     autoround: BaseCompressor = AutoRound(
         model=model_name,
         scheme=scheme,
@@ -486,6 +494,7 @@ def tune(args):
         not_use_best_mse=args.not_use_best_mse,
         enable_adam=args.adam,
         extra_config=extra_config,
+        layer_config=layer_config,
     )

     model_name = args.model.rstrip("/")
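The commented-out scaffolding in tune() hints at the intended flow: an auto-scheme search produces per-layer bit assignments, which are folded into layer_config and handed to AutoRound. A minimal sketch of that shape, assuming get_mixed_config_layer_config returns (layer_name, bits) pairs as the comments suggest; the helper, layer names, and bit widths below are illustrative, not part of the merged code:

    # Hypothetical: mirrors the commented-out scaffolding above.
    best_path = [
        ("model.layers.0.self_attn.q_proj", 4),
        ("model.layers.0.mlp.gate_proj", 2),
    ]
    layer_config = {name: {"bits": bits} for name, bits in best_path}
    # autoround = AutoRound(model=model_name, scheme=scheme, layer_config=layer_config, ...)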
auto_round/auto_schemes/__init__.py (new file, 39 additions)

# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

AUTO_SCHEMES_ALGS = {}


def register_dtype(names):
    """Decorator that registers a mixed-precision algorithm in AUTO_SCHEMES_ALGS.

    Args:
        names: A string, or a tuple/list of strings, to register the
            algorithm under.

    Returns:
        The inner decorator, which stores the algorithm in the registry and
        returns it unchanged.
    """

    def register(alg):
        if isinstance(names, (tuple, list)):
            for name in names:
                AUTO_SCHEMES_ALGS[name] = alg
        else:
            AUTO_SCHEMES_ALGS[names] = alg

        return alg

    return register
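The registry itself ships empty: no algorithm is registered in this PR. A minimal sketch of how register_dtype is evidently meant to be used, with a hypothetical algorithm name and signature:

    from auto_round.auto_schemes import AUTO_SCHEMES_ALGS, register_dtype

    # Hypothetical algorithm; only the registration mechanics are real here.
    @register_dtype(("default", "mixed_bits"))
    def assign_bits(model, target_bits):
        ...

    assert AUTO_SCHEMES_ALGS["default"] is assign_bits
    assert AUTO_SCHEMES_ALGS["mixed_bits"] is assign_bits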
auto_round/auto_schemes/gen_scheme.py (new file, 84 additions)

# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Iterable

import torch

from auto_round import AutoScheme
from auto_round.utils import get_layer_features


class GenScheme:
    def __init__(
        self,
        auto_scheme: AutoScheme,
        model: torch.nn.Module,
        quant_layer_names: Iterable[str],
        fixed_layer_scheme: dict[str, dict],
        scale_dtype: str = "fp16",
        dataset: str = "pile-10k",
    ):
        self.auto_scheme = auto_scheme
        self.model = model
        self.quant_layer_names = quant_layer_names
        self.fixed_layer_scheme = fixed_layer_scheme
        self.scale_dtype = scale_dtype
        self.dataset = dataset

    def _get_min_max_avg_bits(self) -> tuple[float, float]:
        pass

    # not yet validated
    def get_layer_bits(self, layer):
        """Return (total_bits, avg_bits) for a layer, counting the quantized
        weights plus metadata (scales, zero points, double-quant parameters)."""
        weight = layer.weight
        n_param = weight.numel()
        weight_bits = getattr(layer, "bits", 16)
        group_size = getattr(layer, "group_size", 128)
        super_group_size = getattr(layer, "super_group_size", None)
        super_weight_bits = getattr(layer, "super_bits", None)

        # Main quantization cost
        weight_total_bits = weight_bits * n_param
        if weight_bits >= 16:  # unquantized layer
            return weight_total_bits, 16

        in_features, output_features = get_layer_features(layer)
        # Determine the number of quantization groups
        if group_size > 0:  # group-wise: ceil(in_features / group_size) groups per output channel
            n_group = output_features * ((in_features + group_size - 1) // group_size)
        elif group_size == 0:  # per-tensor
            n_group = 1
        elif group_size == -1:  # per-channel
            n_group = output_features  # out_channels
        else:
            raise ValueError(f"Invalid group_size {group_size}")

        aux_total_bits = 0
        if not super_group_size:
            # Scale and zero-point bit widths
            scale_bits = 16
            zp_bits = weight_bits  # default: zero point stored at the weight bit width
            # Overhead from scales and zero points
            aux_total_bits = n_group * (scale_bits + zp_bits)

        # Double quantization case
        if super_group_size:
            aux_total_bits += n_group * super_weight_bits * 2  # quantized scale and min per group
            # Number of super-groups
            n_super_group = (n_group + super_group_size - 1) // super_group_size
            aux_total_bits += n_super_group * 32 * 2  # fp32 double-quant scale and min_v

        total_bits = weight_total_bits + aux_total_bits
        avg_bits = total_bits / n_param
        return total_bits, avg_bits
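As a sanity check on this accounting, the same arithmetic applied by hand to a hypothetical 4096x4096 linear layer quantized to 4 bits with group_size=128 and no double quantization:

    # Standalone recomputation of get_layer_bits for an illustrative layer.
    in_features, out_features = 4096, 4096
    weight_bits, group_size = 4, 128

    n_param = in_features * out_features                 # 16,777,216 weights
    weight_total_bits = weight_bits * n_param            # 67,108,864 bits
    # One fp16 scale plus one int4 zero point per group of 128 weights:
    n_group = out_features * ((in_features + group_size - 1) // group_size)  # 131,072 groups
    aux_total_bits = n_group * (16 + weight_bits)        # 2,621,440 bits

    avg_bits = (weight_total_bits + aux_total_bits) / n_param
    print(avg_bits)  # 4.15625: scales and zero points add ~0.16 bits per weight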
auto_round/auto_schemes/utils.py (new file, 21 additions)

# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Placeholder stubs for the bit-accounting helpers; not yet implemented.
def get_total_bits(model, layer_config):
    pass


def get_bits(layer):
    pass
auto_round/autoround.py (5 changes: 2 additions & 3 deletions)

@@ -25,7 +25,7 @@
     MLLMCompressor,
 )
 from auto_round.logger import deprecated, logger
-from auto_round.schemes import QuantizationScheme
+from auto_round.schemes import AutoScheme, QuantizationScheme
 from auto_round.utils import is_mllm_model

@@ -63,7 +63,7 @@ def __new__(
         cls,
         model: Union[torch.nn.Module, str],
         tokenizer=None,
-        scheme: Union[str, dict, QuantizationScheme] = "W4A16",
+        scheme: Union[str, dict, QuantizationScheme, AutoScheme] = "W4A16",
         layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
         dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "NeelNanda/pile-10k",
         iters: int = 200,
@@ -77,7 +77,6 @@ def __new__(
         seed: int = 42,
         # for adam
         enable_adam: bool = False,
-        # for MLLM
         extra_config: ExtraConfig = None,
         **kwargs,
     ) -> BaseCompressor:
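With this change, scheme keeps accepting the existing string/dict/QuantizationScheme forms and now also accepts an AutoScheme instance. A sketch under that assumption; the AutoScheme constructor is not shown in this diff, so its argument below is a placeholder:

    from auto_round import AutoRound, AutoScheme

    # Strings keep working as before ("W4A16" is the default shown above).
    ar = AutoRound(model="facebook/opt-125m", scheme="W4A16")  # illustrative model name

    # Hypothetical: AutoScheme's fields are not visible in this diff.
    # ar = AutoRound(model="facebook/opt-125m", scheme=AutoScheme(avg_bits=3.0))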