[quantization] Introduce wrap helper (#557)

mhs4670go · web-flow · commit fc67e354b4d2 · 2026-03-17T16:47:09.000+09:00
This commit introduces wrap helper.

TICO-DCO-1.0-Signed-off-by: seongwoo &lt;mhs4670go@naver.com&gt;
diff --git a/tico/quantization/wrapq/quantizer.py b/tico/quantization/wrapq/quantizer.py
@@ -20,7 +20,7 @@
 from tico.quantization.config.ptq import PTQConfig
 from tico.quantization.quantizer import BaseQuantizer
 from tico.quantization.quantizer_registry import register_quantizer
-
+from tico.quantization.wrapq.wrap_helper import PTQWrapHelper
 from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
 from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
 
@@ -43,6 +43,7 @@ def __init__(self, config: PTQConfig):
         super().__init__(config)
         self.qcfg: PTQConfig = config
         self.strict_wrap: bool = bool(getattr(config, "strict_wrap", True))
+        self.wrapper = PTQWrapHelper(strict_wrap=self.strict_wrap)
 
     @torch.no_grad()
     def prepare(
@@ -52,7 +53,7 @@ def prepare(
         kwargs: Optional[Dict[str, Any]] = None,
     ):
         # Wrap the tree (or single module) according to strictness policy
-        model = self._wrap_supported(model, self.qcfg)
+        model = self.wrapper.wrap_supported(model, self.qcfg)
 
         # Switch all quant modules into calibration mode
         if isinstance(model, QuantModuleBase):
@@ -71,154 +72,3 @@ def convert(self, model):
             if isinstance(m, QuantModuleBase):
                 m.freeze_qparams()
         return model
-
-    def _wrap_supported(
-        self,
-        root: nn.Module,
-        qcfg: PTQConfig,
-    ) -> nn.Module:
-        """
-        Recursively attempt to wrap boundaries. Strictness is applied at every boundary.
-        """
-        assert not isinstance(root, QuantModuleBase), "The module is already wrapped."
-        try:
-            return PTQWrapper(root, qcfg=qcfg, fp_name="model")
-        except NotImplementedError as e:
-            print("no special wrapper for model, wrappig using general case")
-
-        # Case A: HuggingFace-style transformers: model.model.layers
-        lm = getattr(root, "model", None)
-
-        embeddings = (
-            getattr(lm, "embed_tokens", None) if isinstance(lm, nn.Module) else None
-        )
-        if isinstance(embeddings, nn.Module):
-            child_scope = "model.embeddings"
-            child_cfg = qcfg.child(child_scope)
-            wrapped = self._try_wrap(
-                embeddings,
-                child_cfg,
-                fp_name=child_scope,
-                raise_on_fail=self.strict_wrap,
-            )
-            lm.embed_tokens = wrapped  # type: ignore[union-attr]
-
-        model_norm = getattr(lm, "norm", None) if isinstance(lm, nn.Module) else None
-        if isinstance(model_norm, nn.Module):
-            child_scope = "model.norm"
-            child_cfg = qcfg.child(child_scope)
-            wrapped = self._try_wrap(
-                model_norm,
-                child_cfg,
-                fp_name=child_scope,
-                raise_on_fail=self.strict_wrap,
-            )
-            lm.norm = wrapped  # type: ignore[union-attr]
-
-        lm_head = getattr(root, "lm_head", None) if isinstance(lm, nn.Module) else None
-        if isinstance(lm_head, nn.Module):
-            child_scope = "lm_head"
-            child_cfg = qcfg.child(child_scope)
-            wrapped = self._try_wrap(
-                lm_head,
-                child_cfg,
-                fp_name=child_scope,
-                raise_on_fail=self.strict_wrap,
-            )
-            root.lm_head = wrapped
-
-        layers = getattr(lm, "layers", None) if isinstance(lm, nn.Module) else None
-        if isinstance(layers, nn.ModuleList):
-            new_list = nn.ModuleList()
-            for idx, layer in enumerate(layers):
-                child_scope = f"layer{idx}"
-                child_cfg = qcfg.child(child_scope)
-
-                # Enforce strictness at the child boundary
-                wrapped = self._try_wrap(
-                    layer,
-                    child_cfg,
-                    fp_name=child_scope,
-                    raise_on_fail=self.strict_wrap,
-                )
-                new_list.append(wrapped)
-            lm.layers = new_list  # type: ignore[union-attr]
-            return root
-
-        # Case B: Containers
-        if isinstance(root, (nn.Sequential, nn.ModuleList)):
-            for i, child in enumerate(list(root)):
-                name = str(i)
-                child_cfg = qcfg.child(name)
-
-                wrapped = self._try_wrap(
-                    child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
-                )
-                if wrapped is child:
-                    assert not self.strict_wrap
-                    wrapped = self._wrap_supported(wrapped, child_cfg)
-                root[i] = wrapped  # type: ignore[index]
-            return root
-
-        if isinstance(root, nn.ModuleDict):
-            for k, child in list(root.items()):
-                name = k
-                child_cfg = qcfg.child(name)
-
-                wrapped = self._try_wrap(
-                    child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
-                )
-                if wrapped is child:
-                    assert not self.strict_wrap
-                    wrapped = self._wrap_supported(wrapped, child_cfg)
-                root[k] = wrapped  # type: ignore[index]
-            return root
-
-        # Case C: Leaf node
-        root_name = getattr(root, "_get_name", lambda: None)()
-        wrapped = self._try_wrap(
-            root, qcfg, fp_name=root_name, raise_on_fail=self.strict_wrap
-        )
-        if wrapped is not root:
-            return wrapped
-
-        assert not self.strict_wrap
-        # Case D: Named children
-        for name, child in list(root.named_children()):
-            child_cfg = qcfg.child(name)
-
-            wrapped = self._try_wrap(
-                child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
-            )
-            if wrapped is child:
-                assert not self.strict_wrap
-                wrapped = self._wrap_supported(wrapped, child_cfg)
-            setattr(root, name, wrapped)
-
-        return root
-
-    def _try_wrap(
-        self,
-        module: nn.Module,
-        qcfg_for_child: PTQConfig,
-        *,
-        fp_name: Optional[str],
-        raise_on_fail: bool,
-    ) -> nn.Module:
-        """
-        Attempt to wrap a boundary with PTQWrapper.
-
-        Behavior:
-          • If PTQWrapper succeeds: return wrapped module.
-          • If PTQWrapper raises NotImplementedError:
-                - raise_on_fail=True  -> re-raise (strict)
-                - raise_on_fail=False -> return original module (permissive)
-        """
-        try:
-            return PTQWrapper(module, qcfg=qcfg_for_child, fp_name=fp_name)
-        except NotImplementedError as e:
-            if raise_on_fail:
-                raise NotImplementedError(
-                    f"PTQQuantizer: no quantization wrapper for {type(module).__name__}"
-                ) from e
-            return module
diff --git a/tico/quantization/wrapq/wrap_helper.py b/tico/quantization/wrapq/wrap_helper.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+import torch.nn as nn
+
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
+
+
+class PTQWrapHelper:
+    """
+    Reusable helper that applies PTQWrapper recursively according to PTQConfig.
+
+    This class contains only the structural wrapping logic and can be reused by
+    other algorithms (e.g. SpinQuant) that want to leverage the same wrapper
+    hierarchy without enabling calibration immediately.
+    """
+
+    def __init__(self, *, strict_wrap: bool = True):
+        self.strict_wrap = strict_wrap
+
+    def wrap_supported(
+        self,
+        root: nn.Module,
+        qcfg: PTQConfig,
+    ) -> nn.Module:
+        """
+        Recursively attempt to wrap boundaries. Strictness is applied at every boundary.
+        """
+        assert not isinstance(root, QuantModuleBase), "The module is already wrapped."
+
+        try:
+            return PTQWrapper(root, qcfg=qcfg, fp_name="model")
+        except NotImplementedError:
+            print(
+                f"No specialized wrapper found for {type(root).__name__}; applying recursive wrapping."
+            )
+
+        # Case A: HuggingFace-style transformers: model.model.layers
+        lm = getattr(root, "model", None)
+
+        embeddings = (
+            getattr(lm, "embed_tokens", None) if isinstance(lm, nn.Module) else None
+        )
+        if isinstance(embeddings, nn.Module):
+            child_scope = "model.embeddings"
+            child_cfg = qcfg.child(child_scope)
+            wrapped = self.try_wrap(
+                embeddings,
+                child_cfg,
+                fp_name=child_scope,
+                raise_on_fail=self.strict_wrap,
+            )
+            lm.embed_tokens = wrapped  # type: ignore[union-attr]
+
+        model_norm = getattr(lm, "norm", None) if isinstance(lm, nn.Module) else None
+        if isinstance(model_norm, nn.Module):
+            child_scope = "model.norm"
+            child_cfg = qcfg.child(child_scope)
+            wrapped = self.try_wrap(
+                model_norm,
+                child_cfg,
+                fp_name=child_scope,
+                raise_on_fail=self.strict_wrap,
+            )
+            lm.norm = wrapped  # type: ignore[union-attr]
+
+        lm_head = getattr(root, "lm_head", None)
+        if isinstance(lm_head, nn.Module):
+            child_scope = "lm_head"
+            child_cfg = qcfg.child(child_scope)
+            wrapped = self.try_wrap(
+                lm_head,
+                child_cfg,
+                fp_name=child_scope,
+                raise_on_fail=self.strict_wrap,
+            )
+            root.lm_head = wrapped  # type: ignore[attr-defined]
+
+        layers = getattr(lm, "layers", None) if isinstance(lm, nn.Module) else None
+        if isinstance(layers, nn.ModuleList):
+            new_list = nn.ModuleList()
+            for idx, layer in enumerate(layers):
+                child_scope = f"layer{idx}"
+                child_cfg = qcfg.child(child_scope)
+
+                wrapped = self.try_wrap(
+                    layer,
+                    child_cfg,
+                    fp_name=child_scope,
+                    raise_on_fail=self.strict_wrap,
+                )
+                new_list.append(wrapped)
+            lm.layers = new_list  # type: ignore[union-attr]
+            return root
+
+        # Case B: Containers
+        if isinstance(root, (nn.Sequential, nn.ModuleList)):
+            for i, child in enumerate(list(root)):
+                name = str(i)
+                child_cfg = qcfg.child(name)
+
+                wrapped = self.try_wrap(
+                    child,
+                    child_cfg,
+                    fp_name=name,
+                    raise_on_fail=self.strict_wrap,
+                )
+                if wrapped is child:
+                    assert not self.strict_wrap
+                    wrapped = self.wrap_supported(wrapped, child_cfg)
+                root[i] = wrapped  # type: ignore[index]
+            return root
+
+        if isinstance(root, nn.ModuleDict):
+            for k, child in list(root.items()):
+                name = k
+                child_cfg = qcfg.child(name)
+
+                wrapped = self.try_wrap(
+                    child,
+                    child_cfg,
+                    fp_name=name,
+                    raise_on_fail=self.strict_wrap,
+                )
+                if wrapped is child:
+                    assert not self.strict_wrap
+                    wrapped = self.wrap_supported(wrapped, child_cfg)
+                root[k] = wrapped  # type: ignore[index]
+            return root
+
+        # Case C: Leaf node
+        root_name = getattr(root, "_get_name", lambda: None)()
+        wrapped = self.try_wrap(
+            root,
+            qcfg,
+            fp_name=root_name,
+            raise_on_fail=self.strict_wrap,
+        )
+        if wrapped is not root:
+            return wrapped
+
+        assert not self.strict_wrap
+
+        # Case D: Named children
+        for name, child in list(root.named_children()):
+            child_cfg = qcfg.child(name)
+
+            wrapped = self.try_wrap(
+                child,
+                child_cfg,
+                fp_name=name,
+                raise_on_fail=self.strict_wrap,
+            )
+            if wrapped is child:
+                wrapped = self.wrap_supported(wrapped, child_cfg)
+            setattr(root, name, wrapped)
+
+        return root
+
+    def try_wrap(
+        self,
+        module: nn.Module,
+        qcfg_for_child: PTQConfig,
+        *,
+        fp_name: Optional[str],
+        raise_on_fail: bool,
+    ) -> nn.Module:
+        """
+        Attempt to wrap a boundary with PTQWrapper.
+
+        Behavior:
+          - If PTQWrapper succeeds: return wrapped module.
+          - If PTQWrapper raises NotImplementedError:
+              * raise_on_fail=True  -> re-raise
+              * raise_on_fail=False -> return original module
+        """
+        try:
+            return PTQWrapper(module, qcfg=qcfg_for_child, fp_name=fp_name)
+        except NotImplementedError as e:
+            if raise_on_fail:
+                raise NotImplementedError(
+                    f"PTQWrapHelper: no quantization wrapper for {type(module).__name__}"
+                ) from e
+            return module