
Commit 07c657e

fixes and formatting
Summary: fix smoothquant logic to align with AWQ
Signed-off-by: HDCharles <[email protected]>
1 parent 2b138a7 commit 07c657e

6 files changed: +62 -58 lines changed


src/llmcompressor/modifiers/awq/base.py

Lines changed: 17 additions & 21 deletions
@@ -7,16 +7,17 @@
 from compressed_tensors.utils import (
     align_modules,
     get_execution_device,
+    get_lowest_common_ancestor_name,
     match_modules_set,
     match_named_modules,
     update_offload_parameter,
-    get_lowest_common_ancestor_name,
 )
 from loguru import logger
 from pydantic import ConfigDict, PrivateAttr, model_validator
 from torch.nn import Module
-from tqdm import tqdm
 from torch.utils._pytree import tree_flatten
+from tqdm import tqdm
+
 from llmcompressor.core import Event, EventType, State
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.awq.mappings import (
@@ -30,7 +31,10 @@
 from llmcompressor.pipelines.cache import IntermediatesCache
 from llmcompressor.utils.fsdp.helpers import get_fsdp_parent
 from llmcompressor.utils.helpers import calibration_forward_context
-from llmcompressor.utils.pytorch.module import get_layer_by_name
+from llmcompressor.utils.pytorch.module import (
+    get_layer_by_name,
+    get_module_to_name_dict,
+)

 __all__ = ["AWQModifier"]

@@ -321,30 +325,20 @@ def _set_resolved_mappings(self, model: Module) -> None:
         repeat for model.layer.1 and so on
         """
         resolved_mappings: list[ResolvedMapping] = []
-
-        module_to_name = {}
-        for name, module in model.named_modules():
-            if module in module_to_name:
-                logger.info(
-                    f"Warning, {name} and {module_to_name[module]} both "
-                    "share the same module the same module, "
-                    "may have trouble resolving mappings."
-                )
-            module_to_name[module] = name
-
+        module_to_name = get_module_to_name_dict(model)
         for mapping in self.mappings:
             for smooth_layers, *nested_balance_layers in match_modules_set(
                 model, (mapping.smooth_layer, *mapping.balance_layers), self.ignore
             ):
-                assert len(smooth_layers)==1, (
-                    "AWQ mappings need to match a single smoothlayer for each mapping but got "
-                    f"{[module_to_name.get(smooth_layer) for smooth_layer in smooth_layers]} "
-                    f"when matching {mapping.smooth_layer}"
+                assert len(smooth_layers) == 1, (
+                    "AWQ mappings need to match a single smooth layer for each "
+                    f"mapping but got {[module_to_name.get(s) for s in smooth_layers]}"
+                    f" for mapping: {mapping}"
                 )
                 smooth_layer = smooth_layers[0]
                 smooth_name = module_to_name.get(smooth_layer)

-                #[[b00, b01, b02...], [b10, b11, b12,...], ...] v
+                # [[b00, b01, b02...], [b10, b11, b12,...], ...] v
                 # [b00, b01, b02, ..., b10, b11, b12, ...]
                 balance_layers = tree_flatten(nested_balance_layers)[0]
                 balance_names = [
@@ -371,7 +365,9 @@ def _set_resolved_mappings(self, model: Module) -> None:
             else:
                 # for multiple balance layers, find lowest common parent
                 ancestor_name = get_lowest_common_ancestor_name(balance_names)
-                ancestor_name, ancestor = get_lowest_non_module_list_ancestor(ancestor_name, model)
+                ancestor_name, ancestor = get_lowest_non_module_list_ancestor(
+                    ancestor_name, model
+                )

             resolved_mappings.append(
                 ResolvedMapping(
@@ -807,7 +803,7 @@ def _accumulate_mean(

 def get_lowest_non_module_list_ancestor(name, module: Module) -> tuple[str, Module]:
     """
-    Given a name and a model, finds lowest ancestor of 
+    Given a name and a model, finds lowest ancestor of
     named module that's not a ModuleList
     i.e. module_list.module_dict.module_list -> module_list.module_dict
     i.e. module_list.module_dict -> module_list.module_dict
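The unpacking above relies on torch's pytree utilities: match_modules_set yields one list of matched modules per pattern, so the balance matches arrive nested one level deep and tree_flatten collapses them. A minimal sketch of that step, with strings standing in for modules:

from torch.utils._pytree import tree_flatten

# Stand-in for match_modules_set output: one inner list per balance pattern,
# mirroring the "[[b00, b01, ...], [b10, b11, ...], ...]" comment in the diff.
nested_balance_layers = [["b00", "b01", "b02"], ["b10", "b11"]]

# tree_flatten returns (leaves, treespec); only the flat leaves are used here.
balance_layers, _treespec = tree_flatten(nested_balance_layers)
assert balance_layers == ["b00", "b01", "b02", "b10", "b11"]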

src/llmcompressor/modifiers/smoothquant/base.py

Lines changed: 20 additions & 24 deletions
@@ -1,11 +1,12 @@
 from dataclasses import dataclass
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Callable, Dict, List, Optional, Tuple

 import torch
-from compressed_tensors.utils import align_module_device, match_named_modules
+from compressed_tensors.utils import align_module_device, match_modules_set
 from loguru import logger
 from pydantic import ConfigDict, Field
 from torch.nn import Module
+from torch.utils._pytree import tree_flatten

 from llmcompressor.core import Event, EventType, State
 from llmcompressor.modifiers import Modifier
@@ -14,7 +15,7 @@
     handle_mapping_resolution_errors,
 )
 from llmcompressor.utils.fsdp.helpers import get_fsdp_parent
-from llmcompressor.utils.pytorch.module import get_layer_by_name
+from llmcompressor.utils.pytorch.module import get_module_to_name_dict

 MINIMUM_SMOOTHING_SCALE = 1e-5

@@ -95,7 +96,7 @@ class SmoothQuantModifier(Modifier):
     """

     smoothing_strength: float = 0.5
-    mappings: Optional[List[Union[Tuple, List]]] = None
+    mappings: Optional[List[Tuple[List[str], str]]] = None
     ignore: Optional[List[str]] = None
     num_calibration_steps: Optional[int] = None
     calibration_function: Optional[Callable] = None
@@ -198,27 +199,22 @@ def _resolve_mappings(self, model: Module) -> List[SmoothQuantMapping]:
         be balanced.
         """
         resolved_mappings = []
-        for to_balance, to_smooth in self.mappings:
-            to_smooth_list = [to_smooth] if isinstance(to_smooth, str) else to_smooth
-
-            for smooth_name, smooth_layer in match_named_modules(
-                model, to_smooth_list, self.ignore
+        module_to_name = get_module_to_name_dict(model)
+        for mapping in self.mappings:
+            for *nested_balance_layers, smooth_layers in match_modules_set(
+                model, tree_flatten(mapping)[0], self.ignore
             ):
-                # Search for balance layers within the parent scope
-                smooth_parent_name = ".".join(smooth_name.split(".")[:-1])
-                smooth_parent = get_layer_by_name(smooth_parent_name, model)
-
-                balance_layers = [
-                    balance_layer
-                    for _, balance_layer in match_named_modules(
-                        smooth_parent, to_balance, self.ignore
-                    )
-                ]
-
-                if balance_layers:
-                    resolved_mappings.append(
-                        SmoothQuantMapping(smooth_name, smooth_layer, balance_layers)
-                    )
+                assert len(smooth_layers) == 1, (
+                    "SmoothQuant mappings must match a single smooth layer for each "
+                    f"mapping but got {[module_to_name.get(s) for s in smooth_layers]}"
+                    f" for mapping: {mapping}"
+                )
+                smooth_layer = smooth_layers[0]
+                smooth_name = module_to_name.get(smooth_layers[0])
+                balance_layers = tree_flatten(nested_balance_layers)[0]
+                resolved_mappings.append(
+                    SmoothQuantMapping(smooth_name, smooth_layer, balance_layers)
+                )

         return resolved_mappings
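With the tightened schema, each SmoothQuant mapping is a (balance_layer_patterns, smooth_layer_pattern) tuple rather than a loose Tuple/List union. A hedged usage sketch; the regex patterns are illustrative, not values taken from this commit:

from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# Each entry pairs a list of balance-layer patterns with one smooth-layer
# pattern, matching the new Optional[List[Tuple[List[str], str]]] type.
modifier = SmoothQuantModifier(
    smoothing_strength=0.5,
    mappings=[
        (["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"),
        (["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"),
    ],
)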

src/llmcompressor/modifiers/smoothquant/utils.py

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 import functools
 from collections import namedtuple
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple

 from loguru import logger

@@ -10,7 +10,7 @@
     "DEFAULT_SMOOTHQUANT_MAPPINGS",
 ]

-LayerMapType = Tuple[Union[List[str], str], Union[List[str], str]]
+LayerMapType = Tuple[List[str], str]
 LayerMap: LayerMapType = namedtuple("LayerMap", ["balance_layers", "smooth_layers"])

 DEFAULT_SMOOTHQUANT_MAPPINGS: List[LayerMap] = [
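This is also why tree_flatten(mapping)[0] works in _resolve_mappings above: torch's pytree treats plain tuples and namedtuples such as LayerMap as containers, so either mapping form flattens to one pattern list, balance patterns first and the smooth pattern last. A small sketch (patterns illustrative):

from collections import namedtuple

from torch.utils._pytree import tree_flatten

LayerMap = namedtuple("LayerMap", ["balance_layers", "smooth_layers"])

mapping = LayerMap(
    balance_layers=["re:.*gate_proj", "re:.*up_proj"],
    smooth_layers="re:.*post_attention_layernorm",
)
# Both the namedtuple and the inner list are pytree nodes; strings are leaves.
flat, _spec = tree_flatten(mapping)
assert flat == ["re:.*gate_proj", "re:.*up_proj", "re:.*post_attention_layernorm"]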

src/llmcompressor/modifiers/transform/spinquant/base.py

Lines changed: 2 additions & 2 deletions
@@ -9,9 +9,9 @@
     TransformScheme,
     apply_transform_config,
 )
-from torch.utils._pytree import tree_flatten
 from compressed_tensors.utils import TorchDtype, get_head_dim
 from pydantic import Field, ValidationInfo, field_validator
+from torch.utils._pytree import tree_flatten
 from transformers import PreTrainedModel

 from llmcompressor.core import Event, EventType, State

@@ -204,7 +204,7 @@ def _fuse_norms(self, model: PreTrainedModel):
         for mapping in self.norm_mappings:
             for norm, *linears in match_modules_set(
                 model, (mapping.norm, *mapping.linears)
-                ):
+            ):
                 # match_modules_set returns a list of lists
                 assert len(norm) == 1
                 fuse_norm_linears(norm[0], tree_flatten(linears)[0])

src/llmcompressor/utils/pytorch/module.py

Lines changed: 14 additions & 0 deletions
@@ -10,6 +10,7 @@
 import torch
 from compressed_tensors import InternalModule
 from compressed_tensors.quantization.utils import is_module_quantized
+from loguru import logger
 from torch.nn import Linear, Module, Parameter
 from torch.nn.modules.conv import _ConvNd
 from transformers import PreTrainedModel

@@ -369,3 +370,16 @@ def get_layer_by_name(layer_name: str, module: Module) -> Module:
     if not layer_name:
         return module
     return attrgetter(layer_name)(module)
+
+
+def get_module_to_name_dict(model: Module) -> dict[Module, str]:
+    module_to_name = {}
+    for name, module in model.named_modules():
+        if module in module_to_name:
+            logger.info(
+                f"Warning, {name} and {module_to_name[module]} both "
+                "share the same module, "
+                "may have trouble resolving mappings."
+            )
+        module_to_name[module] = name
+    return module_to_name
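A short usage sketch of the new helper on a toy model (the two-layer Sequential is illustrative): building the dict once gives O(1) module-to-name lookups while resolving mappings, instead of re-walking named_modules() per query.

from torch.nn import Linear, Sequential

from llmcompressor.utils.pytorch.module import get_module_to_name_dict

model = Sequential(Linear(4, 4), Linear(4, 4))
module_to_name = get_module_to_name_dict(model)

assert module_to_name[model[1]] == "1"  # reverse lookup keyed by module identity
assert module_to_name[model] == ""  # named_modules() registers the root as ""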

tests/llmcompressor/modifiers/awq/test_base.py

Lines changed: 7 additions & 9 deletions
@@ -222,21 +222,20 @@ def test_get_lowest_non_module_list_ancestor():
             )
         }
     )
-
-    ancestor_name, ancestor = get_lowest_non_module_list_ancestor(
-        "", model
-    )
+
+    ancestor_name, ancestor = get_lowest_non_module_list_ancestor("", model)
     assert ancestor_name == "" and ancestor == model

-    ancestor_name, ancestor = get_lowest_non_module_list_ancestor(
-        "experts", model
-    )
+    ancestor_name, ancestor = get_lowest_non_module_list_ancestor("experts", model)
     assert ancestor_name == "" and ancestor == model

     ancestor_name, ancestor = get_lowest_non_module_list_ancestor(
         "experts.1.gate_proj", model
     )
-    assert ancestor_name == "experts.1.gate_proj" and ancestor == model["experts"][1]["gate_proj"]
+    assert (
+        ancestor_name == "experts.1.gate_proj"
+        and ancestor == model["experts"][1]["gate_proj"]
+    )


 @pytest.mark.unit

@@ -298,4 +297,3 @@ def test_moe_multiple_balance_layers():

     assert mapping.parent_name == "layer.mlp"
     assert mapping.parent == mlp
-
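For readers following the test, here is a minimal sketch of the rule these assertions pin down, per the docstring in awq/base.py; this is an assumed re-implementation for illustration, not the repo's code:

from torch.nn import Module, ModuleList

def lowest_non_module_list_ancestor(name: str, model: Module):
    # Assumed logic: strip trailing path segments while the module at `name`
    # is itself a ModuleList; an empty name resolves to the model root.
    while name:
        module = model.get_submodule(name)
        if not isinstance(module, ModuleList):
            return name, module
        name = ".".join(name.split(".")[:-1])
    return "", model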
