Commit ad74d32

Update apply_quantization_config to use match_named_modules
Signed-off-by: Fynn Schmitt-Ulms <[email protected]>
1 parent 3d49764 · commit ad74d32

File tree: 2 files changed, +82 / -46 lines changed

src/compressed_tensors/quantization/lifecycle/apply.py

Lines changed: 29 additions & 41 deletions
@@ -40,6 +40,7 @@
     is_kv_cache_quant_scheme,
 )
 from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
+from compressed_tensors.utils.match import match_named_modules
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from safetensors import safe_open
@@ -147,47 +148,35 @@ def apply_quantization_config(
     if run_compressed:
         from compressed_tensors.linear.compressed_linear import CompressedLinear
 
-    # list of submodules to ignore
-    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in model.named_modules():
-        # potentially fix module name to remove FSDP wrapper prefix
-        name = fix_fsdp_module_name(name)
-        if matches := find_name_or_class_matches(name, submodule, config.ignore):
-            for match in matches:
-                ignored_submodules[match].append(name)
-            continue  # layer matches ignore list, continue
-
-        targets = find_name_or_class_matches(name, submodule, target_to_scheme)
-
-        if targets:
-            # mark modules to be quantized by adding
-            # quant scheme to the matching layers
-            scheme = _scheme_from_targets(target_to_scheme, targets, name)
-            if run_compressed:
-                format = config.format
-                if format != CompressionFormat.dense.value:
-                    if isinstance(submodule, torch.nn.Linear):
-                        # TODO: expand to more module types
-                        compressed_linear = CompressedLinear.from_linear(
-                            submodule,
-                            quantization_scheme=scheme,
-                            quantization_format=format,
-                        )
-                        replace_module(model, name, compressed_linear)
-
-            # target matched - add layer and scheme to target list
-            submodule.quantization_scheme = scheme
-
-            names_to_scheme[name] = submodule.quantization_scheme
-
-    if config.ignore is not None and ignored_submodules is not None:
-        if set(config.ignore) - set(ignored_submodules):
-            _LOGGER.warning(
-                "Some layers that were to be ignored were "
-                "not found in the model: "
-                f"{set(config.ignore) - set(ignored_submodules)}"
-            )
+    for name, submodule, matched_targets in match_named_modules(
+        model,
+        target_to_scheme,
+        config.ignore or [],
+        warn_on_fail=True,
+        warn_on_unmatched_ignores=True,
+        return_matched_targets=True,
+        preprocess_name=fix_fsdp_module_name,
+    ):
+        # mark modules to be quantized by adding
+        # quant scheme to the matching layers
+        scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
+        if run_compressed:
+            format = config.format
+            if format != CompressionFormat.dense.value:
+                if isinstance(submodule, torch.nn.Linear):
+                    # TODO: expand to more module types
+                    compressed_linear = CompressedLinear.from_linear(
+                        submodule,
+                        quantization_scheme=scheme,
+                        quantization_format=format,
+                    )
+                    replace_module(model, name, compressed_linear)
+
+        # target matched - add layer and scheme to target list
+        submodule.quantization_scheme = scheme
+
+        names_to_scheme[name] = submodule.quantization_scheme
 
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
@@ -429,7 +418,6 @@ def _scheme_from_targets(
 def _merge_schemes(
     schemes_to_merge: List[QuantizationScheme], name: str
 ) -> QuantizationScheme:
-
     kv_cache_quantization_scheme = [
         scheme for scheme in schemes_to_merge if is_kv_cache_quant_scheme(scheme)
     ]
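
For context, a minimal sketch (not part of the commit) of the iteration pattern the refactored apply_quantization_config adopts; the toy model and the "W8A8" placeholder scheme below are hypothetical:

import torch
from compressed_tensors.utils.match import match_named_modules

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())
target_to_scheme = {"Linear": "W8A8"}  # hypothetical scheme mapping

for name, submodule, matched_targets in match_named_modules(
    model,
    target_to_scheme,
    ignore=[],
    return_matched_targets=True,
):
    # expected to yield the Linear submodule at name "0", matched via its
    # class name: ("0", Linear(...), ["Linear"])
    print(name, type(submodule).__name__, matched_targets)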

src/compressed_tensors/utils/match.py

Lines changed: 53 additions & 5 deletions
@@ -15,7 +15,7 @@
 import logging
 import re
 from collections.abc import Generator
-from typing import Iterable, Tuple
+from typing import Callable, Iterable, Tuple
 
 import torch
 from compressed_tensors.utils.internal import InternalModule
@@ -35,8 +35,11 @@
 def match_named_modules(
     model: torch.nn.Module,
     targets: Iterable[str],
-    ignore: Iterable[str] = tuple(),
+    ignore: Iterable[str] | None = tuple(),
     warn_on_fail: bool = False,
+    warn_on_unmatched_ignores: bool = False,
+    return_matched_targets: bool = False,
+    preprocess_name: Callable[[str], str] = lambda x: x,
 ) -> Generator[Tuple[str, torch.nn.Module]]:
     """
     Yields names and modules which match `targets` but do not match `ignore`.
@@ -48,21 +51,66 @@ def match_named_modules(
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
+    ignore = ignore or []
+
     unmatched_targets = set(targets)
+    unmatched_ignores = set(ignore)
+
+    # Order targets by type: exact name match, regex name match, class name match
+    targets = sorted(targets, key=lambda x: ("re:" in x, x))
     for name, module in model.named_modules():
+        # preprocess the module name and module
+        name = preprocess_name(name)
+
+        ignore_matched = False
+        for ign in ignore:
+            if is_match(name, module, ign):
+                unmatched_ignores -= {ign}
+                ignore_matched = True
+                break
+        if ignore_matched:
+            continue
+
+        matched_targets = []
+        # Check for name matches first (exact then regex)
         for target in targets:
-            if is_match(name, module, target):
+            if match_name(name, target):
                 unmatched_targets -= {target}
+                matched_targets.append(target)
+                if not return_matched_targets:
+                    break
 
-        if not any(is_match(name, module, ign) for ign in ignore):
-            yield name, module
+        if not return_matched_targets and matched_targets:
+            # Don't need to check other targets, one match is enough
+            yield name, module
+            continue
+
+        # Check for class matches
+        for target in targets:
+            if match_class(module, target):
+                unmatched_targets -= {target}
+                matched_targets.append(target)
+                if not return_matched_targets:
+                    break
+
+        if matched_targets:
+            if return_matched_targets:
+                yield name, module, matched_targets
+            else:
+                yield name, module
 
     if warn_on_fail:
         for target in unmatched_targets:
             _LOGGER.warning(
                 f"Could not match `{target}` in instance of {model.__class__.__name__}"
             )
 
+    if warn_on_unmatched_ignores:
+        for ign in unmatched_ignores:
+            _LOGGER.warning(
+                f"Unmatched ignore target `{ign}` in instance of {model.__class__.__name__}"
+            )
+
 
 def match_named_parameters(
     model: torch.nn.Module,
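
Assuming the "re:" prefix denotes a regex target (as the sorting comment in the diff suggests), here is a hypothetical usage sketch of the new options; the module names and ignore entries below are invented for illustration:

import torch
from compressed_tensors.utils.match import match_named_modules

model = torch.nn.ModuleDict(
    {"q_proj": torch.nn.Linear(4, 4), "lm_head": torch.nn.Linear(4, 4)}
)

for name, module in match_named_modules(
    model,
    targets=["re:.*_proj"],
    ignore=["lm_head", "does_not_exist"],
    warn_on_unmatched_ignores=True,  # logs that "does_not_exist" never matched
):
    print(name)  # expected: "q_proj"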
