Skip to content

Commit de884e0

Browse files
author
Nikita Savelyev
authored
[WC] GroupSizeFallbackMode instead of enable_flexible_group_size (#3583)
### Changes - Replaced boolean `enable_flexible_group_size` with a `group_size_fallback_mode` enum. Possible values are ERROR, IGNORE, ADJUST. Meaning: - ERROR: raise an exception if the channel size can't be divided by the group size. - IGNORE: a node with an invalid group size won't be compressed at all. - ADJUST: the same as with `enable_flexible_group_size=True` on develop, i.e. compute a new group size if possible, otherwise compress to backup precision. - Renamed `min_flexible_group_size` to `min_adjusted_group_size`. Set `group_size_fallback_mode` to IGNORE by default. Users are informed the following way depending on the selected fallback mode: - ERROR: an exception is raised with a suggestion to set `group_size_fallback_mode` to IGNORE or ADJUST. - IGNORE: an info message is logged that some nodes will be ignored. - ADJUST: an info message is logged that some nodes will have an adjusted group size value / be compressed to backup mode. ### Reason for changes UX improvement: now the default behavior won't result in an exception. ### Related tickets 167337 ### Tests Adopted the tests introduced in #3556.
1 parent d9fc39a commit de884e0

File tree

4 files changed

+183
-104
lines changed

4 files changed

+183
-104
lines changed

src/nncf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters as AdvancedQuantizationParameters
5959
from nncf.quantization.advanced_parameters import AdvancedScaleEstimationParameters as AdvancedScaleEstimationParameters
6060
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters as AdvancedSmoothQuantParameters
61+
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode as GroupSizeFallbackMode
6162
from nncf.quantization.advanced_parameters import OverflowFix as OverflowFix
6263
from nncf.scopes import IgnoredScope as IgnoredScope
6364
from nncf.scopes import Subgraph as Subgraph

src/nncf/quantization/advanced_parameters.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,31 @@ class FP8Type(StrEnum):
7676
E5M2 = "f8e5m2"
7777

7878

79+
@api()
80+
class GroupSizeFallbackMode(StrEnum):
81+
"""
82+
Specifies how to handle nodes that do not support the given group size.
83+
84+
:param ERROR: Raise an error if the given group size is not supported by a node.
85+
:param IGNORE: Skip nodes that cannot be compressed with the given group size.
86+
:param ADJUST: Automatically compute a suitable group size for unsupported nodes.
87+
When selected, each weight for which the channel size is not divisible by the general group size value will
88+
be compressed to a newly calculated group size. The new group size value is the maximal power of two
89+
(i.e., 2^k) such that:
90+
- channel size is divisible by it;
91+
- it is less than the originally specified group size value;
92+
- it is greater than or equal to `min_adjusted_group_size`.
93+
94+
If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
95+
precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
96+
issues, then these weights won't contribute to the ratio of backup mode group.
97+
"""
98+
99+
ERROR = "error"
100+
IGNORE = "ignore"
101+
ADJUST = "adjust"
102+
103+
79104
@api()
80105
@dataclass
81106
class QuantizationParameters:
@@ -371,20 +396,11 @@ class AdvancedCompressionParameters:
371396
:type statistics_path: str
372397
:param lora_adapter_rank: Rank of lora adapters for FQ_LORA format. Defaults to 256.
373398
:type lora_adapter_rank: int
374-
:param enable_flexible_group_size: Whether to enable flexible group size searching. When enabled, each weight
375-
for which the channel size is not divisible by the general group size value will be compressed to a newly
376-
calculated group size. The new group size value is the maximal power of two (i.e., 2^k) such that:
377-
- channel size is divisible by it;
378-
- it is less than the originally specified group size value;
379-
- it is greater than or equal to `min_flexible_group_size`.
380-
381-
If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
382-
precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
383-
issues, then these weights won't contribute to the ratio of backup mode group.
384-
:type enable_flexible_group_size: bool
385-
:param min_flexible_group_size: Minimum group size for flexible group size searching. Defaults to 16. The reason
399+
:param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
400+
:type group_size_fallback_mode: GroupSizeFallbackMode
401+
:param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
386402
behind this argument is to avoid too small group size values, which may lead to performance issues.
387-
:type min_flexible_group_size: int
403+
:type min_adjusted_group_size: int
388404
:param awq_params: Advanced parameters for AWQ algorithm.
389405
:type awq_params: AdvancedAWQParameters
390406
:param scale_estimation_params: Advanced parameters for Scale Estimation algorithm.
@@ -402,8 +418,8 @@ class AdvancedCompressionParameters:
402418

403419
statistics_path: Optional[str] = None
404420
lora_adapter_rank: int = 256
405-
enable_flexible_group_size: bool = False
406-
min_flexible_group_size: int = 16
421+
group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
422+
min_adjusted_group_size: int = 16
407423
awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
408424
scale_estimation_params: AdvancedScaleEstimationParameters = field(
409425
default_factory=AdvancedScaleEstimationParameters

src/nncf/quantization/algorithms/weight_compression/algorithm.py

Lines changed: 106 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from nncf.parameters import CompressWeightsMode
3737
from nncf.parameters import SensitivityMetric
3838
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
39+
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
3940
from nncf.quantization.advanced_parameters import convert_to_dict_recursively
4041
from nncf.quantization.algorithms.algorithm import Algorithm
4142
from nncf.quantization.algorithms.weight_compression.awq import AWQ
@@ -234,6 +235,13 @@ def check_user_compression_configuration(
234235
msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters."
235236
raise nncf.ValidationError(msg)
236237

238+
if advanced_parameters and not isinstance(advanced_parameters.group_size_fallback_mode, GroupSizeFallbackMode):
239+
msg = (
240+
f"Unsupported group size fallback mode: {advanced_parameters.group_size_fallback_mode.value}. "
241+
f"Supported modes are: {[e.value for e in GroupSizeFallbackMode]}."
242+
)
243+
raise nncf.ValidationError(msg)
244+
237245

238246
class WeightCompression(Algorithm):
239247
"""
@@ -324,8 +332,8 @@ def __init__(
324332
self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size)
325333
self._statistics_path = self._advanced_parameters.statistics_path
326334

327-
self._enable_flexible_group_size = self._advanced_parameters.enable_flexible_group_size
328-
self._min_flexible_group_size = self._advanced_parameters.min_flexible_group_size
335+
self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode
336+
self._min_adjusted_group_size = self._advanced_parameters.min_adjusted_group_size
329337

330338
if self._awq:
331339
awq_params = self._advanced_parameters.awq_params
@@ -523,71 +531,115 @@ def _set_weight_compression_config(
523531
if reduction_channel_size % w_params.compression_config.group_size != 0:
524532
failed_nodes.append((w_params.node_with_weight.node_name, reduction_channel_size))
525533
if len(failed_nodes) > 0:
526-
names = ",".join(f'"{name}"' for name, _ in failed_nodes)
534+
names = "\n\t".join(f'"{name}" (channel size: {channel_size})' for name, channel_size in failed_nodes)
527535
msg = (
528-
"Failed to apply group-wise quantization with "
529-
f"group size value {self._group_size} and channel size value {failed_nodes[0][1]}.\n"
536+
f"Failed to apply group-wise quantization with group size value {self._group_size}.\n"
530537
"Ensure that the group size is divisible by the channel size, "
531-
"or include this node and others with similar issues in the ignored scope:\n"
532-
f"nncf.compress_weight(\n\t..., \n\tignored_scope=IgnoredScope(names=[{names}]\n\t)\n)"
538+
"or consider setting `group_size_fallback_mode` to IGNORE or ADJUST. Failed nodes:\n\t" + names
533539
)
534540
raise nncf.InvalidGroupSizeError(msg)
535541

536-
def _get_flexible_group_size_data(
542+
def _handle_ignore_group_size_fallback(
543+
self,
544+
all_weight_params: list[WeightCompressionParameters],
545+
ratio_defining_params: list[WeightCompressionParameters],
546+
nodes_to_compress: list[NNCFNode],
547+
) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], list[NNCFNode]]:
548+
"""
549+
Removes nodes that cannot be quantized with the specified group size from the lists of weight parameters.
550+
"""
551+
if self._group_size == -1:
552+
return all_weight_params, ratio_defining_params, nodes_to_compress
553+
554+
nodes_to_exclude = {}
555+
for w_params in ratio_defining_params:
556+
reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
557+
if reduction_channel_size % self._group_size != 0:
558+
nodes_to_exclude[w_params.node_with_weight.node_name] = w_params.weight_shape
559+
560+
if nodes_to_exclude:
561+
ratio_defining_params = [
562+
w_params
563+
for w_params in ratio_defining_params
564+
if w_params.node_with_weight.node_name not in nodes_to_exclude
565+
]
566+
all_weight_params = [
567+
w_params
568+
for w_params in all_weight_params
569+
if w_params.node_with_weight.node_name not in nodes_to_exclude
570+
]
571+
nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in nodes_to_exclude]
572+
573+
log_lines = [
574+
f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items()
575+
]
576+
log_message = (
577+
f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
578+
"They will be ignored and kept with original precision.\n"
579+
"Consider setting group_size_fallback_mode to ADJUST, which enables automatic adjustment "
580+
"to smaller group size values."
581+
)
582+
nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
583+
584+
return all_weight_params, ratio_defining_params, nodes_to_compress
585+
586+
def _handle_adjust_group_size_fallback(
537587
self, weight_params: list[WeightCompressionParameters]
538-
) -> list[tuple[WeightCompressionParameters, int]]:
588+
) -> tuple[list[WeightCompressionParameters], dict[str, int]]:
539589
"""
540-
Compute flexible group size values.
541-
:param weight_params: Weight parameters for which to compute flexible group size.
542-
:return: A list of tuples, where each tuple pair contains a WeightCompressionParameters object and the
543-
group size values associated with it. If group size can't be assigned to some weight parameter
544-
it won't be included in the result.
590+
Calculates adjusted group size for weight parameters that cannot be quantized with the specified group size.
591+
:param weight_params: List of weight parameters to process.
592+
:return: A tuple containing two elements:
593+
- A list of weight parameters that can be quantized with the specified or adjusted group size.
594+
- A dictionary mapping weight names to their group size values.
545595
"""
546-
flexible_group_size_not_found_weight_params = []
547-
group_size_data = []
596+
if self._group_size == -1:
597+
return weight_params, {w_params.weight_name: self._group_size for w_params in weight_params}
598+
599+
group_size_values = {}
600+
valid_weight_params = []
601+
invalid_weight_params = []
602+
adjusted_weight_params = []
548603
for w_params in weight_params:
549604
reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
550605
if reduction_channel_size % self._group_size == 0:
551-
# The weight can be compressed with the given group size, nothing else to do
552-
group_size_data.append((w_params, self._group_size))
606+
valid_weight_params.append(w_params)
607+
group_size_values[w_params.weight_name] = self._group_size
553608
continue
554609

555-
# Find the maximal power of two that divides reduction_channel_size
556-
flexible_group_size = reduction_channel_size & (~reduction_channel_size + 1)
610+
# The maximal power of two that divides reduction_channel_size
611+
adjusted_group_size = reduction_channel_size & (~reduction_channel_size + 1)
612+
if adjusted_group_size >= self._min_adjusted_group_size:
613+
valid_weight_params.append(w_params)
614+
group_size_values[w_params.weight_name] = adjusted_group_size
615+
adjusted_weight_params.append((w_params, adjusted_group_size))
616+
continue
557617

558-
if flexible_group_size < self._min_flexible_group_size:
559-
flexible_group_size_not_found_weight_params.append(w_params)
560-
else:
561-
group_size_data.append((w_params, flexible_group_size))
618+
invalid_weight_params.append(w_params)
562619

563-
node_strings = []
564-
for i, (w_params, new_group_size) in enumerate(group_size_data):
565-
if new_group_size == self._group_size:
566-
continue
567-
weight_shape = w_params.weight_shape
568-
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
569-
node_strings.append(
570-
f"{w_params.node_with_weight.node_name} "
571-
f"(weight shape: {weight_shape}, adjusted group size: {new_group_size})"
572-
)
573-
if len(node_strings) > 0:
620+
if adjusted_weight_params:
621+
# Adjusted group size value for some nodes
622+
log_lines = [
623+
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
624+
for w, adjusted_gs in adjusted_weight_params
625+
]
574626
nncf_logger.info(
575-
f"Wasn't able to set the specified group size value ({self._group_size}) to some nodes. These nodes "
576-
f"will have an adjusted group size value:\n\t" + "\n\t".join(node_strings)
627+
f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
628+
"Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
577629
)
578630

579-
if len(flexible_group_size_not_found_weight_params) > 0:
580-
node_strings = [""] * len(flexible_group_size_not_found_weight_params)
581-
for i, w_params in enumerate(flexible_group_size_not_found_weight_params):
582-
weight_shape = w_params.weight_shape
583-
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
584-
node_strings[i] = f"{w_params.node_with_weight.node_name} (weight shape: {weight_shape})"
585-
nncf_logger.warning(
586-
"Large enough flexible group size value cannot be found for some nodes. They will be compressed "
587-
"according to the backup mode. Nodes:\n\t" + "\n\t".join(node_strings)
631+
if invalid_weight_params:
632+
# Valid adjusted group size wasn't found
633+
log_lines = [
634+
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
635+
]
636+
log_message = (
637+
"A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
638+
f"{self._backup_mode.value} backup mode."
588639
)
640+
nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
589641

590-
return group_size_data
642+
return valid_weight_params, group_size_values
591643

592644
@staticmethod
593645
def _proportion_str(num_weights_list: list[int], total_num_weights: int, total_num_params: int) -> str:
@@ -757,14 +809,14 @@ def apply(
757809
weight_names.add(weight_name)
758810

759811
ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared)
760-
if self._enable_flexible_group_size and self._group_size != -1:
761-
# Compute flexible group size values if enabled
762-
flexible_group_size_data = self._get_flexible_group_size_data(ratio_defining_params)
763-
group_size_values = {w_param.weight_name: group_size for w_param, group_size in flexible_group_size_data}
764-
# Select a subset of ratio_defining_params that can be compressed with some group size
765-
ratio_defining_params = [w_param for w_param, _ in flexible_group_size_data]
812+
if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE:
813+
all_weight_params, ratio_defining_params, nodes_to_compress = self._handle_ignore_group_size_fallback(
814+
all_weight_params, ratio_defining_params, nodes_to_compress
815+
)
816+
if self._group_size_fallback_mode == GroupSizeFallbackMode.ADJUST:
817+
ratio_defining_params, group_size_values = self._handle_adjust_group_size_fallback(ratio_defining_params)
766818
else:
767-
group_size_values = {w_param.weight_name: self._group_size for w_param in ratio_defining_params}
819+
group_size_values = {w_params.weight_name: self._group_size for w_params in all_weight_params}
768820
self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values)
769821
ignored_scope_weight_statistics = self._get_ignored_scope_weight_statistics(model, graph)
770822
nncf_logger.info(

0 commit comments

Comments
 (0)