Review notes (text placed before the first "diff --git" line is ignored by patch tools):
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy of this
  patch; the line counts of every hunk were checked against its @@ header. Indentation of
  context lines is a best-effort reconstruction.
- check_user_compression_configuration: the validation message formats the rejected value
  with !r instead of .value. A value that fails the isinstance check (e.g. None or a plain
  str) has no .value attribute, so the original line raised AttributeError instead of
  nncf.ValidationError. The post-image blob hash on that file's "index" line predates this fix.
- apply(): the IGNORE check must stay a separate `if`. Turning it into if/elif/else would
  leave group_size_values unassigned in IGNORE mode, because IGNORE relies on the `else` branch.

diff --git a/src/nncf/__init__.py b/src/nncf/__init__.py
index 77cd6fbb09a..779bcba473e 100644
--- a/src/nncf/__init__.py
+++ b/src/nncf/__init__.py
@@ -58,6 +58,7 @@
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters as AdvancedQuantizationParameters
 from nncf.quantization.advanced_parameters import AdvancedScaleEstimationParameters as AdvancedScaleEstimationParameters
 from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters as AdvancedSmoothQuantParameters
+from nncf.quantization.advanced_parameters import GroupSizeFallbackMode as GroupSizeFallbackMode
 from nncf.quantization.advanced_parameters import OverflowFix as OverflowFix
 from nncf.scopes import IgnoredScope as IgnoredScope
 from nncf.scopes import Subgraph as Subgraph
diff --git a/src/nncf/quantization/advanced_parameters.py b/src/nncf/quantization/advanced_parameters.py
index 8bd211f432a..a7b42fd7209 100644
--- a/src/nncf/quantization/advanced_parameters.py
+++ b/src/nncf/quantization/advanced_parameters.py
@@ -76,6 +76,31 @@ class FP8Type(StrEnum):
     E5M2 = "f8e5m2"
 
 
+@api()
+class GroupSizeFallbackMode(StrEnum):
+    """
+    Specifies how to handle nodes that do not support the given group size.
+
+    :param ERROR: Raise an error if the given group size is not supported by a node.
+    :param IGNORE: Skip nodes that cannot be compressed with the given group size.
+    :param ADJUST: Automatically compute a suitable group size for unsupported nodes.
+        When selected, each weight for which the channel size is not divisible by the general group size value will
+        be compressed to a newly calculated group size. The new group size value is the maximal power of two
+        (i.e., 2^k) such that:
+        - channel size is divisible by it;
+        - it is less than the originally specified group size value;
+        - it is greater than or equal to `min_adjusted_group_size`.
+
+        If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
+        precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
+        issues, then these weights won't contribute to the ratio of backup mode group.
+    """
+
+    ERROR = "error"
+    IGNORE = "ignore"
+    ADJUST = "adjust"
+
+
 @api()
 @dataclass
 class QuantizationParameters:
@@ -371,20 +396,11 @@ class AdvancedCompressionParameters:
     :type statistics_path: str
     :param lora_adapter_rank: Rank of lora adapters for FQ_LORA format. Defaults to 256.
     :type lora_adapter_rank: int
-    :param enable_flexible_group_size: Whether to enable flexible group size searching. When enabled, each weight
-        for which the channel size is not divisible by the general group size value will be compressed to a newly
-        calculated group size. The new group size value is the maximal power of two (i.e., 2^k) such that:
-        - channel size is divisible by it;
-        - it is less than the originally specified group size value;
-        - it is greater than or equal to `min_flexible_group_size`.
-
-        If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
-        precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
-        issues, then these weights won't contribute to the ratio of backup mode group.
-    :type enable_flexible_group_size: bool
-    :param min_flexible_group_size: Minimum group size for flexible group size searching. Defaults to 16. The reason
+    :param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
+    :type group_size_fallback_mode: GroupSizeFallbackMode
+    :param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
         behind this argument is to avoid too small group size values, which may lead to performance issues.
-    :type min_flexible_group_size: int
+    :type min_adjusted_group_size: int
     :param awq_params: Advanced parameters for AWQ algorithm.
     :type awq_params: AdvancedAWQParameters
     :param scale_estimation_params: Advanced parameters for Scale Estimation algorithm.
@@ -402,8 +418,8 @@ class AdvancedCompressionParameters:
 
     statistics_path: Optional[str] = None
     lora_adapter_rank: int = 256
-    enable_flexible_group_size: bool = False
-    min_flexible_group_size: int = 16
+    group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
+    min_adjusted_group_size: int = 16
     awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
     scale_estimation_params: AdvancedScaleEstimationParameters = field(
         default_factory=AdvancedScaleEstimationParameters
diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py
index 6aa7c80bf45..3b512b717f4 100644
--- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -36,6 +36,7 @@
 from nncf.parameters import CompressWeightsMode
 from nncf.parameters import SensitivityMetric
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
+from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
 from nncf.quantization.advanced_parameters import convert_to_dict_recursively
 from nncf.quantization.algorithms.algorithm import Algorithm
 from nncf.quantization.algorithms.weight_compression.awq import AWQ
@@ -234,6 +235,13 @@ def check_user_compression_configuration(
         msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters."
         raise nncf.ValidationError(msg)
 
+    if advanced_parameters and not isinstance(advanced_parameters.group_size_fallback_mode, GroupSizeFallbackMode):
+        msg = (
+            f"Unsupported group size fallback mode: {advanced_parameters.group_size_fallback_mode!r}. "
+            f"Supported modes are: {[e.value for e in GroupSizeFallbackMode]}."
+        )
+        raise nncf.ValidationError(msg)
+
 
 class WeightCompression(Algorithm):
     """
@@ -324,8 +332,8 @@ def __init__(
             self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size)
         self._statistics_path = self._advanced_parameters.statistics_path
 
-        self._enable_flexible_group_size = self._advanced_parameters.enable_flexible_group_size
-        self._min_flexible_group_size = self._advanced_parameters.min_flexible_group_size
+        self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode
+        self._min_adjusted_group_size = self._advanced_parameters.min_adjusted_group_size
 
         if self._awq:
             awq_params = self._advanced_parameters.awq_params
@@ -523,71 +531,115 @@ def _set_weight_compression_config(
             if reduction_channel_size % w_params.compression_config.group_size != 0:
                 failed_nodes.append((w_params.node_with_weight.node_name, reduction_channel_size))
         if len(failed_nodes) > 0:
-            names = ",".join(f'"{name}"' for name, _ in failed_nodes)
+            names = "\n\t".join(f'"{name}" (channel size: {channel_size})' for name, channel_size in failed_nodes)
             msg = (
-                "Failed to apply group-wise quantization with "
-                f"group size value {self._group_size} and channel size value {failed_nodes[0][1]}.\n"
+                f"Failed to apply group-wise quantization with group size value {self._group_size}.\n"
                 "Ensure that the group size is divisible by the channel size, "
-                "or include this node and others with similar issues in the ignored scope:\n"
-                f"nncf.compress_weight(\n\t..., \n\tignored_scope=IgnoredScope(names=[{names}]\n\t)\n)"
+                "or consider setting `group_size_fallback_mode` to IGNORE or ADJUST. Failed nodes:\n\t" + names
             )
             raise nncf.InvalidGroupSizeError(msg)
 
-    def _get_flexible_group_size_data(
+    def _handle_ignore_group_size_fallback(
+        self,
+        all_weight_params: list[WeightCompressionParameters],
+        ratio_defining_params: list[WeightCompressionParameters],
+        nodes_to_compress: list[NNCFNode],
+    ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], list[NNCFNode]]:
+        """
+        Removes nodes that cannot be quantized with the specified group size from the lists of weight parameters.
+        """
+        if self._group_size == -1:
+            return all_weight_params, ratio_defining_params, nodes_to_compress
+
+        nodes_to_exclude = {}
+        for w_params in ratio_defining_params:
+            reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
+            if reduction_channel_size % self._group_size != 0:
+                nodes_to_exclude[w_params.node_with_weight.node_name] = w_params.weight_shape
+
+        if nodes_to_exclude:
+            ratio_defining_params = [
+                w_params
+                for w_params in ratio_defining_params
+                if w_params.node_with_weight.node_name not in nodes_to_exclude
+            ]
+            all_weight_params = [
+                w_params
+                for w_params in all_weight_params
+                if w_params.node_with_weight.node_name not in nodes_to_exclude
+            ]
+            nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in nodes_to_exclude]
+
+            log_lines = [
+                f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items()
+            ]
+            log_message = (
+                f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
+                "They will be ignored and kept with original precision.\n"
+                "Consider setting group_size_fallback_mode to ADJUST, which enables automatic adjustment "
+                "to smaller group size values."
+            )
+            nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
+
+        return all_weight_params, ratio_defining_params, nodes_to_compress
+
+    def _handle_adjust_group_size_fallback(
         self, weight_params: list[WeightCompressionParameters]
-    ) -> list[tuple[WeightCompressionParameters, int]]:
+    ) -> tuple[list[WeightCompressionParameters], dict[str, int]]:
         """
-        Compute flexible group size values.
-        :param weight_params: Weight parameters for which to compute flexible group size.
-        :return: A list of tuples, where each tuple pair contains a WeightCompressionParameters object and the
-            group size values associated with it. If group size can't be assigned to some weight parameter
-            it won't be included in the result.
+        Calculates adjusted group size for weight parameters that cannot be quantized with the specified group size.
+        :param weight_params: List of weight parameters to process.
+        :return: A tuple containing two elements:
+            - A list of weight parameters that can be quantized with the specified or adjusted group size.
+            - A dictionary mapping weight names to their group size values.
         """
-        flexible_group_size_not_found_weight_params = []
-        group_size_data = []
+        if self._group_size == -1:
+            return weight_params, {w_params.weight_name: self._group_size for w_params in weight_params}
+
+        group_size_values = {}
+        valid_weight_params = []
+        invalid_weight_params = []
+        adjusted_weight_params = []
         for w_params in weight_params:
             reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
             if reduction_channel_size % self._group_size == 0:
-                # The weight can be compressed with the given group size, nothing else to do
-                group_size_data.append((w_params, self._group_size))
+                valid_weight_params.append(w_params)
+                group_size_values[w_params.weight_name] = self._group_size
                 continue
 
-            # Find the maximal power of two that divides reduction_channel_size
-            flexible_group_size = reduction_channel_size & (~reduction_channel_size + 1)
+            # The maximal power of two that divides reduction_channel_size
+            adjusted_group_size = reduction_channel_size & (~reduction_channel_size + 1)
+            if adjusted_group_size >= self._min_adjusted_group_size:
+                valid_weight_params.append(w_params)
+                group_size_values[w_params.weight_name] = adjusted_group_size
+                adjusted_weight_params.append((w_params, adjusted_group_size))
+                continue
 
-            if flexible_group_size < self._min_flexible_group_size:
-                flexible_group_size_not_found_weight_params.append(w_params)
-            else:
-                group_size_data.append((w_params, flexible_group_size))
+            invalid_weight_params.append(w_params)
 
-        node_strings = []
-        for i, (w_params, new_group_size) in enumerate(group_size_data):
-            if new_group_size == self._group_size:
-                continue
-            weight_shape = w_params.weight_shape
-            reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
-            node_strings.append(
-                f"{w_params.node_with_weight.node_name} "
-                f"(weight shape: {weight_shape}, adjusted group size: {new_group_size})"
-            )
-        if len(node_strings) > 0:
+        if adjusted_weight_params:
+            # Adjusted group size value for some nodes
+            log_lines = [
+                f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
+                for w, adjusted_gs in adjusted_weight_params
+            ]
             nncf_logger.info(
-                f"Wasn't able to set the specified group size value ({self._group_size}) to some nodes. These nodes "
-                f"will have an adjusted group size value:\n\t" + "\n\t".join(node_strings)
+                f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
+                "Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
             )
 
-        if len(flexible_group_size_not_found_weight_params) > 0:
-            node_strings = [""] * len(flexible_group_size_not_found_weight_params)
-            for i, w_params in enumerate(flexible_group_size_not_found_weight_params):
-                weight_shape = w_params.weight_shape
-                reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
-                node_strings[i] = f"{w_params.node_with_weight.node_name} (weight shape: {weight_shape})"
-            nncf_logger.warning(
-                "Large enough flexible group size value cannot be found for some nodes. They will be compressed "
-                "according to the backup mode. Nodes:\n\t" + "\n\t".join(node_strings)
+        if invalid_weight_params:
+            # Valid adjusted group size wasn't found
+            log_lines = [
+                f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
+            ]
+            log_message = (
+                "A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
+                f"{self._backup_mode.value} backup mode."
             )
+            nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
 
-        return group_size_data
+        return valid_weight_params, group_size_values
 
     @staticmethod
     def _proportion_str(num_weights_list: list[int], total_num_weights: int, total_num_params: int) -> str:
@@ -757,14 +809,14 @@ def apply(
                 weight_names.add(weight_name)
 
         ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared)
-        if self._enable_flexible_group_size and self._group_size != -1:
-            # Compute flexible group size values if enabled
-            flexible_group_size_data = self._get_flexible_group_size_data(ratio_defining_params)
-            group_size_values = {w_param.weight_name: group_size for w_param, group_size in flexible_group_size_data}
-            # Select a subset of ratio_defining_params that can be compressed with some group size
-            ratio_defining_params = [w_param for w_param, _ in flexible_group_size_data]
+        if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE:
+            all_weight_params, ratio_defining_params, nodes_to_compress = self._handle_ignore_group_size_fallback(
+                all_weight_params, ratio_defining_params, nodes_to_compress
+            )
+        if self._group_size_fallback_mode == GroupSizeFallbackMode.ADJUST:
+            ratio_defining_params, group_size_values = self._handle_adjust_group_size_fallback(ratio_defining_params)
         else:
-            group_size_values = {w_param.weight_name: self._group_size for w_param in ratio_defining_params}
+            group_size_values = {w_params.weight_name: self._group_size for w_params in all_weight_params}
         self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values)
         ignored_scope_weight_statistics = self._get_ignored_scope_weight_statistics(model, graph)
         nncf_logger.info(
diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py
index 9ebdaf0f972..b7b25af4631 100644
--- a/tests/cross_fw/test_templates/template_test_weights_compression.py
+++ b/tests/cross_fw/test_templates/template_test_weights_compression.py
@@ -9,7 +9,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
-import re
 from abc import ABC
 from abc import abstractmethod
 from typing import TypeVar
@@ -372,25 +371,28 @@ def test_awq_scale_reference(self, monkeypatch):
 
     @pytest.mark.parametrize("algorithm", (None, "awq", "scale_estimation", "gptq", "lora_correction"))
     @pytest.mark.parametrize(
-        ["group_size", "enable_flex", "min_flex_group_size", "expected_outcome"],
+        ["group_size", "fallback_mode", "min_adjusted_group_size", "expected_outcome"],
         [
-            (32, False, None, "exception"),
-            (32, True, 16, "warn_backup_mode"),
-            (32, True, 8, "info_adjusted_group_size"),
+            (32, nncf.GroupSizeFallbackMode.ERROR, None, "exception"),
+            (32, nncf.GroupSizeFallbackMode.IGNORE, 16, "warn_ignored"),
+            (32, nncf.GroupSizeFallbackMode.ADJUST, 16, "info_cant_adjust"),
+            (32, nncf.GroupSizeFallbackMode.ADJUST, 8, "info_adjusted_group_size"),
+            (32, None, None, "warn_ignored"),
         ],
     )
     def test_error_message_for_invalid_group_size(
         self,
         algorithm,
         group_size,
-        enable_flex,
-        min_flex_group_size,
+        fallback_mode,
+        min_adjusted_group_size,
         expected_outcome,
     ):
         """
         Verifies that:
             - an exception is raised for an invalid group size
-            - a warning is logged when a flexible group size value cannot be found
+            - a warning message is logged when a node is ignored due to an invalid group size
+            - an info message is logged when an adjustable group size value cannot be found
             - an info message is logged when the group size is adjusted to a valid value
         """
         if algorithm in self.get_not_supported_algorithms():
@@ -409,55 +411,61 @@ def test_error_message_for_invalid_group_size(
             all_layers=True,
             **algorithm_dict,
             dataset=dataset,
-            advanced_parameters=nncf.AdvancedCompressionParameters(
-                enable_flexible_group_size=enable_flex, min_flexible_group_size=min_flex_group_size
-            ),
         )
+        if fallback_mode is not None or min_adjusted_group_size is not None:
+            kwargs["advanced_parameters"] = nncf.AdvancedCompressionParameters(
+                group_size_fallback_mode=fallback_mode,
+                min_adjusted_group_size=min_adjusted_group_size,
+            )
 
         if expected_outcome == "exception":
             with pytest.raises(InvalidGroupSizeError) as exc_info:
                 compress_weights(**kwargs)
 
-            names = re.findall(r"IgnoredScope\(names=\[(.*?)\]\)", re.sub(r"[\n\t]", "", str(exc_info.value)))
-            assert len(names) == 1, f"Error message should contain ignored scope to avoid issue: {str(exc_info.value)}"
-            name_list = [name.strip('"') for name in names[0].split(",")]
-
-            compress_weights(**kwargs, ignored_scope=IgnoredScope(names=name_list))
-        elif expected_outcome == "warn_backup_mode":
+            assert "Failed to apply group-wise quantization with group size value" in str(exc_info.value)
+        elif expected_outcome == "warn_ignored":
             with patch.object(nncf_logger, "warning") as mock_warning:
                 compress_weights(**kwargs)
             warning_messages = [args[0] for args, _ in mock_warning.call_args_list]
-            warn_msg = "Large enough flexible group size value cannot be found for some nodes."
+            warn_msg = "They will be ignored and kept with original precision."
             assert any(warn_msg in msg for msg in warning_messages)
-        elif expected_outcome == "info_adjusted_group_size":
+        elif expected_outcome in ["info_adjusted_group_size", "info_cant_adjust"]:
             with patch.object(nncf_logger, "info") as mock_info:
                 compress_weights(**kwargs)
             info_messages = [args[0] for args, _ in mock_info.call_args_list]
-            info_msg = f"Wasn't able to set the specified group size value ({group_size}) to some nodes."
+            info_msg = (
+                "Adjusted group size values will be used:"
+                if expected_outcome == "info_adjusted_group_size"
+                else "A valid adjusted group size value can't be found for some nodes."
+            )
             assert any(info_msg in msg for msg in info_messages)
 
     @pytest.mark.parametrize(
-        ["model_channel_sizes", "ratio", "group_size", "min_flex_group_size", "ref_num_group_sizes"],
         [
-            ([8, 8, 16, 16, 16, 32], 1.0, 32, 32, {32: 1}),
-            ([8, 8, 16, 16, 16, 32], 1.0, 32, 16, {16: 3, 32: 1}),
-            ([8, 8, 16, 16, 16, 32], 0.5, 32, 16, {16: 2}),
+            "model_channel_sizes",
+            "ratio",
+            "group_size",
+            "fallback_mode",
+            "min_adjusted_group_size",
+            "ref_num_group_sizes",
+        ],
+        [
+            ([8, 8, 16, 16, 16, 32], 1.0, 32, None, None, {32: 1}),
+            ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.IGNORE, None, {32: 1}),
+            ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 16, {16: 3, 32: 1}),
+            ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 32, {32: 1}),
+            ([8, 8, 16, 16, 16, 32], 0.5, 32, nncf.GroupSizeFallbackMode.ADJUST, 16, {16: 2}),
         ],
     )
-    def test_flexible_group_size(
+    def test_group_size_fallback_modes(
         self,
         model_channel_sizes,
         ratio,
         group_size,
-        min_flex_group_size,
+        fallback_mode,
+        min_adjusted_group_size,
         ref_num_group_sizes,
     ):
-        """
-        Verifies that:
-            - an exception is raised for an invalid group size
-            - a warning is logged when a flexible group size value cannot be found
-            - an info message is logged when the group size is adjusted to a valid value
-        """
         model = self.get_different_channel_size_model(model_channel_sizes)
         input_example = self.to_tensor(np.ones([1, model_channel_sizes[0], model_channel_sizes[0]], dtype=np.float32))
         dataset = Dataset([input_example])
@@ -468,10 +476,12 @@ def test_flexible_group_size(
             all_layers=True,
             group_size=group_size,
             dataset=dataset,
-            advanced_parameters=nncf.AdvancedCompressionParameters(
-                enable_flexible_group_size=True, min_flexible_group_size=min_flex_group_size
-            ),
         )
+        if fallback_mode is not None:
+            kwargs["advanced_parameters"] = nncf.AdvancedCompressionParameters(
+                group_size_fallback_mode=fallback_mode,
+                min_adjusted_group_size=min_adjusted_group_size,
+            )
 
         compress_weights(**kwargs)
 