Skip to content

Commit de884e0

Browse files
author
Nikita Savelyev
authored
[WC] GroupSizeFallbackMode instead of enable_flexible_group_size (#3583)
### Changes - Replaced boolean `enable_flexible_group_size` with a `group_size_fallback_mode` enum. Possible values are ERROR, IGNORE, ADJUST. Meaning: - ERROR: raise an exception if the channel size can't be divided by the group size. - IGNORE: a node with an invalid group size won't be compressed at all. - ADJUST: the same as with `enable_flexible_group_size=True` on develop, i.e. compute a new group size if possible, otherwise compress to backup precision. - Renamed `min_flexible_group_size` to `min_adjusted_group_size`. Set `group_size_fallback_mode` to IGNORE by default. Users are informed the following way depending on the selected fallback mode: - ERROR: an exception is raised with a suggestion to set `group_size_fallback_mode` to IGNORE or ADJUST. - IGNORE: an info message is logged that some nodes will be ignored. - ADJUST: an info message is logged that some nodes will have an adjusted group size value / be compressed to backup mode. ### Reason for changes UX improvement: now the default behavior won't result in an exception. ### Related tickets 167337 ### Tests Adopted the tests introduced in #3556.
1 parent d9fc39a commit de884e0

File tree

4 files changed

+183
-104
lines changed

4 files changed

+183
-104
lines changed

src/nncf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters as AdvancedQuantizationParameters
5959
from nncf.quantization.advanced_parameters import AdvancedScaleEstimationParameters as AdvancedScaleEstimationParameters
6060
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters as AdvancedSmoothQuantParameters
61+
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode as GroupSizeFallbackMode
6162
from nncf.quantization.advanced_parameters import OverflowFix as OverflowFix
6263
from nncf.scopes import IgnoredScope as IgnoredScope
6364
from nncf.scopes import Subgraph as Subgraph

src/nncf/quantization/advanced_parameters.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,31 @@ class FP8Type(StrEnum):
7676
E5M2 = "f8e5m2"
7777

7878

79+
@api()
80+
class GroupSizeFallbackMode(StrEnum):
81+
"""
82+
Specifies how to handle nodes that do not support the given group size.
83+
84+
:param ERROR: Raise an error if the given group size is not supported by a node.
85+
:param IGNORE: Skip nodes that cannot be compressed with the given group size.
86+
:param ADJUST: Automatically compute a suitable group size for unsupported nodes.
87+
When selected, each weight for which the channel size is not divisible by the general group size value will
88+
be compressed to a newly calculated group size. The new group size value is the maximal power of two
89+
(i.e., 2^k) such that:
90+
- channel size is divisible by it;
91+
- it is less than the originally specified group size value;
92+
- it is greater than or equal to `min_adjusted_group_size`.
93+
94+
If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
95+
precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
96+
issues, then these weights won't contribute to the ratio of backup mode group.
97+
"""
98+
99+
ERROR = "error"
100+
IGNORE = "ignore"
101+
ADJUST = "adjust"
102+
103+
79104
@api()
80105
@dataclass
81106
class QuantizationParameters:
@@ -371,20 +396,11 @@ class AdvancedCompressionParameters:
371396
:type statistics_path: str
372397
:param lora_adapter_rank: Rank of lora adapters for FQ_LORA format. Defaults to 256.
373398
:type lora_adapter_rank: int
374-
:param enable_flexible_group_size: Whether to enable flexible group size searching. When enabled, each weight
375-
for which the channel size is not divisible by the general group size value will be compressed to a newly
376-
calculated group size. The new group size value is the maximal power of two (i.e., 2^k) such that:
377-
- channel size is divisible by it;
378-
- it is less than the originally specified group size value;
379-
- it is greater than or equal to `min_flexible_group_size`.
380-
381-
If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
382-
precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
383-
issues, then these weights won't contribute to the ratio of backup mode group.
384-
:type enable_flexible_group_size: bool
385-
:param min_flexible_group_size: Minimum group size for flexible group size searching. Defaults to 16. The reason
399+
:param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
400+
:type group_size_fallback_mode: GroupSizeFallbackMode
401+
:param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
386402
behind this argument is to avoid too small group size values, which may lead to performance issues.
387-
:type min_flexible_group_size: int
403+
:type min_adjusted_group_size: int
388404
:param awq_params: Advanced parameters for AWQ algorithm.
389405
:type awq_params: AdvancedAWQParameters
390406
:param scale_estimation_params: Advanced parameters for Scale Estimation algorithm.
@@ -402,8 +418,8 @@ class AdvancedCompressionParameters:
402418

403419
statistics_path: Optional[str] = None
404420
lora_adapter_rank: int = 256
405-
enable_flexible_group_size: bool = False
406-
min_flexible_group_size: int = 16
421+
group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
422+
min_adjusted_group_size: int = 16
407423
awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
408424
scale_estimation_params: AdvancedScaleEstimationParameters = field(
409425
default_factory=AdvancedScaleEstimationParameters

src/nncf/quantization/algorithms/weight_compression/algorithm.py

Lines changed: 106 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from nncf.parameters import CompressWeightsMode
3737
from nncf.parameters import SensitivityMetric
3838
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
39+
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
3940
from nncf.quantization.advanced_parameters import convert_to_dict_recursively
4041
from nncf.quantization.algorithms.algorithm import Algorithm
4142
from nncf.quantization.algorithms.weight_compression.awq import AWQ
@@ -234,6 +235,13 @@ def check_user_compression_configuration(
234235
msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters."
235236
raise nncf.ValidationError(msg)
236237

238+
if advanced_parameters and not isinstance(advanced_parameters.group_size_fallback_mode, GroupSizeFallbackMode):
239+
msg = (
240+
f"Unsupported group size fallback mode: {advanced_parameters.group_size_fallback_mode.value}. "
241+
f"Supported modes are: {[e.value for e in GroupSizeFallbackMode]}."
242+
)
243+
raise nncf.ValidationError(msg)
244+
237245

238246
class WeightCompression(Algorithm):
239247
"""
@@ -324,8 +332,8 @@ def __init__(
324332
self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size)
325333
self._statistics_path = self._advanced_parameters.statistics_path
326334

327-
self._enable_flexible_group_size = self._advanced_parameters.enable_flexible_group_size
328-
self._min_flexible_group_size = self._advanced_parameters.min_flexible_group_size
335+
self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode
336+
self._min_adjusted_group_size = self._advanced_parameters.min_adjusted_group_size
329337

330338
if self._awq:
331339
awq_params = self._advanced_parameters.awq_params
@@ -523,71 +531,115 @@ def _set_weight_compression_config(
523531
if reduction_channel_size % w_params.compression_config.group_size != 0:
524532
failed_nodes.append((w_params.node_with_weight.node_name, reduction_channel_size))
525533
if len(failed_nodes) > 0:
526-
names = ",".join(f'"{name}"' for name, _ in failed_nodes)
534+
names = "\n\t".join(f'"{name}" (channel size: {channel_size})' for name, channel_size in failed_nodes)
527535
msg = (
528-
"Failed to apply group-wise quantization with "
529-
f"group size value {self._group_size} and channel size value {failed_nodes[0][1]}.\n"
536+
f"Failed to apply group-wise quantization with group size value {self._group_size}.\n"
530537
"Ensure that the group size is divisible by the channel size, "
531-
"or include this node and others with similar issues in the ignored scope:\n"
532-
f"nncf.compress_weight(\n\t..., \n\tignored_scope=IgnoredScope(names=[{names}]\n\t)\n)"
538+
"or consider setting `group_size_fallback_mode` to IGNORE or ADJUST. Failed nodes:\n\t" + names
533539
)
534540
raise nncf.InvalidGroupSizeError(msg)
535541

536-
def _get_flexible_group_size_data(
542+
def _handle_ignore_group_size_fallback(
543+
self,
544+
all_weight_params: list[WeightCompressionParameters],
545+
ratio_defining_params: list[WeightCompressionParameters],
546+
nodes_to_compress: list[NNCFNode],
547+
) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], list[NNCFNode]]:
548+
"""
549+
Removes nodes that cannot be quantized with the specified group size from the lists of weight parameters.
550+
"""
551+
if self._group_size == -1:
552+
return all_weight_params, ratio_defining_params, nodes_to_compress
553+
554+
nodes_to_exclude = {}
555+
for w_params in ratio_defining_params:
556+
reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
557+
if reduction_channel_size % self._group_size != 0:
558+
nodes_to_exclude[w_params.node_with_weight.node_name] = w_params.weight_shape
559+
560+
if nodes_to_exclude:
561+
ratio_defining_params = [
562+
w_params
563+
for w_params in ratio_defining_params
564+
if w_params.node_with_weight.node_name not in nodes_to_exclude
565+
]
566+
all_weight_params = [
567+
w_params
568+
for w_params in all_weight_params
569+
if w_params.node_with_weight.node_name not in nodes_to_exclude
570+
]
571+
nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in nodes_to_exclude]
572+
573+
log_lines = [
574+
f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items()
575+
]
576+
log_message = (
577+
f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
578+
"They will be ignored and kept with original precision.\n"
579+
"Consider setting group_size_fallback_mode to ADJUST, which enables automatic adjustment "
580+
"to smaller group size values."
581+
)
582+
nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
583+
584+
return all_weight_params, ratio_defining_params, nodes_to_compress
585+
586+
def _handle_adjust_group_size_fallback(
537587
self, weight_params: list[WeightCompressionParameters]
538-
) -> list[tuple[WeightCompressionParameters, int]]:
588+
) -> tuple[list[WeightCompressionParameters], dict[str, int]]:
539589
"""
540-
Compute flexible group size values.
541-
:param weight_params: Weight parameters for which to compute flexible group size.
542-
:return: A list of tuples, where each tuple pair contains a WeightCompressionParameters object and the
543-
group size values associated with it. If group size can't be assigned to some weight parameter
544-
it won't be included in the result.
590+
Calculates adjusted group size for weight parameters that cannot be quantized with the specified group size.
591+
:param weight_params: List of weight parameters to process.
592+
:return: A tuple containing two elements:
593+
- A list of weight parameters that can be quantized with the specified or adjusted group size.
594+
- A dictionary mapping weight names to their group size values.
545595
"""
546-
flexible_group_size_not_found_weight_params = []
547-
group_size_data = []
596+
if self._group_size == -1:
597+
return weight_params, {w_params.weight_name: self._group_size for w_params in weight_params}
598+
599+
group_size_values = {}
600+
valid_weight_params = []
601+
invalid_weight_params = []
602+
adjusted_weight_params = []
548603
for w_params in weight_params:
549604
reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
550605
if reduction_channel_size % self._group_size == 0:
551-
# The weight can be compressed with the given group size, nothing else to do
552-
group_size_data.append((w_params, self._group_size))
606+
valid_weight_params.append(w_params)
607+
group_size_values[w_params.weight_name] = self._group_size
553608
continue
554609

555-
# Find the maximal power of two that divides reduction_channel_size
556-
flexible_group_size = reduction_channel_size & (~reduction_channel_size + 1)
610+
# The maximal power of two that divides reduction_channel_size
611+
adjusted_group_size = reduction_channel_size & (~reduction_channel_size + 1)
612+
if adjusted_group_size >= self._min_adjusted_group_size:
613+
valid_weight_params.append(w_params)
614+
group_size_values[w_params.weight_name] = adjusted_group_size
615+
adjusted_weight_params.append((w_params, adjusted_group_size))
616+
continue
557617

558-
if flexible_group_size < self._min_flexible_group_size:
559-
flexible_group_size_not_found_weight_params.append(w_params)
560-
else:
561-
group_size_data.append((w_params, flexible_group_size))
618+
invalid_weight_params.append(w_params)
562619

563-
node_strings = []
564-
for i, (w_params, new_group_size) in enumerate(group_size_data):
565-
if new_group_size == self._group_size:
566-
continue
567-
weight_shape = w_params.weight_shape
568-
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
569-
node_strings.append(
570-
f"{w_params.node_with_weight.node_name} "
571-
f"(weight shape: {weight_shape}, adjusted group size: {new_group_size})"
572-
)
573-
if len(node_strings) > 0:
620+
if adjusted_weight_params:
621+
# Adjusted group size value for some nodes
622+
log_lines = [
623+
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
624+
for w, adjusted_gs in adjusted_weight_params
625+
]
574626
nncf_logger.info(
575-
f"Wasn't able to set the specified group size value ({self._group_size}) to some nodes. These nodes "
576-
f"will have an adjusted group size value:\n\t" + "\n\t".join(node_strings)
627+
f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
628+
"Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
577629
)
578630

579-
if len(flexible_group_size_not_found_weight_params) > 0:
580-
node_strings = [""] * len(flexible_group_size_not_found_weight_params)
581-
for i, w_params in enumerate(flexible_group_size_not_found_weight_params):
582-
weight_shape = w_params.weight_shape
583-
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
584-
node_strings[i] = f"{w_params.node_with_weight.node_name} (weight shape: {weight_shape})"
585-
nncf_logger.warning(
586-
"Large enough flexible group size value cannot be found for some nodes. They will be compressed "
587-
"according to the backup mode. Nodes:\n\t" + "\n\t".join(node_strings)
631+
if invalid_weight_params:
632+
# Valid adjusted group size wasn't found
633+
log_lines = [
634+
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
635+
]
636+
log_message = (
637+
"A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
638+
f"{self._backup_mode.value} backup mode."
588639
)
640+
nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
589641

590-
return group_size_data
642+
return valid_weight_params, group_size_values
591643

592644
@staticmethod
593645
def _proportion_str(num_weights_list: list[int], total_num_weights: int, total_num_params: int) -> str:
@@ -757,14 +809,14 @@ def apply(
757809
weight_names.add(weight_name)
758810

759811
ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared)
760-
if self._enable_flexible_group_size and self._group_size != -1:
761-
# Compute flexible group size values if enabled
762-
flexible_group_size_data = self._get_flexible_group_size_data(ratio_defining_params)
763-
group_size_values = {w_param.weight_name: group_size for w_param, group_size in flexible_group_size_data}
764-
# Select a subset of ratio_defining_params that can be compressed with some group size
765-
ratio_defining_params = [w_param for w_param, _ in flexible_group_size_data]
812+
if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE:
813+
all_weight_params, ratio_defining_params, nodes_to_compress = self._handle_ignore_group_size_fallback(
814+
all_weight_params, ratio_defining_params, nodes_to_compress
815+
)
816+
if self._group_size_fallback_mode == GroupSizeFallbackMode.ADJUST:
817+
ratio_defining_params, group_size_values = self._handle_adjust_group_size_fallback(ratio_defining_params)
766818
else:
767-
group_size_values = {w_param.weight_name: self._group_size for w_param in ratio_defining_params}
819+
group_size_values = {w_params.weight_name: self._group_size for w_params in all_weight_params}
768820
self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values)
769821
ignored_scope_weight_statistics = self._get_ignored_scope_weight_statistics(model, graph)
770822
nncf_logger.info(

0 commit comments

Comments
 (0)