Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/nncf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters as AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import AdvancedScaleEstimationParameters as AdvancedScaleEstimationParameters
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters as AdvancedSmoothQuantParameters
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode as GroupSizeFallbackMode
from nncf.quantization.advanced_parameters import OverflowFix as OverflowFix
from nncf.scopes import IgnoredScope as IgnoredScope
from nncf.scopes import Subgraph as Subgraph
Expand Down
46 changes: 31 additions & 15 deletions src/nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,31 @@ class FP8Type(StrEnum):
E5M2 = "f8e5m2"


@api()
class GroupSizeFallbackMode(StrEnum):
    """
    Strategy applied to nodes whose weights cannot be compressed with the requested group size.

    :param ERROR: Raise an error when a node does not support the requested group size.
    :param IGNORE: Skip nodes that do not support the requested group size; they are not compressed.
    :param ADJUST: Automatically select another group size for nodes that do not support the requested one.
        Every weight whose channel size is not divisible by the general group size value is compressed with a
        newly calculated group size. That value is the maximal power of two (i.e., 2^k) such that:
            - the channel size is divisible by it;
            - it is less than the originally specified group size value;
            - it is greater than or equal to `min_adjusted_group_size`.

        When no value satisfies these requirements, the weight is compressed to the backup precision. If
        ratio < 1.0 and some weights end up in the backup precision because of group size issues, these
        weights do not contribute to the ratio of the backup mode group.
    """

    ERROR = "error"
    IGNORE = "ignore"
    ADJUST = "adjust"


@api()
@dataclass
class QuantizationParameters:
Expand Down Expand Up @@ -371,20 +396,11 @@ class AdvancedCompressionParameters:
:type statistics_path: str
:param lora_adapter_rank: Rank of lora adapters for FQ_LORA format. Defaults to 256.
:type lora_adapter_rank: int
:param enable_flexible_group_size: Whether to enable flexible group size searching. When enabled, each weight
for which the channel size is not divisible by the general group size value will be compressed to a newly
calculated group size. The new group size value is the maximal power of two (i.e., 2^k) such that:
- channel size is divisible by it;
- it is less than the originally specified group size value;
- it is greater than or equal to `min_flexible_group_size`.

If it's not possible to find a value satisfying these requirements, such weight is compressed to the backup
precision. If ratio < 1.0 and some weights have to be compressed to the backup precision because of group size
issues, then these weights won't contribute to the ratio of backup mode group.
:type enable_flexible_group_size: bool
:param min_flexible_group_size: Minimum group size for flexible group size searching. Defaults to 16. The reason
:param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
:type group_size_fallback_mode: GroupSizeFallbackMode
:param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
behind this argument is to avoid too small group size values, which may lead to performance issues.
:type min_flexible_group_size: int
:type min_adjusted_group_size: int
:param awq_params: Advanced parameters for AWQ algorithm.
:type awq_params: AdvancedAWQParameters
:param scale_estimation_params: Advanced parameters for Scale Estimation algorithm.
Expand All @@ -402,8 +418,8 @@ class AdvancedCompressionParameters:

statistics_path: Optional[str] = None
lora_adapter_rank: int = 256
enable_flexible_group_size: bool = False
min_flexible_group_size: int = 16
group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
min_adjusted_group_size: int = 16
awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
scale_estimation_params: AdvancedScaleEstimationParameters = field(
default_factory=AdvancedScaleEstimationParameters
Expand Down
160 changes: 106 additions & 54 deletions src/nncf/quantization/algorithms/weight_compression/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from nncf.parameters import CompressWeightsMode
from nncf.parameters import SensitivityMetric
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
from nncf.quantization.advanced_parameters import convert_to_dict_recursively
from nncf.quantization.algorithms.algorithm import Algorithm
from nncf.quantization.algorithms.weight_compression.awq import AWQ
Expand Down Expand Up @@ -234,6 +235,13 @@ def check_user_compression_configuration(
msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters."
raise nncf.ValidationError(msg)

if advanced_parameters and not isinstance(advanced_parameters.group_size_fallback_mode, GroupSizeFallbackMode):
    # The rejected object is, by construction, not a GroupSizeFallbackMode member, so it may have no
    # `.value` attribute (e.g. a plain string such as "adjust"). Fall back to the object itself so that the
    # user gets the intended ValidationError instead of an unrelated AttributeError.
    invalid_mode = advanced_parameters.group_size_fallback_mode
    invalid_mode = getattr(invalid_mode, "value", invalid_mode)
    msg = (
        f"Unsupported group size fallback mode: {invalid_mode}. "
        f"Supported modes are: {[e.value for e in GroupSizeFallbackMode]}."
    )
    raise nncf.ValidationError(msg)


class WeightCompression(Algorithm):
"""
Expand Down Expand Up @@ -324,8 +332,8 @@ def __init__(
self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size)
self._statistics_path = self._advanced_parameters.statistics_path

self._enable_flexible_group_size = self._advanced_parameters.enable_flexible_group_size
self._min_flexible_group_size = self._advanced_parameters.min_flexible_group_size
self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode
self._min_adjusted_group_size = self._advanced_parameters.min_adjusted_group_size

if self._awq:
awq_params = self._advanced_parameters.awq_params
Expand Down Expand Up @@ -523,71 +531,115 @@ def _set_weight_compression_config(
if reduction_channel_size % w_params.compression_config.group_size != 0:
failed_nodes.append((w_params.node_with_weight.node_name, reduction_channel_size))
if len(failed_nodes) > 0:
names = ",".join(f'"{name}"' for name, _ in failed_nodes)
names = "\n\t".join(f'"{name}" (channel size: {channel_size})' for name, channel_size in failed_nodes)
msg = (
"Failed to apply group-wise quantization with "
f"group size value {self._group_size} and channel size value {failed_nodes[0][1]}.\n"
f"Failed to apply group-wise quantization with group size value {self._group_size}.\n"
"Ensure that the group size is divisible by the channel size, "
"or include this node and others with similar issues in the ignored scope:\n"
f"nncf.compress_weight(\n\t..., \n\tignored_scope=IgnoredScope(names=[{names}]\n\t)\n)"
"or consider setting `group_size_fallback_mode` to IGNORE or ADJUST. Failed nodes:\n\t" + names
)
raise nncf.InvalidGroupSizeError(msg)

def _get_flexible_group_size_data(
def _handle_ignore_group_size_fallback(
    self,
    all_weight_params: list[WeightCompressionParameters],
    ratio_defining_params: list[WeightCompressionParameters],
    nodes_to_compress: list[NNCFNode],
) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], list[NNCFNode]]:
    """
    Filters out the nodes whose weights cannot be quantized with the specified group size.

    A node is excluded when the reduction channel size of its ratio-defining weight is not divisible by
    the group size. Excluded nodes are removed from all three collections and reported in a warning.

    :param all_weight_params: Weight parameters of all weights considered for compression.
    :param ratio_defining_params: Weight parameters that are checked against the group size.
    :param nodes_to_compress: Nodes scheduled for compression.
    :return: A tuple of (all_weight_params, ratio_defining_params, nodes_to_compress) without the excluded
        nodes. The inputs are returned as is when the group size is -1 or when nothing has to be excluded.
    """
    group_size = self._group_size
    if group_size == -1:
        # Group size of -1: there is no divisibility constraint to check.
        return all_weight_params, ratio_defining_params, nodes_to_compress

    # Node name -> weight shape for every node that doesn't fit the group size.
    excluded_shapes = {}
    for params in ratio_defining_params:
        channel_size, _ = get_reduction_channel_size(params.weight_shape, params.reduction_axes)
        if channel_size % group_size != 0:
            excluded_shapes[params.node_with_weight.node_name] = params.weight_shape

    if not excluded_shapes:
        return all_weight_params, ratio_defining_params, nodes_to_compress

    ratio_defining_params = [
        params for params in ratio_defining_params if params.node_with_weight.node_name not in excluded_shapes
    ]
    all_weight_params = [
        params for params in all_weight_params if params.node_with_weight.node_name not in excluded_shapes
    ]
    nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in excluded_shapes]

    report_lines = []
    for name, shape in excluded_shapes.items():
        report_lines.append(f"{name} (weight shape: {shape})")
    report_header = (
        f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
        "They will be ignored and kept with original precision.\n"
        "Consider setting group_size_fallback_mode to ADJUST, which enables automatic adjustment "
        "to smaller group size values."
    )
    nncf_logger.warning(f"{report_header} Nodes:\n\t" + "\n\t".join(report_lines))

    return all_weight_params, ratio_defining_params, nodes_to_compress

def _handle_adjust_group_size_fallback(
self, weight_params: list[WeightCompressionParameters]
) -> list[tuple[WeightCompressionParameters, int]]:
) -> tuple[list[WeightCompressionParameters], dict[str, int]]:
"""
Compute flexible group size values.
:param weight_params: Weight parameters for which to compute flexible group size.
:return: A list of tuples, where each tuple pair contains a WeightCompressionParameters object and the
group size values associated with it. If group size can't be assigned to some weight parameter
it won't be included in the result.
Calculates adjusted group size for weight parameters that cannot be quantized with the specified group size.
:param weight_params: List of weight parameters to process.
:return: A tuple containing two elements:
- A list of weight parameters that can be quantized with the specified or adjusted group size.
- A dictionary mapping weight names to their group size values.
"""
flexible_group_size_not_found_weight_params = []
group_size_data = []
if self._group_size == -1:
return weight_params, {w_params.weight_name: self._group_size for w_params in weight_params}

group_size_values = {}
valid_weight_params = []
invalid_weight_params = []
adjusted_weight_params = []
for w_params in weight_params:
reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes)
if reduction_channel_size % self._group_size == 0:
# The weight can be compressed with the given group size, nothing else to do
group_size_data.append((w_params, self._group_size))
valid_weight_params.append(w_params)
group_size_values[w_params.weight_name] = self._group_size
continue

# Find the maximal power of two that divides reduction_channel_size
flexible_group_size = reduction_channel_size & (~reduction_channel_size + 1)
# The maximal power of two that divides reduction_channel_size
adjusted_group_size = reduction_channel_size & (~reduction_channel_size + 1)
if adjusted_group_size >= self._min_adjusted_group_size:
valid_weight_params.append(w_params)
group_size_values[w_params.weight_name] = adjusted_group_size
adjusted_weight_params.append((w_params, adjusted_group_size))
continue

if flexible_group_size < self._min_flexible_group_size:
flexible_group_size_not_found_weight_params.append(w_params)
else:
group_size_data.append((w_params, flexible_group_size))
invalid_weight_params.append(w_params)

node_strings = []
for i, (w_params, new_group_size) in enumerate(group_size_data):
if new_group_size == self._group_size:
continue
weight_shape = w_params.weight_shape
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
node_strings.append(
f"{w_params.node_with_weight.node_name} "
f"(weight shape: {weight_shape}, adjusted group size: {new_group_size})"
)
if len(node_strings) > 0:
if adjusted_weight_params:
# Adjusted group size value for some nodes
log_lines = [
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
for w, adjusted_gs in adjusted_weight_params
]
nncf_logger.info(
f"Wasn't able to set the specified group size value ({self._group_size}) to some nodes. These nodes "
f"will have an adjusted group size value:\n\t" + "\n\t".join(node_strings)
f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
"Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
)

if len(flexible_group_size_not_found_weight_params) > 0:
node_strings = [""] * len(flexible_group_size_not_found_weight_params)
for i, w_params in enumerate(flexible_group_size_not_found_weight_params):
weight_shape = w_params.weight_shape
reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes)
node_strings[i] = f"{w_params.node_with_weight.node_name} (weight shape: {weight_shape})"
nncf_logger.warning(
"Large enough flexible group size value cannot be found for some nodes. They will be compressed "
"according to the backup mode. Nodes:\n\t" + "\n\t".join(node_strings)
if invalid_weight_params:
# Valid adjusted group size wasn't found
log_lines = [
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
]
log_message = (
"A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
f"{self._backup_mode.value} backup mode."
)
nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))

return group_size_data
return valid_weight_params, group_size_values

@staticmethod
def _proportion_str(num_weights_list: list[int], total_num_weights: int, total_num_params: int) -> str:
Expand Down Expand Up @@ -757,14 +809,14 @@ def apply(
weight_names.add(weight_name)

ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared)
if self._enable_flexible_group_size and self._group_size != -1:
# Compute flexible group size values if enabled
flexible_group_size_data = self._get_flexible_group_size_data(ratio_defining_params)
group_size_values = {w_param.weight_name: group_size for w_param, group_size in flexible_group_size_data}
# Select a subset of ratio_defining_params that can be compressed with some group size
ratio_defining_params = [w_param for w_param, _ in flexible_group_size_data]
if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE:
all_weight_params, ratio_defining_params, nodes_to_compress = self._handle_ignore_group_size_fallback(
all_weight_params, ratio_defining_params, nodes_to_compress
)
if self._group_size_fallback_mode == GroupSizeFallbackMode.ADJUST:
ratio_defining_params, group_size_values = self._handle_adjust_group_size_fallback(ratio_defining_params)
else:
group_size_values = {w_param.weight_name: self._group_size for w_param in ratio_defining_params}
group_size_values = {w_params.weight_name: self._group_size for w_params in all_weight_params}
self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values)
ignored_scope_weight_statistics = self._get_ignored_scope_weight_statistics(model, graph)
nncf_logger.info(
Expand Down
Loading