|
36 | 36 | from nncf.parameters import CompressWeightsMode |
37 | 37 | from nncf.parameters import SensitivityMetric |
38 | 38 | from nncf.quantization.advanced_parameters import AdvancedCompressionParameters |
| 39 | +from nncf.quantization.advanced_parameters import GroupSizeFallbackMode |
39 | 40 | from nncf.quantization.advanced_parameters import convert_to_dict_recursively |
40 | 41 | from nncf.quantization.algorithms.algorithm import Algorithm |
41 | 42 | from nncf.quantization.algorithms.weight_compression.awq import AWQ |
@@ -234,6 +235,13 @@ def check_user_compression_configuration( |
234 | 235 | msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters." |
235 | 236 | raise nncf.ValidationError(msg) |
236 | 237 |
|
| 238 | + if advanced_parameters and not isinstance(advanced_parameters.group_size_fallback_mode, GroupSizeFallbackMode): |
| 239 | + msg = ( |
| 240 | + f"Unsupported group size fallback mode: {advanced_parameters.group_size_fallback_mode.value}. " |
| 241 | + f"Supported modes are: {[e.value for e in GroupSizeFallbackMode]}." |
| 242 | + ) |
| 243 | + raise nncf.ValidationError(msg) |
| 244 | + |
237 | 245 |
|
238 | 246 | class WeightCompression(Algorithm): |
239 | 247 | """ |
@@ -324,8 +332,8 @@ def __init__( |
324 | 332 | self._mixed_precision_algo = criterion_cls(self._ratio, self._subset_size) |
325 | 333 | self._statistics_path = self._advanced_parameters.statistics_path |
326 | 334 |
|
327 | | - self._enable_flexible_group_size = self._advanced_parameters.enable_flexible_group_size |
328 | | - self._min_flexible_group_size = self._advanced_parameters.min_flexible_group_size |
| 335 | + self._group_size_fallback_mode = self._advanced_parameters.group_size_fallback_mode |
| 336 | + self._min_adjusted_group_size = self._advanced_parameters.min_adjusted_group_size |
329 | 337 |
|
330 | 338 | if self._awq: |
331 | 339 | awq_params = self._advanced_parameters.awq_params |
@@ -523,71 +531,115 @@ def _set_weight_compression_config( |
523 | 531 | if reduction_channel_size % w_params.compression_config.group_size != 0: |
524 | 532 | failed_nodes.append((w_params.node_with_weight.node_name, reduction_channel_size)) |
525 | 533 | if len(failed_nodes) > 0: |
526 | | - names = ",".join(f'"{name}"' for name, _ in failed_nodes) |
| 534 | + names = "\n\t".join(f'"{name}" (channel size: {channel_size})' for name, channel_size in failed_nodes) |
527 | 535 | msg = ( |
528 | | - "Failed to apply group-wise quantization with " |
529 | | - f"group size value {self._group_size} and channel size value {failed_nodes[0][1]}.\n" |
| 536 | + f"Failed to apply group-wise quantization with group size value {self._group_size}.\n" |
530 | 537 | "Ensure that the group size is divisible by the channel size, " |
531 | | - "or include this node and others with similar issues in the ignored scope:\n" |
532 | | - f"nncf.compress_weight(\n\t..., \n\tignored_scope=IgnoredScope(names=[{names}]\n\t)\n)" |
| 538 | + "or consider setting `group_size_fallback_mode` to IGNORE or ADJUST. Failed nodes:\n\t" + names |
533 | 539 | ) |
534 | 540 | raise nncf.InvalidGroupSizeError(msg) |
535 | 541 |
|
536 | | - def _get_flexible_group_size_data( |
| 542 | + def _handle_ignore_group_size_fallback( |
| 543 | + self, |
| 544 | + all_weight_params: list[WeightCompressionParameters], |
| 545 | + ratio_defining_params: list[WeightCompressionParameters], |
| 546 | + nodes_to_compress: list[NNCFNode], |
| 547 | + ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], list[NNCFNode]]: |
| 548 | + """ |
| 549 | + Removes nodes that cannot be quantized with the specified group size from the lists of weight parameters. |
| 550 | + """ |
| 551 | + if self._group_size == -1: |
| 552 | + return all_weight_params, ratio_defining_params, nodes_to_compress |
| 553 | + |
| 554 | + nodes_to_exclude = {} |
| 555 | + for w_params in ratio_defining_params: |
| 556 | + reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes) |
| 557 | + if reduction_channel_size % self._group_size != 0: |
| 558 | + nodes_to_exclude[w_params.node_with_weight.node_name] = w_params.weight_shape |
| 559 | + |
| 560 | + if nodes_to_exclude: |
| 561 | + ratio_defining_params = [ |
| 562 | + w_params |
| 563 | + for w_params in ratio_defining_params |
| 564 | + if w_params.node_with_weight.node_name not in nodes_to_exclude |
| 565 | + ] |
| 566 | + all_weight_params = [ |
| 567 | + w_params |
| 568 | + for w_params in all_weight_params |
| 569 | + if w_params.node_with_weight.node_name not in nodes_to_exclude |
| 570 | + ] |
| 571 | + nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in nodes_to_exclude] |
| 572 | + |
| 573 | + log_lines = [ |
| 574 | + f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items() |
| 575 | + ] |
| 576 | + log_message = ( |
| 577 | + f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. " |
| 578 | + "They will be ignored and kept with original precision.\n" |
| 579 | + "Consider setting group_size_fallback_mode to ADJUST, which enables automatic adjustment " |
| 580 | + "to smaller group size values." |
| 581 | + ) |
| 582 | + nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines)) |
| 583 | + |
| 584 | + return all_weight_params, ratio_defining_params, nodes_to_compress |
| 585 | + |
| 586 | + def _handle_adjust_group_size_fallback( |
537 | 587 | self, weight_params: list[WeightCompressionParameters] |
538 | | - ) -> list[tuple[WeightCompressionParameters, int]]: |
| 588 | + ) -> tuple[list[WeightCompressionParameters], dict[str, int]]: |
539 | 589 | """ |
540 | | - Compute flexible group size values. |
541 | | - :param weight_params: Weight parameters for which to compute flexible group size. |
542 | | - :return: A list of tuples, where each tuple pair contains a WeightCompressionParameters object and the |
543 | | - group size values associated with it. If group size can't be assigned to some weight parameter |
544 | | - it won't be included in the result. |
| 590 | + Calculates adjusted group size for weight parameters that cannot be quantized with the specified group size. |
| 591 | + :param weight_params: List of weight parameters to process. |
| 592 | + :return: A tuple containing two elements: |
| 593 | + - A list of weight parameters that can be quantized with the specified or adjusted group size. |
| 594 | + - A dictionary mapping weight names to their group size values. |
545 | 595 | """ |
546 | | - flexible_group_size_not_found_weight_params = [] |
547 | | - group_size_data = [] |
| 596 | + if self._group_size == -1: |
| 597 | + return weight_params, {w_params.weight_name: self._group_size for w_params in weight_params} |
| 598 | + |
| 599 | + group_size_values = {} |
| 600 | + valid_weight_params = [] |
| 601 | + invalid_weight_params = [] |
| 602 | + adjusted_weight_params = [] |
548 | 603 | for w_params in weight_params: |
549 | 604 | reduction_channel_size, _ = get_reduction_channel_size(w_params.weight_shape, w_params.reduction_axes) |
550 | 605 | if reduction_channel_size % self._group_size == 0: |
551 | | - # The weight can be compressed with the given group size, nothing else to do |
552 | | - group_size_data.append((w_params, self._group_size)) |
| 606 | + valid_weight_params.append(w_params) |
| 607 | + group_size_values[w_params.weight_name] = self._group_size |
553 | 608 | continue |
554 | 609 |
|
555 | | - # Find the maximal power of two that divides reduction_channel_size |
556 | | - flexible_group_size = reduction_channel_size & (~reduction_channel_size + 1) |
| 610 | + # The maximal power of two that divides reduction_channel_size |
| 611 | + adjusted_group_size = reduction_channel_size & (~reduction_channel_size + 1) |
| 612 | + if adjusted_group_size >= self._min_adjusted_group_size: |
| 613 | + valid_weight_params.append(w_params) |
| 614 | + group_size_values[w_params.weight_name] = adjusted_group_size |
| 615 | + adjusted_weight_params.append((w_params, adjusted_group_size)) |
| 616 | + continue |
557 | 617 |
|
558 | | - if flexible_group_size < self._min_flexible_group_size: |
559 | | - flexible_group_size_not_found_weight_params.append(w_params) |
560 | | - else: |
561 | | - group_size_data.append((w_params, flexible_group_size)) |
| 618 | + invalid_weight_params.append(w_params) |
562 | 619 |
|
563 | | - node_strings = [] |
564 | | - for i, (w_params, new_group_size) in enumerate(group_size_data): |
565 | | - if new_group_size == self._group_size: |
566 | | - continue |
567 | | - weight_shape = w_params.weight_shape |
568 | | - reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes) |
569 | | - node_strings.append( |
570 | | - f"{w_params.node_with_weight.node_name} " |
571 | | - f"(weight shape: {weight_shape}, adjusted group size: {new_group_size})" |
572 | | - ) |
573 | | - if len(node_strings) > 0: |
| 620 | + if adjusted_weight_params: |
| 621 | + # Adjusted group size value for some nodes |
| 622 | + log_lines = [ |
| 623 | + f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})" |
| 624 | + for w, adjusted_gs in adjusted_weight_params |
| 625 | + ] |
574 | 626 | nncf_logger.info( |
575 | | - f"Wasn't able to set the specified group size value ({self._group_size}) to some nodes. These nodes " |
576 | | - f"will have an adjusted group size value:\n\t" + "\n\t".join(node_strings) |
| 627 | + f"Some nodes can't be quantized with the specified group size of {self._group_size}. " |
| 628 | + "Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines) |
577 | 629 | ) |
578 | 630 |
|
579 | | - if len(flexible_group_size_not_found_weight_params) > 0: |
580 | | - node_strings = [""] * len(flexible_group_size_not_found_weight_params) |
581 | | - for i, w_params in enumerate(flexible_group_size_not_found_weight_params): |
582 | | - weight_shape = w_params.weight_shape |
583 | | - reduction_channel_size, _ = get_reduction_channel_size(weight_shape, w_params.reduction_axes) |
584 | | - node_strings[i] = f"{w_params.node_with_weight.node_name} (weight shape: {weight_shape})" |
585 | | - nncf_logger.warning( |
586 | | - "Large enough flexible group size value cannot be found for some nodes. They will be compressed " |
587 | | - "according to the backup mode. Nodes:\n\t" + "\n\t".join(node_strings) |
| 631 | + if invalid_weight_params: |
| 632 | + # Valid adjusted group size wasn't found |
| 633 | + log_lines = [ |
| 634 | + f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params |
| 635 | + ] |
| 636 | + log_message = ( |
| 637 | + "A valid adjusted group size value can't be found for some nodes. They will be quantized using the " |
| 638 | + f"{self._backup_mode.value} backup mode." |
588 | 639 | ) |
| 640 | + nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines)) |
589 | 641 |
|
590 | | - return group_size_data |
| 642 | + return valid_weight_params, group_size_values |
591 | 643 |
|
592 | 644 | @staticmethod |
593 | 645 | def _proportion_str(num_weights_list: list[int], total_num_weights: int, total_num_params: int) -> str: |
@@ -757,14 +809,14 @@ def apply( |
757 | 809 | weight_names.add(weight_name) |
758 | 810 |
|
759 | 811 | ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared) |
760 | | - if self._enable_flexible_group_size and self._group_size != -1: |
761 | | - # Compute flexible group size values if enabled |
762 | | - flexible_group_size_data = self._get_flexible_group_size_data(ratio_defining_params) |
763 | | - group_size_values = {w_param.weight_name: group_size for w_param, group_size in flexible_group_size_data} |
764 | | - # Select a subset of ratio_defining_params that can be compressed with some group size |
765 | | - ratio_defining_params = [w_param for w_param, _ in flexible_group_size_data] |
| 812 | + if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE: |
| 813 | + all_weight_params, ratio_defining_params, nodes_to_compress = self._handle_ignore_group_size_fallback( |
| 814 | + all_weight_params, ratio_defining_params, nodes_to_compress |
| 815 | + ) |
| 816 | + if self._group_size_fallback_mode == GroupSizeFallbackMode.ADJUST: |
| 817 | + ratio_defining_params, group_size_values = self._handle_adjust_group_size_fallback(ratio_defining_params) |
766 | 818 | else: |
767 | | - group_size_values = {w_param.weight_name: self._group_size for w_param in ratio_defining_params} |
| 819 | + group_size_values = {w_params.weight_name: self._group_size for w_params in all_weight_params} |
768 | 820 | self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) |
769 | 821 | ignored_scope_weight_statistics = self._get_ignored_scope_weight_statistics(model, graph) |
770 | 822 | nncf_logger.info( |
|
0 commit comments