From 458181d2459eec336a20d0c57b22b1105f514b61 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 25 Aug 2025 17:10:48 +0400 Subject: [PATCH 01/15] init --- .../weight_compression/algorithm.py | 65 ++++++++++++++----- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 1fb2f343f72..39f794e8f69 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -769,30 +769,42 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction - def apply( + def get_processed_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: - self.set_backend_entity(model) - + ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, Any]]: + """ + Collects and processes weight compression parameters for all nodes in the model, + applies all processing steps including group size fallback handling and compression + configuration setting. + + :param model: Backend-specific input model. + :param graph: NNCFGraph instance. + :param statistic_points: Optional pre-collected statistic points. + :param dataset: Optional dataset for statistics collection. + :return: A tuple containing Final processed list of weight parameters ready for compression, + List of weight parameters that were skipped + Statistics collected for data-aware compression. None if data-free compression. 
+ """ nodes_to_compress = self.get_nodes_to_compress(graph) - - all_weight_params: list[WeightCompressionParameters] = [] + + initial_all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] - + weight_names = set() is_last_layer_skipped = False n = len(nodes_to_compress) ignored_names = self.get_ignored_node_names(graph) + for i, node in enumerate(nodes_to_compress): is_target_node = should_consider_scope(node.node_name, ignored_names) for weight_name, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph): is_last_layer = i == n - 1 if weight_name in weight_names: - # If the last layer has shared weights then skiped + # If the last layer has shared weights then skip # to avoid processing the same weight more than once is_last_layer_skipped = is_last_layer continue @@ -828,10 +840,11 @@ def apply( ) if self.is_weight_compression_supported(weight_dtype, mode): wc_config = WeightCompressionConfig(mode=mode) + weight_params = WeightCompressionParameters( weight_name, node, weight_port_id, weight_dtype, weight_shape, reduction_axes, wc_config ) - all_weight_params.append(weight_params) + initial_all_weight_params.append(weight_params) weight_names.add(weight_name) else: is_last_layer_skipped = is_last_layer @@ -841,10 +854,9 @@ def apply( ) ) - # Get subset of nodes to define compression ratio - ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_skipped) + ratio_defining_params = self._get_ratio_defining_params(initial_all_weight_params, is_last_layer_skipped) - # Handle group size fallback modes + all_weight_params = initial_all_weight_params if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE: all_weight_params, ratio_defining_params, skipped_weight_params = self._handle_ignore_group_size_fallback( all_weight_params, ratio_defining_params, skipped_weight_params @@ -854,7 +866,7 @@ def apply( else: group_size_values = 
{w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # Collect statistics for the weights compression + # Step 4: Collect statistics for data-aware compression statistics = None if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params @@ -873,19 +885,36 @@ def apply( matmul_input_to_output_nodes_map, statistic_points ) - # Set weight compression configuration + # Step 5: Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) - # Print statistics + # Step 6: Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision - all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + # Step 7: Filter out nodes that should remain in their original floating-point precision + final_all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) + + return final_all_weight_params, skipped_weight_params, statistics + + + def apply( + self, + model: TModel, + graph: NNCFGraph, + statistic_points: Optional[StatisticPointsContainer] = None, + dataset: Optional[Dataset] = None, + ) -> TModel: + self.set_backend_entity(model) + + # Get processed weight compression parameters ready for compression + all_weight_params, skipped_weight_params, statistics = self.get_processed_weight_compression_parameters( + model, graph, statistic_points, dataset + ) if self._awq: - model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) + self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After 
applying AWQ we need to update statistics since AWQ alters the activations statistics = self.awq_algo.update_statistics(statistics) # del is used to prematurely mark non-necessary data as free for garbage collection From db74923ad60403fcd6d9aa4ed1ffb8cf2b19c807 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 25 Aug 2025 17:11:11 +0400 Subject: [PATCH 02/15] comments fix --- .../quantization/algorithms/weight_compression/algorithm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 39f794e8f69..41bf747f79b 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -866,7 +866,6 @@ def get_processed_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} - # Step 4: Collect statistics for data-aware compression statistics = None if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params @@ -885,15 +884,12 @@ def get_processed_weight_compression_parameters( matmul_input_to_output_nodes_map, statistic_points ) - # Step 5: Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) - # Step 6: Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - # Step 7: Filter out nodes that should remain in their original floating-point precision final_all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) return final_all_weight_params, skipped_weight_params, statistics From 0a752357903e404f23d6593f80c66da0debe8035 Mon Sep 
17 00:00:00 2001 From: anzr299 Date: Mon, 25 Aug 2025 17:42:32 +0400 Subject: [PATCH 03/15] pre commit fix --- .../weight_compression/algorithm.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 41bf747f79b..16cfda4eda4 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -780,7 +780,7 @@ def get_processed_weight_compression_parameters( Collects and processes weight compression parameters for all nodes in the model, applies all processing steps including group size fallback handling and compression configuration setting. - + :param model: Backend-specific input model. :param graph: NNCFGraph instance. :param statistic_points: Optional pre-collected statistic points. @@ -790,15 +790,15 @@ def get_processed_weight_compression_parameters( Statistics collected for data-aware compression. None if data-free compression. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) - + initial_all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] - + weight_names = set() is_last_layer_skipped = False n = len(nodes_to_compress) ignored_names = self.get_ignored_node_names(graph) - + for i, node in enumerate(nodes_to_compress): is_target_node = should_consider_scope(node.node_name, ignored_names) for weight_name, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph): @@ -840,7 +840,7 @@ def get_processed_weight_compression_parameters( ) if self.is_weight_compression_supported(weight_dtype, mode): wc_config = WeightCompressionConfig(mode=mode) - + weight_params = WeightCompressionParameters( weight_name, node, weight_port_id, weight_dtype, weight_shape, reduction_axes, wc_config ) @@ -890,10 +890,11 @@ def get_processed_weight_compression_parameters( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - final_all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) - - return final_all_weight_params, skipped_weight_params, statistics + final_all_weight_params = list( + filter(lambda w_params: w_params.compression_config is not None, all_weight_params) + ) + return final_all_weight_params, statistics def apply( self, @@ -905,7 +906,7 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, skipped_weight_params, statistics = self.get_processed_weight_compression_parameters( + all_weight_params, statistics = self.get_processed_weight_compression_parameters( model, graph, statistic_points, dataset ) From 34c9aca6535222ba973dfd39abb240ad4e9b1f7d Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 25 Aug 2025 17:57:29 +0400 Subject: [PATCH 04/15] clean up code and comments --- .../algorithms/weight_compression/algorithm.py | 
12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 16cfda4eda4..f8c61670cc1 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -791,7 +791,7 @@ def get_processed_weight_compression_parameters( """ nodes_to_compress = self.get_nodes_to_compress(graph) - initial_all_weight_params: list[WeightCompressionParameters] = [] + all_weight_params: list[WeightCompressionParameters] = [] skipped_weight_params: list[WeightCompressionParameters] = [] weight_names = set() @@ -804,7 +804,7 @@ def get_processed_weight_compression_parameters( for weight_name, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph): is_last_layer = i == n - 1 if weight_name in weight_names: - # If the last layer has shared weights then skip + # If the last layer has shared weights then skip it # to avoid processing the same weight more than once is_last_layer_skipped = is_last_layer continue @@ -844,7 +844,7 @@ def get_processed_weight_compression_parameters( weight_params = WeightCompressionParameters( weight_name, node, weight_port_id, weight_dtype, weight_shape, reduction_axes, wc_config ) - initial_all_weight_params.append(weight_params) + all_weight_params.append(weight_params) weight_names.add(weight_name) else: is_last_layer_skipped = is_last_layer @@ -854,9 +854,9 @@ def get_processed_weight_compression_parameters( ) ) - ratio_defining_params = self._get_ratio_defining_params(initial_all_weight_params, is_last_layer_skipped) + ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_skipped) - all_weight_params = initial_all_weight_params + # Handle group size fallback modes if self._group_size_fallback_mode == GroupSizeFallbackMode.IGNORE: all_weight_params, 
ratio_defining_params, skipped_weight_params = self._handle_ignore_group_size_fallback( all_weight_params, ratio_defining_params, skipped_weight_params @@ -911,7 +911,7 @@ def apply( ) if self._awq: - self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) + model = self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) # After applying AWQ we need to update statistics since AWQ alters the activations statistics = self.awq_algo.update_statistics(statistics) # del is used to prematurely mark non-necessary data as free for garbage collection From 08bb8feb386f1fa90bb3e6eb403f49d159a93e76 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 27 Aug 2025 11:45:20 +0400 Subject: [PATCH 05/15] micro fix --- .../algorithms/weight_compression/algorithm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index f8c61670cc1..73604ef0b5d 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -854,6 +854,7 @@ def get_processed_weight_compression_parameters( ) ) + # Get subset of nodes to define compression ratio ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_skipped) # Handle group size fallback modes @@ -866,6 +867,7 @@ def get_processed_weight_compression_parameters( else: group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} + # Collect statistics for the weights compression statistics = None if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params @@ -884,17 +886,20 @@ def get_processed_weight_compression_parameters( matmul_input_to_output_nodes_map, 
statistic_points ) + # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) + # Print statistics nncf_logger.info( self._get_bitwidth_distribution_str(all_weight_params, ratio_defining_params, skipped_weight_params) ) - final_all_weight_params = list( + # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision + all_weight_params = list( filter(lambda w_params: w_params.compression_config is not None, all_weight_params) ) - return final_all_weight_params, statistics + return all_weight_params, statistics def apply( self, From fd02a15ddb38977b8081c3078bfe47182fc4b818 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 27 Aug 2025 13:25:27 +0400 Subject: [PATCH 06/15] mixrofix --- .../quantization/algorithms/weight_compression/algorithm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 73604ef0b5d..d7b263a4cd7 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -778,8 +778,8 @@ def get_processed_weight_compression_parameters( ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, Any]]: """ Collects and processes weight compression parameters for all nodes in the model, - applies all processing steps including group size fallback handling and compression - configuration setting. + applies all processing steps including mixed precision assignment, group size fallback + handling and compression configuration setting. :param model: Backend-specific input model. :param graph: NNCFGraph instance. 
From 0c8158d267114626b3229fa241f1e1ce1db6883f Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 27 Aug 2025 13:34:03 +0400 Subject: [PATCH 07/15] pre commit fix --- .../quantization/algorithms/weight_compression/algorithm.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index d7b263a4cd7..97032107387 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -778,7 +778,7 @@ def get_processed_weight_compression_parameters( ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, Any]]: """ Collects and processes weight compression parameters for all nodes in the model, - applies all processing steps including mixed precision assignment, group size fallback + applies all processing steps including mixed precision assignment, group size fallback handling and compression configuration setting. :param model: Backend-specific input model. 
@@ -895,9 +895,7 @@ def get_processed_weight_compression_parameters( ) # Filter all_weight_params and by excluding nodes that should remain in their original floating-point precision - all_weight_params = list( - filter(lambda w_params: w_params.compression_config is not None, all_weight_params) - ) + all_weight_params = list(filter(lambda w_params: w_params.compression_config is not None, all_weight_params)) return all_weight_params, statistics From f4fce8a8a4f1f93bc223671a206d0f39f853ebf2 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 27 Aug 2025 18:14:37 +0400 Subject: [PATCH 08/15] review changes --- .../weight_compression/algorithm.py | 73 ++++++++++++------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 97032107387..5d733e06d4d 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -769,7 +769,43 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction - def get_processed_weight_compression_parameters( + def collect_weight_compression_statistics( + self, + model: TModel, + graph: NNCFGraph, + dataset: Dataset, + weight_params: list[WeightCompressionParameters], + statistic_points: Optional[StatisticPointsContainer] = None, + ) -> Optional[dict[str, Any]]: + """ + Collects statistics for weight compression if data-aware compression or + mixed-precision is enabled. + + :param model: Backend-specific input model. + :param graph: NNCFGraph instance. + :param dataset: Dataset for statistics collection. + :param weight_params: Weight parameters for which to collect statistics. + :param statistic_points: Optional pre-collected statistic points. + :return: Statistics and Statistic points container. 
+ """ + statistics = None + if not (self._data_aware_mixed_precision or self._data_aware_compression) and not dataset: + return statistics, statistic_points + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) + + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) + + statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) + return statistics, statistic_points + + def get_weight_compression_parameters( self, model: TModel, graph: NNCFGraph, @@ -777,17 +813,15 @@ def get_processed_weight_compression_parameters( dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, Any]]: """ - Collects and processes weight compression parameters for all nodes in the model, - applies all processing steps including mixed precision assignment, group size fallback - handling and compression configuration setting. + Generates a list of weight compression parameters based on the Weight Compression algorithm + configuration. Determines the appropriate precision, group size, and other parameters for + each node eligible for weight compression. :param model: Backend-specific input model. :param graph: NNCFGraph instance. :param statistic_points: Optional pre-collected statistic points. :param dataset: Optional dataset for statistics collection. - :return: A tuple containing Final processed list of weight parameters ready for compression, - List of weight parameters that were skipped - Statistics collected for data-aware compression. None if data-free compression. 
+ :return: A list of weight compression parameters based on the Weight Compression algorithm configuration. """ nodes_to_compress = self.get_nodes_to_compress(graph) @@ -868,23 +902,10 @@ def get_processed_weight_compression_parameters( group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} # Collect statistics for the weights compression - statistics = None - if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( - matmul_nodes_to_compress, graph - ) - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - statistics = self._get_statistics_for_weights_compression( - matmul_input_to_output_nodes_map, statistic_points - ) + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + statistics, statistic_points = self.collect_weight_compression_statistics( + model, graph, dataset, weight_params, statistic_points + ) # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) @@ -909,9 +930,7 @@ def apply( self.set_backend_entity(model) # Get processed weight compression parameters ready for compression - all_weight_params, statistics = self.get_processed_weight_compression_parameters( - model, graph, statistic_points, dataset - ) + all_weight_params, statistics = self.get_weight_compression_parameters(model, graph, statistic_points, dataset) if self._awq: model = 
self.awq_algo.apply(model, graph, all_weight_params, statistics, self._backend_entity) From 6513236a9a98c2ece93f508ad99bbc6bd8f220af Mon Sep 17 00:00:00 2001 From: anzr299 Date: Wed, 27 Aug 2025 18:43:28 +0400 Subject: [PATCH 09/15] revert weight compression statistic collection method --- .../weight_compression/algorithm.py | 57 ++++++------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 5d733e06d4d..a59f31b360c 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -769,42 +769,6 @@ def is_weight_compression_supported( return is_supported_dtype and not no_bit_reduction - def collect_weight_compression_statistics( - self, - model: TModel, - graph: NNCFGraph, - dataset: Dataset, - weight_params: list[WeightCompressionParameters], - statistic_points: Optional[StatisticPointsContainer] = None, - ) -> Optional[dict[str, Any]]: - """ - Collects statistics for weight compression if data-aware compression or - mixed-precision is enabled. - - :param model: Backend-specific input model. - :param graph: NNCFGraph instance. - :param dataset: Dataset for statistics collection. - :param weight_params: Weight parameters for which to collect statistics. - :param statistic_points: Optional pre-collected statistic points. - :return: Statistics and Statistic points container. 
- """ - statistics = None - if not (self._data_aware_mixed_precision or self._data_aware_compression) and not dataset: - return statistics, statistic_points - matmul_nodes_to_compress = [ - wp.node_with_weight - for wp in weight_params - if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes - ] - matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map(matmul_nodes_to_compress, graph) - - if statistic_points is None: - statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) - statistic_points = self._collect_statistics(dataset, graph, model, statistic_points) - - statistics = self._get_statistics_for_weights_compression(matmul_input_to_output_nodes_map, statistic_points) - return statistics, statistic_points - def get_weight_compression_parameters( self, model: TModel, @@ -902,10 +866,23 @@ def get_weight_compression_parameters( group_size_values = {w_params.weight_name: self._group_size for w_params in ratio_defining_params} # Collect statistics for the weights compression - weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params - statistics, statistic_points = self.collect_weight_compression_statistics( - model, graph, dataset, weight_params, statistic_points - ) + statistics = None + if (self._data_aware_mixed_precision or self._data_aware_compression) and dataset: + weight_params = ratio_defining_params if self._backup_mode == BackupMode.NONE else all_weight_params + matmul_nodes_to_compress = [ + wp.node_with_weight + for wp in weight_params + if wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes + ] + matmul_input_to_output_nodes_map = self.get_matmul_input_to_output_nodes_map( + matmul_nodes_to_compress, graph + ) + if statistic_points is None: + statistic_points = self.get_statistic_points(model, graph, matmul_input_to_output_nodes_map.keys()) + statistic_points = self._collect_statistics(dataset, graph, model, 
statistic_points) + statistics = self._get_statistics_for_weights_compression( + matmul_input_to_output_nodes_map, statistic_points + ) # Set weight compression configuration self._set_weight_compression_config(ratio_defining_params, model, graph, statistic_points, group_size_values) From eb7d78f057040a13f43fb5d1aa27662edfb67b08 Mon Sep 17 00:00:00 2001 From: Aamir Nazir Date: Thu, 28 Aug 2025 17:04:18 +0400 Subject: [PATCH 10/15] Update algorithm.py --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index a59f31b360c..92042956a4c 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -785,7 +785,7 @@ def get_weight_compression_parameters( :param graph: NNCFGraph instance. :param statistic_points: Optional pre-collected statistic points. :param dataset: Optional dataset for statistics collection. - :return: A list of weight compression parameters based on the Weight Compression algorithm configuration. + :return: A tuple of weight compression parameters based on the Weight Compression algorithm configuration. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) From 9b4878084e5fbbc9ccdcf3ba5b5dd203d092f8e9 Mon Sep 17 00:00:00 2001 From: Aamir Nazir Date: Thu, 28 Aug 2025 17:17:57 +0400 Subject: [PATCH 11/15] modify type hint for return of get_weight_compression_parameters --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 92042956a4c..d4b5691d3d7 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -775,7 +775,7 @@ def get_weight_compression_parameters( graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> tuple[list[WeightCompressionParameters], list[WeightCompressionParameters], dict[str, Any]]: + ) -> tuple[list[WeightCompressionParameters], dict[str, Any]]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. 
Determines the appropriate precision, group size, and other parameters for From 7dc5fd09c98b8f02e82be6451813de2c033cdb44 Mon Sep 17 00:00:00 2001 From: Aamir Nazir Date: Fri, 29 Aug 2025 15:43:17 +0400 Subject: [PATCH 12/15] Update src/nncf/quantization/algorithms/weight_compression/algorithm.py Co-authored-by: Daniil Lyakhov --- .../quantization/algorithms/weight_compression/algorithm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index d4b5691d3d7..335fffc3282 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -775,7 +775,7 @@ def get_weight_compression_parameters( graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> tuple[list[WeightCompressionParameters], dict[str, Any]]: + ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm configuration. 
Determines the appropriate precision, group size, and other parameters for From 8f19ced6bb1208ecd058513ee2974d142a842bd1 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 2 Sep 2025 12:07:52 +0400 Subject: [PATCH 13/15] review changes --- .../algorithms/weight_compression/algorithm.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 335fffc3282..96e6b9dcf77 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -777,15 +777,17 @@ def get_weight_compression_parameters( dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: """ - Generates a list of weight compression parameters based on the Weight Compression algorithm - configuration. Determines the appropriate precision, group size, and other parameters for - each node eligible for weight compression. + Generates a mapping of target node names to the collected statistics based on the provided + statistic_points. If statistic_points is None, collects required compression statistics on + the given dataset. :param model: Backend-specific input model. :param graph: NNCFGraph instance. :param statistic_points: Optional pre-collected statistic points. :param dataset: Optional dataset for statistics collection. - :return: A tuple of weight compression parameters based on the Weight Compression algorithm configuration. + :return: A tuple consisting of a list of weight compression parameters, based on the Weight + Compression algorithm configuration, and a mapping of target node names to the + collected statistics. 
""" nodes_to_compress = self.get_nodes_to_compress(graph) From b7a591be64f088b95f8749f1c040b3d0044955d5 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Tue, 2 Sep 2025 14:32:54 +0400 Subject: [PATCH 14/15] minor fix --- .../algorithms/weight_compression/algorithm.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 96e6b9dcf77..d328f1977f4 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -777,9 +777,11 @@ def get_weight_compression_parameters( dataset: Optional[Dataset] = None, ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: """ - Generates a mapping of target node names to the collected statistics based on the provided - statistic_points. If statistic_points is None, collects required compression statistics on - the given dataset. + Generates a list of weight compression parameters based on the Weight Compression algorithm + configuration. Determines the appropriate quantization parameters for each node eligible for + weight compression. Also, Generates a mapping of target node names to the collected statistics + based on the provided statistic_points. If statistic_points is None, collects required + compression statistics on the given dataset. :param model: Backend-specific input model. :param graph: NNCFGraph instance. 
From f6ecb8534177318ea95b9772b15ec9f09ba77cc8 Mon Sep 17 00:00:00 2001 From: anzr299 Date: Mon, 8 Sep 2025 13:37:13 +0400 Subject: [PATCH 15/15] pre commit change --- .../algorithms/weight_compression/algorithm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index d328f1977f4..d0407a1eff4 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -778,17 +778,17 @@ def get_weight_compression_parameters( ) -> tuple[list[WeightCompressionParameters], Optional[dict[str, WCTensorStatistic]]]: """ Generates a list of weight compression parameters based on the Weight Compression algorithm - configuration. Determines the appropriate quantization parameters for each node eligible for - weight compression. Also, Generates a mapping of target node names to the collected statistics - based on the provided statistic_points. If statistic_points is None, collects required + configuration. Determines the appropriate quantization parameters for each node eligible for + weight compression. Also generates a mapping of target node names to the collected statistics + based on the provided statistic_points. If statistic_points is None, collects required compression statistics on the given dataset. :param model: Backend-specific input model. :param graph: NNCFGraph instance. :param statistic_points: Optional pre-collected statistic points. :param dataset: Optional dataset for statistics collection. 
- :return: A tuple consisting of a list of weight compression parameters, based on the Weight - Compression algorithm configuration, and a mapping of target node names to the + :return: A tuple consisting of a list of weight compression parameters, based on the Weight + Compression algorithm configuration, and a mapping of target node names to the collected statistics. """ nodes_to_compress = self.get_nodes_to_compress(graph)