openvinotoolkit · andreyanufr · Jun 10, 2024 · Sep 4, 2024 · Sep 23, 2024 · Sep 26, 2024
@@ -376,6 +376,7 @@ def compress_weights_impl(
     scale_estimation: bool,
     gptq: bool,
     lora_correction: bool,
+    codebook_estimation: bool,
     backup_mode: BackupMode,
     compression_format: CompressionFormat,
     advanced_parameters: Optional[AdvancedCompressionParameters] = None,
@@ -397,6 +398,7 @@ def compress_weights_impl(
         scale_estimation,
         gptq,
         lora_correction,
+        codebook_estimation,
         backup_mode,
         compression_format,
         advanced_parameters,

@@ -41,6 +41,7 @@
 from nncf.quantization.advanced_parameters import convert_to_dict_recursively
 from nncf.quantization.algorithms.algorithm import Algorithm
 from nncf.quantization.algorithms.weight_compression.awq import AWQ
+from nncf.quantization.algorithms.weight_compression.codebook_estimation import CodebookEstimation
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
 from nncf.quantization.algorithms.weight_compression.constants import CB4_QUANTILES
 from nncf.quantization.algorithms.weight_compression.gptq import GPTQ
@@ -86,6 +87,7 @@ def get_weight_compression_configuration(
     scale_estimation: Optional[bool] = None,
     gptq: Optional[bool] = None,
     lora_correction: Optional[bool] = None,
+    codebook_estimation: Optional[bool] = None,
     ignored_scope: Optional[IgnoredScope] = None,
     sensitivity_metric: Optional[SensitivityMetric] = None,
     backup_mode: Optional[BackupMode] = None,
@@ -111,6 +113,7 @@ def get_weight_compression_configuration(
         "scale_estimation": scale_estimation or False,
         "gptq": gptq or False,
         "lora_correction": lora_correction or False,
+        "codebook_estimation": codebook_estimation or False,
         "ignored_scope": ignored_scope or IgnoredScope(),
         "sensitivity_metric": (
             (
@@ -137,6 +140,7 @@ def check_user_compression_configuration(
     scale_estimation: Optional[bool],
     gptq: Optional[bool],
     lora_correction: Optional[bool],
+    codebook_estimation: Optional[bool],
     ignored_scope: Optional[IgnoredScope],
     sensitivity_metric: Optional[SensitivityMetric],
     backup_mode: Optional[BackupMode],
@@ -167,6 +171,7 @@ def check_user_compression_configuration(
             "gptq": gptq,
             "lora_correction": lora_correction,
             "backup_mode": backup_mode,
+            "codebook_estimation": codebook_estimation,
         }
         unsupported_for_int8 = [name for name, value in unsupported_options.items() if value is not None]
         if unsupported_for_int8:
@@ -280,6 +285,7 @@ def __init__(
         scale_estimation: bool,
         gptq: bool,
         lora_correction: bool,
+        codebook_estimation: bool,
         backup_mode: BackupMode = BackupMode.INT8_ASYM,
         compression_format: CompressionFormat = CompressionFormat.DQ,
         advanced_parameters: Optional[AdvancedCompressionParameters] = None,
@@ -339,6 +345,7 @@ def __init__(
         self._scale_estimation = scale_estimation
         self._gptq = gptq
         self._lora_correction = lora_correction
+        self._codebook_estimation = codebook_estimation
         self._backup_mode = backup_mode
         self._compression_format = compression_format
         self._advanced_parameters = (
@@ -379,6 +386,9 @@ def __init__(
                 scale_estimation_params.weight_penalty,
             )
 
+        if self._codebook_estimation:
+            self._codebook_estimation_algo = CodebookEstimation()
+
         self._data_aware_mixed_precision = (
             self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and self._ratio != 1.0
         )
@@ -387,6 +397,7 @@ def __init__(
             or self._scale_estimation
             or self._lora_correction
             or self._gptq
+            or self._codebook_estimation
         )
 
     @property
@@ -938,6 +949,15 @@ def apply(
         lora_correction_algo = None
         description = "Applying Weight Compression"
 
+        if self._codebook_estimation:
+            precomputed_compressed_weights = self._codebook_estimation_algo.apply(
+                model=model,
+                graph=graph,
+                all_weight_params=all_weight_params,
+                statistics=statistics,
+                backend_entity=self._backend_entity,
+            )
+
         if self._gptq:
             del statistics
             model, precomputed_compressed_weights = self._gptq_algo.apply(