Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
488cacc
Support scale estimation inside GPTQ
alexsu52 Jun 10, 2024
ee64877
fix for INT4_ASYM
alexsu52 Sep 4, 2024
f22e411
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 23, 2024
51b4d7b
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 26, 2024
f66cd1e
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Sep 30, 2024
7ce5a53
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Oct 2, 2024
f74d156
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 11, 2024
5288c79
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 11, 2024
1becf15
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Nov 14, 2024
047d7d9
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 10, 2024
c0c7e57
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 16, 2024
b74dea1
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Dec 27, 2024
26a9a77
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jan 7, 2025
25fcc2c
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Feb 25, 2025
26d4887
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Mar 12, 2025
7748233
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 1, 2025
df251b3
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 8, 2025
4c134c4
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 9, 2025
6147097
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Apr 14, 2025
2b94d28
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 7, 2025
5e312a5
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 9, 2025
2c5e983
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 12, 2025
1d8db1e
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 23, 2025
7244f18
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr May 28, 2025
443048c
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 2, 2025
80d2d8a
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 11, 2025
06bb19b
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jun 26, 2025
5d97d87
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 2, 2025
ae7cece
Merge remote-tracking branch 'upstream/develop' into develop
andreyanufr Jul 10, 2025
3bcd47b
Initial codebook estimation algorithm.
andreyanufr Jul 11, 2025
eb93fdb
First working example for layer wise codebook.
andreyanufr Jul 14, 2025
5bfccee
Experiment.
andreyanufr Jul 16, 2025
509b6ef
Experiment with accuracy improvement.
andreyanufr Jul 17, 2025
872b025
Fix in histogram computation.
andreyanufr Jul 18, 2025
9a8d08b
Experiment.
andreyanufr Jul 28, 2025
ad518c2
Search best codebook by minimizing MatMul diff.
andreyanufr Jul 30, 2025
2f8ec00
Merge remote-tracking branch 'upstream/develop' into aanuf/LUT_per_la…
andreyanufr Sep 10, 2025
b5c4c4a
Merge remote-tracking branch 'upstream/develop' into aanuf/LUT_per_la…
andreyanufr Sep 25, 2025
2fb21b2
Removed unused code.
andreyanufr Sep 25, 2025
812cbed
Remove unused code.
andreyanufr Sep 25, 2025
8c896c8
Replace np by fns.
andreyanufr Sep 26, 2025
a792c0b
Replace np by fns.
andreyanufr Sep 26, 2025
d3c2ab8
Replace np by fns.
andreyanufr Sep 26, 2025
9eec3e3
Replace np by fns.
andreyanufr Sep 29, 2025
5a66fda
Fixed problems with fp64 data types.
andreyanufr Sep 30, 2025
ec432bd
Resolved merge conflict with signed scale.
andreyanufr Oct 8, 2025
735c809
Fixed.
andreyanufr Oct 8, 2025
8ea3946
Removed unused code.
andreyanufr Oct 9, 2025
037a255
Fixed bug with close centroids.
andreyanufr Oct 10, 2025
817a790
Fixed error with argmin/cumsum args.
andreyanufr Oct 13, 2025
be6029a
Removed unused function.
andreyanufr Oct 13, 2025
58a64d8
Fix.
andreyanufr Oct 13, 2025
ffe0cf4
Fix.
andreyanufr Oct 13, 2025
72af4fd
Fix.
andreyanufr Oct 13, 2025
c6f72ee
Fixed bug with codebook type.
andreyanufr Oct 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/nncf/openvino/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def compress_weights_impl(
scale_estimation: bool,
gptq: bool,
lora_correction: bool,
codebook_estimation: bool,
backup_mode: BackupMode,
compression_format: CompressionFormat,
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
Expand All @@ -397,6 +398,7 @@ def compress_weights_impl(
scale_estimation,
gptq,
lora_correction,
codebook_estimation,
backup_mode,
compression_format,
advanced_parameters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from nncf.quantization.advanced_parameters import convert_to_dict_recursively
from nncf.quantization.algorithms.algorithm import Algorithm
from nncf.quantization.algorithms.weight_compression.awq import AWQ
from nncf.quantization.algorithms.weight_compression.codebook_estimation import CodebookEstimation
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
from nncf.quantization.algorithms.weight_compression.constants import CB4_QUANTILES
from nncf.quantization.algorithms.weight_compression.gptq import GPTQ
Expand Down Expand Up @@ -86,6 +87,7 @@ def get_weight_compression_configuration(
scale_estimation: Optional[bool] = None,
gptq: Optional[bool] = None,
lora_correction: Optional[bool] = None,
codebook_estimation: Optional[bool] = None,
ignored_scope: Optional[IgnoredScope] = None,
sensitivity_metric: Optional[SensitivityMetric] = None,
backup_mode: Optional[BackupMode] = None,
Expand All @@ -111,6 +113,7 @@ def get_weight_compression_configuration(
"scale_estimation": scale_estimation or False,
"gptq": gptq or False,
"lora_correction": lora_correction or False,
"codebook_estimation": codebook_estimation or False,
"ignored_scope": ignored_scope or IgnoredScope(),
"sensitivity_metric": (
(
Expand All @@ -137,6 +140,7 @@ def check_user_compression_configuration(
scale_estimation: Optional[bool],
gptq: Optional[bool],
lora_correction: Optional[bool],
codebook_estimation: Optional[bool],
ignored_scope: Optional[IgnoredScope],
sensitivity_metric: Optional[SensitivityMetric],
backup_mode: Optional[BackupMode],
Expand Down Expand Up @@ -167,6 +171,7 @@ def check_user_compression_configuration(
"gptq": gptq,
"lora_correction": lora_correction,
"backup_mode": backup_mode,
"codebook_estimation": codebook_estimation,
}
unsupported_for_int8 = [name for name, value in unsupported_options.items() if value is not None]
if unsupported_for_int8:
Expand Down Expand Up @@ -280,6 +285,7 @@ def __init__(
scale_estimation: bool,
gptq: bool,
lora_correction: bool,
codebook_estimation: bool,
backup_mode: BackupMode = BackupMode.INT8_ASYM,
compression_format: CompressionFormat = CompressionFormat.DQ,
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
Expand Down Expand Up @@ -339,6 +345,7 @@ def __init__(
self._scale_estimation = scale_estimation
self._gptq = gptq
self._lora_correction = lora_correction
self._codebook_estimation = codebook_estimation
self._backup_mode = backup_mode
self._compression_format = compression_format
self._advanced_parameters = (
Expand Down Expand Up @@ -379,6 +386,9 @@ def __init__(
scale_estimation_params.weight_penalty,
)

if self._codebook_estimation:
self._codebook_estimation_algo = CodebookEstimation()

self._data_aware_mixed_precision = (
self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR and self._ratio != 1.0
)
Expand All @@ -387,6 +397,7 @@ def __init__(
or self._scale_estimation
or self._lora_correction
or self._gptq
or self._codebook_estimation
)

@property
Expand Down Expand Up @@ -938,6 +949,15 @@ def apply(
lora_correction_algo = None
description = "Applying Weight Compression"

if self._codebook_estimation:
precomputed_compressed_weights = self._codebook_estimation_algo.apply(
model=model,
graph=graph,
all_weight_params=all_weight_params,
statistics=statistics,
backend_entity=self._backend_entity,
)

if self._gptq:
del statistics
model, precomputed_compressed_weights = self._gptq_algo.apply(
Expand Down
Loading
Loading