Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
73b8439
Refactor quantization method parameters
KazunoriSumiya Jan 13, 2026
08f6e31
Remove check for TPC package in verify_packages.py
KazunoriSumiya Jan 14, 2026
f5b205a
Add sdsp_version parameter to MCTWrapper and update unit tests for re…
KazunoriSumiya Jan 14, 2026
86ab900
Refactor code structure for improved readability and maintainability
KazunoriSumiya Jan 14, 2026
6a61bce
Fix assertion order in MCTWrapper unit tests for consistency
KazunoriSumiya Jan 14, 2026
669bc18
Add new quantization parameters and update MCTWrapper configuration
KazunoriSumiya Jan 15, 2026
7ee7e85
update quantization parameters in MCTWrapper
KazunoriSumiya Jan 15, 2026
d74951f
Remove unnecessary blank line in constants.py
KazunoriSumiya Jan 16, 2026
53318c2
Fix formatting inconsistencies and update framework in MCTWrapper uni…
KazunoriSumiya Jan 16, 2026
9339351
Update allowed_keys in MCTWrapper to include DISTANCE_WEIGHTING_METHO…
KazunoriSumiya Jan 16, 2026
e1284db
Refactor CoreConfig initialization in MCTWrapper for improved readabi…
KazunoriSumiya Jan 16, 2026
73a9070
Undo the change that removed the unnecessary lines
KazunoriSumiya Jan 16, 2026
a13e987
Update distance weighting method description and improve allowed_keys…
KazunoriSumiya Jan 16, 2026
29faba2
Remove unused QuantizationConfig parameters from constants.py
KazunoriSumiya Jan 16, 2026
16902a0
Enhance quantization parameters and documentation
KazunoriSumiya Jan 19, 2026
a04156f
Refactor MCTWrapper and related components for enhanced quantization …
KazunoriSumiya Jan 19, 2026
6e68519
Merge branch 'feature_tpc_improvement_modify_parameters' of https://g…
KazunoriSumiya Jan 19, 2026
783664c
Enhance documentation and fix minor typos in MCTWrapper and associate…
KazunoriSumiya Jan 19, 2026
c2c4780
Remove docstring for GPTQ with Mixed Precision in test_quantization f…
KazunoriSumiya Jan 19, 2026
d824c34
Fix formatting of mixed precision configuration instantiation in MCTW…
KazunoriSumiya Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion model_compression_toolkit/verify_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import importlib
from packaging import version

Expand Down
26 changes: 17 additions & 9 deletions model_compression_toolkit/wrapper/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,20 @@
FW_NAME = 'fw_name'
SDSP_VERSION = 'sdsp_version'

# QuantizationConfig parameters
ACTIVATION_ERROR_METHOD = 'activation_error_method'
WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
Z_THRESHOLD = 'z_threshold'
LINEAR_COLLAPSING = 'linear_collapsing'
RESIDUAL_COLLAPSING = 'residual_collapsing'
WEIGHTS_ERROR_METHOD = 'weights_error_method'

# MixedPrecisionQuantizationConfig parameters
DISTANCE_WEIGHTING_METHOD = 'distance_weighting_method'
NUM_OF_IMAGES = 'num_of_images'
USE_HESSIAN_BASED_SCORES = 'use_hessian_based_scores'

# ResourceUtilization parameters
WEIGHTS_COMPRESSION_RATIO = 'weights_compression_ratio'

# Resource utilization data parameters
Expand All @@ -32,14 +43,6 @@
TARGET_RESOURCE_UTILIZATION = 'target_resource_utilization'
IN_MODULE = 'in_module'

# QuantizationConfig parameters
ACTIVATION_ERROR_METHOD = 'activation_error_method'
WEIGHTS_ERROR_METHOD = 'weights_error_method'
WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
Z_THRESHOLD = 'z_threshold'
LINEAR_COLLAPSING = 'linear_collapsing'
RESIDUAL_COLLAPSING = 'residual_collapsing'

# GPTQ specific parameters
GPTQ_CONFIG = 'gptq_config'
MODEL = 'model'
Expand All @@ -48,7 +51,12 @@
N_EPOCHS = 'n_epochs'
OPTIMIZER = 'optimizer'

# Export parameters
# low_bit_quantizer_ptq
CONVERTER_VER = 'converter_ver'
LEARNING_RATE = 'learning_rate'

# Export parameters
SAVE_MODEL_PATH = 'save_model_path'

# default compression ratio
DEFAULT_COMPRESSION_RATIO = 0.75
128 changes: 93 additions & 35 deletions model_compression_toolkit/wrapper/mct_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
import model_compression_toolkit as mct
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.wrapper.constants import (
REPRESENTATIVE_DATA_GEN, CORE_CONFIG, FW_NAME, SDSP_VERSION,
NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES, IN_MODEL, IN_MODULE, MODEL,
TARGET_PLATFORM_CAPABILITIES, TARGET_RESOURCE_UTILIZATION,
ACTIVATION_ERROR_METHOD, WEIGHTS_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING, GPTQ_CONFIG,
WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, LEARNING_RATE,
CONVERTER_VER, SAVE_MODEL_PATH
FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING, WEIGHTS_ERROR_METHOD,
DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES,
USE_HESSIAN_BASED_SCORES, WEIGHTS_COMPRESSION_RATIO,
IN_MODEL, REPRESENTATIVE_DATA_GEN, CORE_CONFIG, TARGET_PLATFORM_CAPABILITIES,
TARGET_RESOURCE_UTILIZATION, IN_MODULE, GPTQ_CONFIG, MODEL,
N_EPOCHS, OPTIMIZER, LEARNING_RATE, CONVERTER_VER, SAVE_MODEL_PATH, DEFAULT_COMPRESSION_RATIO
)


Expand Down Expand Up @@ -55,11 +55,11 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method"
"weights_bias_correction", "True", "Enable weights bias correction"
"z_threshold", "float('inf')", "Z-threshold for quantization"
"linear_collapsing", "True", "Enable linear layer collapsing"
"residual_collapsing", "True", "Enable residual connection collapsing"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

**PTQ, mixed_precision**
Expand All @@ -69,8 +69,14 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"distance_weighting_method", "None", "Distance weighting method for mixed precision (low priority)"
"num_of_images", "5", "Number of images for mixed precision"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
"weights_compression_ratio", "None", "Weights compression ratio for resource util"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

Expand All @@ -81,8 +87,13 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"n_epochs", "5", "Number of training epochs for GPTQ"
"optimizer", "None", "Optimizer for GPTQ training"
"optimizer", "None", "Optimizer for GPTQ training (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

**GPTQ, mixed_precision**
Expand All @@ -92,11 +103,17 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"weights_compression_ratio", "None", "Weights compression ratio for resource util"
"n_epochs", "5", "Number of training epochs for GPTQ"
"optimizer", "None", "Optimizer for GPTQ training"
"optimizer", "None", "Optimizer for GPTQ training (low priority)"
"distance_weighting_method", "None", "Distance weighting method for GPTQ (low priority)"
"num_of_images", "5", "Number of images for mixed precision"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
"weights_compression_ratio", "None", "Weights compression ratio for resource util"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

"""
Expand All @@ -112,17 +129,18 @@ def __init__(self):
LINEAR_COLLAPSING: True,
RESIDUAL_COLLAPSING: True,

# GradientPTQConfig
N_EPOCHS: 5,
OPTIMIZER: None,

# MixedPrecisionQuantizationConfig
DISTANCE_WEIGHTING_METHOD: None,
NUM_OF_IMAGES: 5,
USE_HESSIAN_BASED_SCORES: False,

# ResourceUtilization
WEIGHTS_COMPRESSION_RATIO: None,

# GradientPTQConfig
N_EPOCHS: 5,
OPTIMIZER: None,

# low_bit_quantizer_ptq
LEARNING_RATE: 0.001,
CONVERTER_VER: 'v3.14',
Expand Down Expand Up @@ -172,16 +190,21 @@ def _initialize_and_validate(self, float_model: Any,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
SAVE_MODEL_PATH]
else:
allowed_keys = [FW_NAME, SDSP_VERSION, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
else:
if not use_mixed_precision:
allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
SAVE_MODEL_PATH]
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
N_EPOCHS, OPTIMIZER, SAVE_MODEL_PATH]
else:
allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, DISTANCE_WEIGHTING_METHOD,
NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
SAVE_MODEL_PATH]

self.params = { k: v for k, v in self.params.items() if k in allowed_keys }

Expand Down Expand Up @@ -320,12 +343,26 @@ def _setting_PTQ_mixed_precision(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for PTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_ERROR_METHOD: mct.core.QuantizationErrorMethod.MSE,
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)

params_MPCfg = {
DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES]
}
mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)

core_config = mct.core.CoreConfig(quantization_config=q_config,
mixed_precision_config=mixed_precision_config)

params_RUDCfg = {
IN_MODEL: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
Expand All @@ -334,7 +371,7 @@ def _setting_PTQ_mixed_precision(self) -> Dict[str, Any]:
}
ru_data = self.resource_utilization_data(**params_RUDCfg)
weights_compression_ratio = (
0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
DEFAULT_COMPRESSION_RATIO if self.params[WEIGHTS_COMPRESSION_RATIO] is None
else self.params[WEIGHTS_COMPRESSION_RATIO])
resource_utilization = mct.core.ResourceUtilization(
ru_data.weights_memory * weights_compression_ratio)
Expand Down Expand Up @@ -383,18 +420,32 @@ def _setting_GPTQ_mixed_precision(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for GPTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_ERROR_METHOD: mct.core.QuantizationErrorMethod.MSE,
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)

params_GPTQCfg = {
N_EPOCHS: self.params[N_EPOCHS],
OPTIMIZER: self.params[OPTIMIZER]
}
gptq_config = self.get_gptq_config(**params_GPTQCfg)

params_MPCfg = {
DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES],
}
mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)

core_config = mct.core.CoreConfig(quantization_config=q_config,
mixed_precision_config=mixed_precision_config)

params_RUDCfg = {
IN_MODEL: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
Expand All @@ -403,16 +454,11 @@ def _setting_GPTQ_mixed_precision(self) -> Dict[str, Any]:
}
ru_data = self.resource_utilization_data(**params_RUDCfg)
weights_compression_ratio = (
0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
DEFAULT_COMPRESSION_RATIO if self.params[WEIGHTS_COMPRESSION_RATIO] is None
else self.params[WEIGHTS_COMPRESSION_RATIO])
resource_utilization = mct.core.ResourceUtilization(
ru_data.weights_memory * weights_compression_ratio)

core_config = mct.core.CoreConfig(
mixed_precision_config = mixed_precision_config,
quantization_config = mct.core.QuantizationConfig()
)

params_GPTQ = {
self.argname_model: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
Expand All @@ -430,6 +476,17 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for GPTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_ERROR_METHOD: mct.core.QuantizationErrorMethod.MSE,
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)
core_config = mct.core.CoreConfig(quantization_config=q_config)

params_GPTQCfg = {
N_EPOCHS: self.params[N_EPOCHS],
OPTIMIZER: self.params[OPTIMIZER]
Expand All @@ -440,6 +497,7 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
self.argname_model: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
GPTQ_CONFIG: gptq_config,
CORE_CONFIG: core_config,
TARGET_PLATFORM_CAPABILITIES: self.tpc
}
return params_GPTQ
Expand Down
Loading