Skip to content
25 changes: 16 additions & 9 deletions model_compression_toolkit/wrapper/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,19 @@
FW_NAME = 'fw_name'
SDSP_VERSION = 'sdsp_version'

# QuantizationConfig parameters
ACTIVATION_ERROR_METHOD = 'activation_error_method'
WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
Z_THRESHOLD = 'z_threshold'
LINEAR_COLLAPSING = 'linear_collapsing'
RESIDUAL_COLLAPSING = 'residual_collapsing'

# MixedPrecisionQuantizationConfig parameters
DISTANCE_WEIGHTING_METHOD = 'distance_weighting_method'
NUM_OF_IMAGES = 'num_of_images'
USE_HESSIAN_BASED_SCORES = 'use_hessian_based_scores'

# ResourceUtilization parameters
WEIGHTS_COMPRESSION_RATIO = 'weights_compression_ratio'

# Resource utilization data parameters
Expand All @@ -32,14 +42,6 @@
TARGET_RESOURCE_UTILIZATION = 'target_resource_utilization'
IN_MODULE = 'in_module'

# QuantizationConfig parameters
ACTIVATION_ERROR_METHOD = 'activation_error_method'
WEIGHTS_ERROR_METHOD = 'weights_error_method'
WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
Z_THRESHOLD = 'z_threshold'
LINEAR_COLLAPSING = 'linear_collapsing'
RESIDUAL_COLLAPSING = 'residual_collapsing'

# GPTQ specific parameters
GPTQ_CONFIG = 'gptq_config'
MODEL = 'model'
Expand All @@ -48,7 +50,12 @@
N_EPOCHS = 'n_epochs'
OPTIMIZER = 'optimizer'

# Export parameters
# low_bit_quantizer_ptq parameters
CONVERTER_VER = 'converter_ver'
LEARNING_RATE = 'learning_rate'

# Export parameters
SAVE_MODEL_PATH = 'save_model_path'

# Default weights compression ratio (multiplier applied to the measured weights memory)
DEFAULT_COMPRESSION_RATIO = 0.75
134 changes: 92 additions & 42 deletions model_compression_toolkit/wrapper/mct_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
import model_compression_toolkit as mct
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.wrapper.constants import (
REPRESENTATIVE_DATA_GEN, CORE_CONFIG, FW_NAME, SDSP_VERSION,
NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES, IN_MODEL, IN_MODULE, MODEL,
TARGET_PLATFORM_CAPABILITIES, TARGET_RESOURCE_UTILIZATION,
ACTIVATION_ERROR_METHOD, WEIGHTS_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING, GPTQ_CONFIG,
WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, LEARNING_RATE,
CONVERTER_VER, SAVE_MODEL_PATH
FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES,
USE_HESSIAN_BASED_SCORES, WEIGHTS_COMPRESSION_RATIO,
IN_MODEL, REPRESENTATIVE_DATA_GEN, CORE_CONFIG, TARGET_PLATFORM_CAPABILITIES,
TARGET_RESOURCE_UTILIZATION, IN_MODULE, GPTQ_CONFIG, MODEL,
N_EPOCHS, OPTIMIZER, LEARNING_RATE, CONVERTER_VER, SAVE_MODEL_PATH, DEFAULT_COMPRESSION_RATIO
)


Expand Down Expand Up @@ -55,11 +55,11 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method"
"weights_bias_correction", "True", "Enable weights bias correction"
"z_threshold", "float('inf')", "Z-threshold for quantization"
"linear_collapsing", "True", "Enable linear layer collapsing"
"residual_collapsing", "True", "Enable residual connection collapsing"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

**PTQ, mixed_precision**
Expand All @@ -69,9 +69,15 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"distance_weighting_method", "See `MixedPrecisionQuantizationConfig <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html>`_", "Distance weighting method for mixed precision (low priority)"
"num_of_images", "5", "Number of images for mixed precision"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
"weights_compression_ratio", "None", "Weights compression ratio for resource util"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
"weights_compression_ratio", "0.75", "Weights compression ratio for resource util (0.0~1.0)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

**GPTQ**
Expand All @@ -81,8 +87,13 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"n_epochs", "5", "Number of training epochs for GPTQ"
"optimizer", "None", "Optimizer for GPTQ training"
"optimizer", "default of `get_keras_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config>`_ or `get_pytorch_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config>`_", "Optimizer for GPTQ training (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

**GPTQ, mixed_precision**
Expand All @@ -92,11 +103,17 @@ def __init__(self):
:widths: 30, 30, 40

"sdsp_version", "'3.14'", "SDSP version for TPC"
"activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
"weights_bias_correction", "True", "Enable weights bias correction (low priority)"
"z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
"linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
"residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
"weights_compression_ratio", "0.75", "Weights compression ratio for resource util (0.0~1.0)"
"n_epochs", "5", "Number of training epochs for GPTQ"
"optimizer", "None", "Optimizer for GPTQ training"
"optimizer", "default of `get_keras_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config>`_ or `get_pytorch_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config>`_", "Optimizer for GPTQ training (low priority)"
"distance_weighting_method", "See `MixedPrecisionQuantizationConfig <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html>`_", "Distance weighting method for mixed precision (low priority)"
"num_of_images", "5", "Number of images for mixed precision"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
"weights_compression_ratio", "None", "Weights compression ratio for resource util"
"use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
"save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"

"""
Expand All @@ -112,16 +129,17 @@ def __init__(self):
LINEAR_COLLAPSING: True,
RESIDUAL_COLLAPSING: True,

# GradientPTQConfig
N_EPOCHS: 5,
OPTIMIZER: None,

# MixedPrecisionQuantizationConfig
DISTANCE_WEIGHTING_METHOD: None,
NUM_OF_IMAGES: 5,
USE_HESSIAN_BASED_SCORES: False,

# ResourceUtilization
WEIGHTS_COMPRESSION_RATIO: None,
WEIGHTS_COMPRESSION_RATIO: DEFAULT_COMPRESSION_RATIO,

# GradientPTQConfig
N_EPOCHS: 5,
OPTIMIZER: None,

# low_bit_quantizer_ptq
LEARNING_RATE: 0.001,
Expand Down Expand Up @@ -172,16 +190,21 @@ def _initialize_and_validate(self, float_model: Any,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
SAVE_MODEL_PATH]
else:
allowed_keys = [FW_NAME, SDSP_VERSION, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
else:
if not use_mixed_precision:
allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
SAVE_MODEL_PATH]
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
N_EPOCHS, OPTIMIZER, SAVE_MODEL_PATH]
else:
allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, DISTANCE_WEIGHTING_METHOD,
NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
SAVE_MODEL_PATH]

self.params = { k: v for k, v in self.params.items() if k in allowed_keys }

Expand Down Expand Up @@ -320,22 +343,33 @@ def _setting_PTQ_mixed_precision(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for PTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)

params_MPCfg = {
DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES]
}
mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)

core_config = mct.core.CoreConfig(quantization_config=q_config,
mixed_precision_config=mixed_precision_config)

params_RUDCfg = {
IN_MODEL: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
CORE_CONFIG: core_config,
TARGET_PLATFORM_CAPABILITIES: self.tpc
}
ru_data = self.resource_utilization_data(**params_RUDCfg)
weights_compression_ratio = (
0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
else self.params[WEIGHTS_COMPRESSION_RATIO])
weights_compression_ratio = self.params[WEIGHTS_COMPRESSION_RATIO]
resource_utilization = mct.core.ResourceUtilization(
ru_data.weights_memory * weights_compression_ratio)

Expand All @@ -357,7 +391,6 @@ def _setting_PTQ(self) -> Dict[str, Any]:
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_ERROR_METHOD: mct.core.QuantizationErrorMethod.MSE,
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
Expand All @@ -383,36 +416,42 @@ def _setting_GPTQ_mixed_precision(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for GPTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)

params_GPTQCfg = {
N_EPOCHS: self.params[N_EPOCHS],
OPTIMIZER: self.params[OPTIMIZER]
}
gptq_config = self.get_gptq_config(**params_GPTQCfg)

params_MPCfg = {
DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES],
}
mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)

core_config = mct.core.CoreConfig(quantization_config=q_config,
mixed_precision_config=mixed_precision_config)

params_RUDCfg = {
IN_MODEL: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
CORE_CONFIG: core_config,
TARGET_PLATFORM_CAPABILITIES: self.tpc
}
ru_data = self.resource_utilization_data(**params_RUDCfg)
weights_compression_ratio = (
0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
else self.params[WEIGHTS_COMPRESSION_RATIO])
weights_compression_ratio = self.params[WEIGHTS_COMPRESSION_RATIO]
resource_utilization = mct.core.ResourceUtilization(
ru_data.weights_memory * weights_compression_ratio)

core_config = mct.core.CoreConfig(
mixed_precision_config = mixed_precision_config,
quantization_config = mct.core.QuantizationConfig()
)

params_GPTQ = {
self.argname_model: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
Expand All @@ -430,6 +469,16 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
Returns:
dict: Parameter dictionary for GPTQ.
"""
params_QCfg = {
ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
Z_THRESHOLD: self.params[Z_THRESHOLD],
LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
}
q_config = mct.core.QuantizationConfig(**params_QCfg)
core_config = mct.core.CoreConfig(quantization_config=q_config)

params_GPTQCfg = {
N_EPOCHS: self.params[N_EPOCHS],
OPTIMIZER: self.params[OPTIMIZER]
Expand All @@ -440,6 +489,7 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
self.argname_model: self.float_model,
REPRESENTATIVE_DATA_GEN: self.representative_dataset,
GPTQ_CONFIG: gptq_config,
CORE_CONFIG: core_config,
TARGET_PLATFORM_CAPABILITIES: self.tpc
}
return params_GPTQ
Expand Down
Loading