SonySemiconductorSolutions · ueno-hiroshi002 · Jan 26, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 22, 2026
diff --git a/model_compression_toolkit/wrapper/constants.py b/model_compression_toolkit/wrapper/constants.py
@@ -17,9 +17,19 @@
 FW_NAME = 'fw_name'
 SDSP_VERSION = 'sdsp_version'
 
+# QuantizationConfig parameters
+ACTIVATION_ERROR_METHOD = 'activation_error_method'
+WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
+Z_THRESHOLD = 'z_threshold'
+LINEAR_COLLAPSING = 'linear_collapsing'
+RESIDUAL_COLLAPSING = 'residual_collapsing'
+
 # MixedPrecisionQuantizationConfig parameters
+DISTANCE_WEIGHTING_METHOD = 'distance_weighting_method'
 NUM_OF_IMAGES = 'num_of_images'
 USE_HESSIAN_BASED_SCORES = 'use_hessian_based_scores'
+
+# ResourceUtilization parameters
 WEIGHTS_COMPRESSION_RATIO = 'weights_compression_ratio'
 
 # Resource utilization data parameters
@@ -32,14 +42,6 @@
 TARGET_RESOURCE_UTILIZATION = 'target_resource_utilization'
 IN_MODULE = 'in_module'
 
-# QuantizationConfig parameters
-ACTIVATION_ERROR_METHOD = 'activation_error_method'
-WEIGHTS_ERROR_METHOD = 'weights_error_method'
-WEIGHTS_BIAS_CORRECTION = 'weights_bias_correction'
-Z_THRESHOLD = 'z_threshold'
-LINEAR_COLLAPSING = 'linear_collapsing'
-RESIDUAL_COLLAPSING = 'residual_collapsing'
-
 # GPTQ specific parameters
 GPTQ_CONFIG = 'gptq_config'
 MODEL = 'model'
@@ -48,7 +50,12 @@
 N_EPOCHS = 'n_epochs'
 OPTIMIZER = 'optimizer'
 
-# Export parameters
+# low_bit_quantizer_ptq parameters
 CONVERTER_VER = 'converter_ver'
 LEARNING_RATE = 'learning_rate'
+
+# Export parameters
 SAVE_MODEL_PATH = 'save_model_path'
+
+# Default value for the 'weights_compression_ratio' parameter
+DEFAULT_COMPRESSION_RATIO = 0.75
diff --git a/model_compression_toolkit/wrapper/mct_wrapper.py b/model_compression_toolkit/wrapper/mct_wrapper.py
@@ -17,13 +17,13 @@
 import model_compression_toolkit as mct
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.wrapper.constants import (
-    REPRESENTATIVE_DATA_GEN, CORE_CONFIG, FW_NAME, SDSP_VERSION,
-    NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES, IN_MODEL, IN_MODULE, MODEL,
-    TARGET_PLATFORM_CAPABILITIES, TARGET_RESOURCE_UTILIZATION,
-    ACTIVATION_ERROR_METHOD, WEIGHTS_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
-    Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING, GPTQ_CONFIG,
-    WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, LEARNING_RATE,
-    CONVERTER_VER, SAVE_MODEL_PATH
+    FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
+    Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING, 
+    DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES, 
+    USE_HESSIAN_BASED_SCORES, WEIGHTS_COMPRESSION_RATIO,
+    IN_MODEL, REPRESENTATIVE_DATA_GEN, CORE_CONFIG, TARGET_PLATFORM_CAPABILITIES,
+    TARGET_RESOURCE_UTILIZATION, IN_MODULE, GPTQ_CONFIG, MODEL,
+    N_EPOCHS, OPTIMIZER, LEARNING_RATE, CONVERTER_VER, SAVE_MODEL_PATH, DEFAULT_COMPRESSION_RATIO
 )
 
 
@@ -55,11 +55,11 @@ def __init__(self):
            :widths: 30, 30, 40
 
            "sdsp_version", "'3.14'", "SDSP version for TPC"
-           "activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method"
-           "weights_bias_correction", "True", "Enable weights bias correction"
-           "z_threshold", "float('inf')", "Z-threshold for quantization"
-           "linear_collapsing", "True", "Enable linear layer collapsing"
-           "residual_collapsing", "True", "Enable residual connection collapsing"
+           "activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
+           "weights_bias_correction", "True", "Enable weights bias correction (low priority)"
+           "z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
+           "linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
+           "residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
            "save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"
 
         **PTQ, mixed_precision**
@@ -69,9 +69,15 @@ def __init__(self):
            :widths: 30, 30, 40
 
            "sdsp_version", "'3.14'", "SDSP version for TPC"
+           "activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
+           "weights_bias_correction", "True", "Enable weights bias correction (low priority)"
+           "z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
+           "linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
+           "residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
+           "distance_weighting_method", "See `MixedPrecisionQuantizationConfig <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html>`_", "Distance weighting method for mixed precision (low priority)"
            "num_of_images", "5", "Number of images for mixed precision"
-           "use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
-           "weights_compression_ratio", "None", "Weights compression ratio for resource util"
+           "use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
+           "weights_compression_ratio", "0.75", "Weights compression ratio for resource util (0.0 to 1.0)"
            "save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"
 
         **GPTQ**
@@ -81,8 +87,13 @@ def __init__(self):
            :widths: 30, 30, 40
 
            "sdsp_version", "'3.14'", "SDSP version for TPC"
+           "activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
+           "weights_bias_correction", "True", "Enable weights bias correction (low priority)"
+           "z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
+           "linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
+           "residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
            "n_epochs", "5", "Number of training epochs for GPTQ"
-           "optimizer", "None", "Optimizer for GPTQ training"
+           "optimizer", "default of `get_keras_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config>`_ or `get_pytorch_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config>`_", "Optimizer for GPTQ training (low priority)"
            "save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"
 
         **GPTQ, mixed_precision**
@@ -92,11 +103,17 @@ def __init__(self):
            :widths: 30, 30, 40
 
            "sdsp_version", "'3.14'", "SDSP version for TPC"
+           "activation_error_method", "mct.core.QuantizationErrorMethod.MSE", "Activation quantization error method (low priority)"
+           "weights_bias_correction", "True", "Enable weights bias correction (low priority)"
+           "z_threshold", "float('inf')", "Z-threshold for quantization (low priority)"
+           "linear_collapsing", "True", "Enable linear layer collapsing (low priority)"
+           "residual_collapsing", "True", "Enable residual connection collapsing (low priority)"
+           "weights_compression_ratio", "0.75", "Weights compression ratio for resource util (0.0 to 1.0)"
            "n_epochs", "5", "Number of training epochs for GPTQ"
-           "optimizer", "None", "Optimizer for GPTQ training"
+           "optimizer", "default of `get_keras_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config>`_ or `get_pytorch_gptq_config <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config>`_", "Optimizer for GPTQ training (low priority)"
+           "distance_weighting_method", "See `MixedPrecisionQuantizationConfig <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html>`_", "Distance weighting method for mixed precision (low priority)"
            "num_of_images", "5", "Number of images for mixed precision"
-           "use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision"
-           "weights_compression_ratio", "None", "Weights compression ratio for resource util"
+           "use_hessian_based_scores", "False", "Use Hessian-based scores for mixed precision (low priority)"
            "save_model_path", "'./qmodel.keras' / './qmodel.onnx'", "Path to save quantized model (Keras/Pytorch)"
 
         """
@@ -112,16 +129,17 @@ def __init__(self):
             LINEAR_COLLAPSING: True,
             RESIDUAL_COLLAPSING: True,
 
-            # GradientPTQConfig
-            N_EPOCHS: 5,
-            OPTIMIZER: None,
-
             # MixedPrecisionQuantizationConfig
+            DISTANCE_WEIGHTING_METHOD: None,
             NUM_OF_IMAGES: 5,
             USE_HESSIAN_BASED_SCORES: False,
 
             # ResourceUtilization
-            WEIGHTS_COMPRESSION_RATIO: None,
+            WEIGHTS_COMPRESSION_RATIO: DEFAULT_COMPRESSION_RATIO,
+
+            # GradientPTQConfig
+            N_EPOCHS: 5,
+            OPTIMIZER: None,
 
             # low_bit_quantizer_ptq
             LEARNING_RATE: 0.001,
@@ -172,16 +190,21 @@ def _initialize_and_validate(self, float_model: Any,
                                 Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
                                 SAVE_MODEL_PATH]
             else:
-                allowed_keys = [FW_NAME, SDSP_VERSION, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
+                allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION,
+                                Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
+                                DISTANCE_WEIGHTING_METHOD, NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
                                 WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
         else:
             if not use_mixed_precision:
-                allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
-                                SAVE_MODEL_PATH]
+                allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION, 
+                                Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
+                                N_EPOCHS, OPTIMIZER, SAVE_MODEL_PATH]
             else:
-                allowed_keys = [FW_NAME, SDSP_VERSION, N_EPOCHS, OPTIMIZER,
+                allowed_keys = [FW_NAME, SDSP_VERSION, ACTIVATION_ERROR_METHOD, WEIGHTS_BIAS_CORRECTION, 
+                                Z_THRESHOLD, LINEAR_COLLAPSING, RESIDUAL_COLLAPSING,
+                                WEIGHTS_COMPRESSION_RATIO, N_EPOCHS, OPTIMIZER, DISTANCE_WEIGHTING_METHOD,
                                 NUM_OF_IMAGES, USE_HESSIAN_BASED_SCORES,
-                                WEIGHTS_COMPRESSION_RATIO, SAVE_MODEL_PATH]
+                                SAVE_MODEL_PATH]
 
         self.params = { k: v for k, v in self.params.items() if k in allowed_keys }
 
@@ -320,22 +343,33 @@ def _setting_PTQ_mixed_precision(self) -> Dict[str, Any]:
         Returns:
             dict: Parameter dictionary for PTQ.
         """
+        params_QCfg = {
+            ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
+            WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
+            Z_THRESHOLD: self.params[Z_THRESHOLD],
+            LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
+            RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
+        }
+        q_config = mct.core.QuantizationConfig(**params_QCfg)
+
         params_MPCfg = {
+            DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
             NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
             USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES]
         }
         mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
-        core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)
+
+        core_config = mct.core.CoreConfig(quantization_config=q_config, 
+                                          mixed_precision_config=mixed_precision_config)
+
         params_RUDCfg = {
             IN_MODEL: self.float_model,
             REPRESENTATIVE_DATA_GEN: self.representative_dataset,
             CORE_CONFIG: core_config,
             TARGET_PLATFORM_CAPABILITIES: self.tpc
         }
         ru_data = self.resource_utilization_data(**params_RUDCfg)
-        weights_compression_ratio = (
-            0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
-            else self.params[WEIGHTS_COMPRESSION_RATIO])
+        weights_compression_ratio = self.params[WEIGHTS_COMPRESSION_RATIO]
         resource_utilization = mct.core.ResourceUtilization(
             ru_data.weights_memory * weights_compression_ratio)
 
@@ -357,7 +391,6 @@ def _setting_PTQ(self) -> Dict[str, Any]:
         """
         params_QCfg = {
             ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
-            WEIGHTS_ERROR_METHOD: mct.core.QuantizationErrorMethod.MSE,
             WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
             Z_THRESHOLD: self.params[Z_THRESHOLD],
             LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
@@ -383,36 +416,42 @@ def _setting_GPTQ_mixed_precision(self) -> Dict[str, Any]:
         Returns:
             dict: Parameter dictionary for GPTQ.
         """
+        params_QCfg = {
+            ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
+            WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
+            Z_THRESHOLD: self.params[Z_THRESHOLD],
+            LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
+            RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
+        }
+        q_config = mct.core.QuantizationConfig(**params_QCfg)        
+
         params_GPTQCfg = {
             N_EPOCHS: self.params[N_EPOCHS],
             OPTIMIZER: self.params[OPTIMIZER]
         }
         gptq_config = self.get_gptq_config(**params_GPTQCfg)
 
         params_MPCfg = {
+            DISTANCE_WEIGHTING_METHOD: self.params[DISTANCE_WEIGHTING_METHOD],
             NUM_OF_IMAGES: self.params[NUM_OF_IMAGES],
             USE_HESSIAN_BASED_SCORES: self.params[USE_HESSIAN_BASED_SCORES],
         }
         mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(**params_MPCfg)
-        core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)
+
+        core_config = mct.core.CoreConfig(quantization_config=q_config,
+                                          mixed_precision_config=mixed_precision_config)
+
         params_RUDCfg = {
             IN_MODEL: self.float_model,
             REPRESENTATIVE_DATA_GEN: self.representative_dataset,
             CORE_CONFIG: core_config,
             TARGET_PLATFORM_CAPABILITIES: self.tpc
         }
         ru_data = self.resource_utilization_data(**params_RUDCfg)
-        weights_compression_ratio = (
-            0.75 if self.params[WEIGHTS_COMPRESSION_RATIO] is None
-            else self.params[WEIGHTS_COMPRESSION_RATIO])
+        weights_compression_ratio = self.params[WEIGHTS_COMPRESSION_RATIO]
         resource_utilization = mct.core.ResourceUtilization(
             ru_data.weights_memory * weights_compression_ratio)
 
-        core_config = mct.core.CoreConfig(
-            mixed_precision_config = mixed_precision_config,
-            quantization_config = mct.core.QuantizationConfig()
-        )
-
         params_GPTQ = {
             self.argname_model: self.float_model,
             REPRESENTATIVE_DATA_GEN: self.representative_dataset,
@@ -430,6 +469,16 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
         Returns:
             dict: Parameter dictionary for GPTQ.
         """
+        params_QCfg = {
+            ACTIVATION_ERROR_METHOD: self.params[ACTIVATION_ERROR_METHOD],
+            WEIGHTS_BIAS_CORRECTION: self.params[WEIGHTS_BIAS_CORRECTION],
+            Z_THRESHOLD: self.params[Z_THRESHOLD],
+            LINEAR_COLLAPSING: self.params[LINEAR_COLLAPSING],
+            RESIDUAL_COLLAPSING: self.params[RESIDUAL_COLLAPSING]
+        }
+        q_config = mct.core.QuantizationConfig(**params_QCfg)
+        core_config = mct.core.CoreConfig(quantization_config=q_config)
+
         params_GPTQCfg = {
             N_EPOCHS: self.params[N_EPOCHS],
             OPTIMIZER: self.params[OPTIMIZER]
@@ -440,6 +489,7 @@ def _setting_GPTQ(self) -> Dict[str, Any]:
             self.argname_model: self.float_model,
             REPRESENTATIVE_DATA_GEN: self.representative_dataset,
             GPTQ_CONFIG: gptq_config,
+            CORE_CONFIG: core_config,
             TARGET_PLATFORM_CAPABILITIES: self.tpc
         }
         return params_GPTQ