Skip to content

Commit 16902a0

Browse files
Enhance quantization parameters and documentation
1 parent 29faba2 commit 16902a0

File tree

4 files changed

+123
-76
lines changed

4 files changed

+123
-76
lines changed

tests_pytest/keras_tests/e2e_tests/wrapper/test_mct_wrapper_keras_e2e.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,16 @@ def PTQ_Keras_mixed_precision(float_model: keras.Model) -> Tuple[bool, keras.Mod
166166
# Configure mixed precision parameters for optimal compression
167167
param_items = [
168168
['sdsp_version', '3.14'], # The version of the SDSP converter.
169-
['num_of_images', 5], # Number of images
170-
['use_hessian_based_scores', False], # Use Hessian scores
171-
['weights_compression_ratio', 0.75], # Compression ratio
172-
['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the model.
169+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
170+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
171+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
172+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
173+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
174+
['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).
175+
['num_of_images', 5], # Number of images for mixed precision.
176+
['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).
177+
['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).
178+
['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.
173179
]
174180

175181
# Execute quantization with mixed precision using MCTWrapper
@@ -195,9 +201,14 @@ def GPTQ_Keras(float_model: keras.Model) -> Tuple[bool, keras.Model]:
195201
# Configure GPTQ-specific parameters for gradient-based optimization
196202
param_items = [
197203
['sdsp_version', '3.14'], # The version of the SDSP converter.
198-
['n_epochs', 5], # Number of training epochs
199-
['optimizer', None], # Optimizer for training
200-
['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the model.
204+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
205+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
206+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
207+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
208+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
209+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
210+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
211+
['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the quantized model.
201212
]
202213

203214
# Execute gradient-based quantization using MCTWrapper
@@ -209,18 +220,37 @@ def GPTQ_Keras(float_model: keras.Model) -> Tuple[bool, keras.Model]:
209220
# Run GPTQ + Mixed Precision Quantization (mixed_precision) with Keras
210221
@decorator
211222
def GPTQ_Keras_mixed_precision(float_model: keras.Model) -> Tuple[bool, keras.Model]:
223+
"""
224+
Perform Gradient-based Post-Training Quantization with Mixed Precision (GPTQ + mixed_precision).
225+
226+
This combines the benefits of both techniques:
227+
- GPTQ: Gradient-based optimization for better quantization accuracy
228+
- Mixed Precision: Optimal bit-width allocation for size/accuracy trade-off
229+
230+
Args:
231+
float_model: Original floating-point Keras model
232+
233+
Returns:
234+
tuple: (success_flag, quantized_model)
235+
"""
212236
framework = 'tensorflow'
213237
method = 'GPTQ'
214238
use_mixed_precision = True
215239

216240
param_items = [
217241
['sdsp_version', '3.14'], # The version of the SDSP converter.
218-
['n_epochs', 5], # Number of training epochs
219-
['optimizer', None], # Optimizer for training
220-
['num_of_images', 5], # Number of images
221-
['use_hessian_based_scores', False], # Use Hessian scores
222-
['weights_compression_ratio', 0.75], # Compression ratio
223-
['save_model_path', './qmodel_GPTQ_Keras_mixed_precision.keras'] # Path to save the model.
242+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
243+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
244+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
245+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
246+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
247+
['weights_compression_ratio', 0.75], # Compression ratio for weights.
248+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
249+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
250+
['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).
251+
['num_of_images', 5], # Number of images to use for calibration.
252+
['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.
253+
['save_model_path', './qmodel_GPTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.
224254
]
225255

226256
wrapper = mct.wrapper.mct_wrapper.MCTWrapper()

tests_pytest/pytorch_tests/e2e_tests/wrapper/test_mct_wrapper_pytorch_e2e.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,16 @@ def PTQ_Pytorch_mixed_precision(float_model):
198198
# Define mixed precision quantization parameters
199199
param_items = [
200200
['sdsp_version', '3.14'], # The version of the SDSP converter.
201-
['num_of_images', 5], # Number of images
202-
['use_hessian_based_scores', False], # Use Hessian scores
203-
['weights_compression_ratio', 0.5], # Compression ratio
204-
['save_model_path', './qmodel_PTQ_Pytorch_mixed_precision.onnx'] # Path to save the model.
201+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
202+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
203+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
204+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
205+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
206+
['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).
207+
['num_of_images', 5], # Number of images for mixed precision.
208+
['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).
209+
['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).
210+
['save_model_path', './qmodel_PTQ_Pytorch_mixed_precision.onnx'] # Path to save quantized model as ONNX.
205211
]
206212

207213
# Execute mixed precision quantization and export to ONNX
@@ -238,9 +244,14 @@ def GPTQ_Pytorch(float_model):
238244
# Define GPTQ-specific parameters for gradient-based optimization
239245
param_items = [
240246
['sdsp_version', '3.14'], # The version of the SDSP converter.
241-
['n_epochs', 5], # Number of training epochs
242-
['optimizer', None], # Optimizer for training
243-
['save_model_path', './qmodel_GPTQ_Pytorch.onnx'] # Path to save the model.
247+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
248+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
249+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
250+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
251+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
252+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
253+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
254+
['save_model_path', './qmodel_GPTQ_Pytorch.onnx'] # Path to save quantized model as ONNX.
244255
]
245256

246257
# Execute gradient-based quantization and export to ONNX
@@ -276,12 +287,18 @@ def GPTQ_Pytorch_mixed_precision(float_model):
276287
# Define GPTQ mixed precision parameters for advanced optimization
277288
param_items = [
278289
['sdsp_version', '3.14'], # The version of the SDSP converter.
279-
['n_epochs', 5], # Number of training epochs
280-
['optimizer', None], # Optimizer for training
281-
['num_of_images', 5], # Number of images
282-
['use_hessian_based_scores', False], # Use Hessian scores
283-
['weights_compression_ratio', 0.5], # Compression ratio
284-
['save_model_path', './qmodel_GPTQ_Pytorch_mixed_precision.onnx'] # Path to save the model.
290+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
291+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
292+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
293+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
294+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
295+
['weights_compression_ratio', 0.75], # Compression ratio for weights.
296+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
297+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
298+
['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).
299+
['num_of_images', 5], # Number of images to use for calibration.
300+
['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.
301+
['save_model_path', './qmodel_GPTQ_Pytorch_mixed_precision.onnx'] # Path to save quantized model as ONNX.
285302
]
286303

287304
# Execute advanced GPTQ with mixed precision and export to ONNX

tutorials/notebooks/mct_features_notebooks/keras/example_keras_mct_wrapper.ipynb

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -318,11 +318,11 @@
318318
" # Parameter configuration for PTQ\n",
319319
" param_items = [\n",
320320
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
321-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
322-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
323-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
324-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
325-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
321+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
322+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
323+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
324+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
325+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
326326
" ['save_model_path', './qmodel_PTQ_Keras.keras'] # Path to save the quantized model.\n",
327327
" ]\n",
328328
"\n",
@@ -383,14 +383,14 @@
383383
" # Parameter configuration for PTQ with Mixed Precision\n",
384384
" param_items = [\n",
385385
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
386-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
387-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
388-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
389-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
390-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
391-
" ['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).\n",
392-
" ['num_of_images', 5], # Number of epochs for gradient-based fine-tuning.\n",
393-
" ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance.\n",
386+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
387+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
388+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
389+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
390+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
391+
" ['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).\n",
392+
" ['num_of_images', 5], # Number of images for mixed precision.\n",
393+
" ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).\n",
394394
" ['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).\n",
395395
" ['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.\n",
396396
" ]\n",
@@ -453,13 +453,13 @@
453453
" param_items = [\n",
454454
" # Platform configuration\n",
455455
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
456-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
457-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
458-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
459-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
460-
" ['residual_collapsing', True], # Enable residual connection collapsing. \n",
456+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
457+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
458+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
459+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
460+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority). \n",
461461
" ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n",
462-
" ['optimizer', None], # Optimizer to use during fine-tuning.\n",
462+
" ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n",
463463
" ['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the quantized model.\n",
464464
" ]\n",
465465
"\n",
@@ -524,14 +524,14 @@
524524
" param_items = [\n",
525525
" # Platform configuration\n",
526526
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
527-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
528-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
529-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
530-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
531-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
527+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
528+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
529+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
530+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
531+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
532532
" ['weights_compression_ratio', 0.75], # Compression ratio for weights.\n",
533533
" ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n",
534-
" ['optimizer', None], # Optimizer to use during fine-tuning.\n",
534+
" ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n",
535535
" ['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).\n",
536536
" ['num_of_images', 5], # Number of images to use for calibration.\n",
537537
" ['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.\n",

0 commit comments

Comments
 (0)