Skip to content

Commit 16902a0

Browse files
Enhance quantization parameters and documentation
1 parent 29faba2 commit 16902a0

File tree

4 files changed

+123
-76
lines changed

4 files changed

+123
-76
lines changed

tests_pytest/keras_tests/e2e_tests/wrapper/test_mct_wrapper_keras_e2e.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,16 @@ def PTQ_Keras_mixed_precision(float_model: keras.Model) -> Tuple[bool, keras.Mod
166166
# Configure mixed precision parameters for optimal compression
167167
param_items = [
168168
['sdsp_version', '3.14'], # The version of the SDSP converter.
169-
['num_of_images', 5], # Number of images
170-
['use_hessian_based_scores', False], # Use Hessian scores
171-
['weights_compression_ratio', 0.75], # Compression ratio
172-
['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the model.
169+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
170+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
171+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
172+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
173+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
174+
['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).
175+
['num_of_images', 5], # Number of images for mixed precision.
176+
['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).
177+
['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).
178+
['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.
173179
]
174180

175181
# Execute quantization with mixed precision using MCTWrapper
@@ -195,9 +201,14 @@ def GPTQ_Keras(float_model: keras.Model) -> Tuple[bool, keras.Model]:
195201
# Configure GPTQ-specific parameters for gradient-based optimization
196202
param_items = [
197203
['sdsp_version', '3.14'], # The version of the SDSP converter.
198-
['n_epochs', 5], # Number of training epochs
199-
['optimizer', None], # Optimizer for training
200-
['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the model.
204+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
205+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
206+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
207+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
208+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
209+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
210+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
211+
['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the quantized model.
201212
]
202213

203214
# Execute gradient-based quantization using MCTWrapper
@@ -209,18 +220,37 @@ def GPTQ_Keras(float_model: keras.Model) -> Tuple[bool, keras.Model]:
209220
# Run GPTQ + Mixed Precision Quantization (mixed_precision) with Keras
210221
@decorator
211222
def GPTQ_Keras_mixed_precision(float_model: keras.Model) -> Tuple[bool, keras.Model]:
223+
"""
224+
Perform Gradient-based Post-Training Quantization with Mixed Precision (GPTQ + mixed_precision).
225+
226+
This combines the benefits of both techniques:
227+
- GPTQ: Gradient-based optimization for better quantization accuracy
228+
- Mixed Precision: Optimal bit-width allocation for size/accuracy trade-off
229+
230+
Args:
231+
float_model: Original floating-point Keras model
232+
233+
Returns:
234+
tuple: (success_flag, quantized_model)
235+
"""
212236
framework = 'tensorflow'
213237
method = 'GPTQ'
214238
use_mixed_precision = True
215239

216240
param_items = [
217241
['sdsp_version', '3.14'], # The version of the SDSP converter.
218-
['n_epochs', 5], # Number of training epochs
219-
['optimizer', None], # Optimizer for training
220-
['num_of_images', 5], # Number of images
221-
['use_hessian_based_scores', False], # Use Hessian scores
222-
['weights_compression_ratio', 0.75], # Compression ratio
223-
['save_model_path', './qmodel_GPTQ_Keras_mixed_precision.keras'] # Path to save the model.
242+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
243+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
244+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
245+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
246+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
247+
['weights_compression_ratio', 0.75], # Compression ratio for weights.
248+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
249+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
250+
['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).
251+
['num_of_images', 5], # Number of images to use for calibration.
252+
['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.
253+
['save_model_path', './qmodel_GPTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.
224254
]
225255

226256
wrapper = mct.wrapper.mct_wrapper.MCTWrapper()

tests_pytest/pytorch_tests/e2e_tests/wrapper/test_mct_wrapper_pytorch_e2e.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,16 @@ def PTQ_Pytorch_mixed_precision(float_model):
198198
# Define mixed precision quantization parameters
199199
param_items = [
200200
['sdsp_version', '3.14'], # The version of the SDSP converter.
201-
['num_of_images', 5], # Number of images
202-
['use_hessian_based_scores', False], # Use Hessian scores
203-
['weights_compression_ratio', 0.5], # Compression ratio
204-
['save_model_path', './qmodel_PTQ_Pytorch_mixed_precision.onnx'] # Path to save the model.
201+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
202+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
203+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
204+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
205+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
206+
['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).
207+
['num_of_images', 5], # Number of images for mixed precision.
208+
['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).
209+
['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).
210+
['save_model_path', './qmodel_PTQ_Pytorch_mixed_precision.onnx'] # Path to save quantized model as ONNX.
205211
]
206212

207213
# Execute mixed precision quantization and export to ONNX
@@ -238,9 +244,14 @@ def GPTQ_Pytorch(float_model):
238244
# Define GPTQ-specific parameters for gradient-based optimization
239245
param_items = [
240246
['sdsp_version', '3.14'], # The version of the SDSP converter.
241-
['n_epochs', 5], # Number of training epochs
242-
['optimizer', None], # Optimizer for training
243-
['save_model_path', './qmodel_GPTQ_Pytorch.onnx'] # Path to save the model.
247+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
248+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
249+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
250+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
251+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
252+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
253+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
254+
['save_model_path', './qmodel_GPTQ_Pytorch.onnx'] # Path to save quantized model as ONNX.
244255
]
245256

246257
# Execute gradient-based quantization and export to ONNX
@@ -276,12 +287,18 @@ def GPTQ_Pytorch_mixed_precision(float_model):
276287
# Define GPTQ mixed precision parameters for advanced optimization
277288
param_items = [
278289
['sdsp_version', '3.14'], # The version of the SDSP converter.
279-
['n_epochs', 5], # Number of training epochs
280-
['optimizer', None], # Optimizer for training
281-
['num_of_images', 5], # Number of images
282-
['use_hessian_based_scores', False], # Use Hessian scores
283-
['weights_compression_ratio', 0.5], # Compression ratio
284-
['save_model_path', './qmodel_GPTQ_Pytorch_mixed_precision.onnx'] # Path to save the model.
290+
['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).
291+
['weights_bias_correction', True], # Enable bias correction for weights (low priority).
292+
['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).
293+
['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).
294+
['residual_collapsing', True], # Enable residual connection collapsing (low priority).
295+
['weights_compression_ratio', 0.75], # Compression ratio for weights.
296+
['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.
297+
['optimizer', None], # Optimizer to use during fine-tuning (low priority).
298+
['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).
299+
['num_of_images', 5], # Number of images to use for calibration.
300+
['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.
301+
['save_model_path', './qmodel_GPTQ_Pytorch_mixed_precision.onnx'] # Path to save quantized model as ONNX.
285302
]
286303

287304
# Execute advanced GPTQ with mixed precision and export to ONNX

tutorials/notebooks/mct_features_notebooks/keras/example_keras_mct_wrapper.ipynb

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -318,11 +318,11 @@
318318
" # Parameter configuration for PTQ\n",
319319
" param_items = [\n",
320320
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
321-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
322-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
323-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
324-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
325-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
321+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
322+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
323+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
324+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
325+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
326326
" ['save_model_path', './qmodel_PTQ_Keras.keras'] # Path to save the quantized model.\n",
327327
" ]\n",
328328
"\n",
@@ -383,14 +383,14 @@
383383
" # Parameter configuration for PTQ with Mixed Precision\n",
384384
" param_items = [\n",
385385
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
386-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
387-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
388-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
389-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
390-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
391-
" ['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).\n",
392-
" ['num_of_images', 5], # Number of epochs for gradient-based fine-tuning.\n",
393-
" ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance.\n",
386+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
387+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
388+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
389+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
390+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
391+
" ['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).\n",
392+
" ['num_of_images', 5], # Number of images for mixed precision.\n",
393+
" ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).\n",
394394
" ['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).\n",
395395
" ['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.\n",
396396
" ]\n",
@@ -453,13 +453,13 @@
453453
" param_items = [\n",
454454
" # Platform configuration\n",
455455
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
456-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
457-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
458-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
459-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
460-
" ['residual_collapsing', True], # Enable residual connection collapsing. \n",
456+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
457+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
458+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
459+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
460+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority). \n",
461461
" ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n",
462-
" ['optimizer', None], # Optimizer to use during fine-tuning.\n",
462+
" ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n",
463463
" ['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the quantized model.\n",
464464
" ]\n",
465465
"\n",
@@ -524,14 +524,14 @@
524524
" param_items = [\n",
525525
" # Platform configuration\n",
526526
" ['sdsp_version', '3.14'], # The version of the SDSP converter.\n",
527-
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n",
528-
" ['weights_bias_correction', True], # Enable bias correction for weights.\n",
529-
" ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n",
530-
" ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n",
531-
" ['residual_collapsing', True], # Enable residual connection collapsing.\n",
527+
" ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n",
528+
" ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n",
529+
" ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n",
530+
" ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n",
531+
" ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n",
532532
" ['weights_compression_ratio', 0.75], # Compression ratio for weights.\n",
533533
" ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n",
534-
" ['optimizer', None], # Optimizer to use during fine-tuning.\n",
534+
" ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n",
535535
" ['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).\n",
536536
" ['num_of_images', 5], # Number of images to use for calibration.\n",
537537
" ['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.\n",

0 commit comments

Comments
 (0)