|
318 | 318 | " # Parameter configuration for PTQ\n", |
319 | 319 | " param_items = [\n", |
320 | 320 | " ['sdsp_version', '3.14'], # The version of the SDSP converter.\n", |
321 | | - " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n", |
322 | | - " ['weights_bias_correction', True], # Enable bias correction for weights.\n", |
323 | | - " ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n", |
324 | | - " ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n", |
325 | | - " ['residual_collapsing', True], # Enable residual connection collapsing.\n", |
| 321 | + " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n", |
| 322 | + " ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n", |
| 323 | + " ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n", |
| 324 | + " ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n", |
| 325 | + " ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n", |
326 | 326 | " ['save_model_path', './qmodel_PTQ_Keras.keras'] # Path to save the quantized model.\n", |
327 | 327 | " ]\n", |
328 | 328 | "\n", |
|
383 | 383 | " # Parameter configuration for PTQ with Mixed Precision\n", |
384 | 384 | " param_items = [\n", |
385 | 385 | " ['sdsp_version', '3.14'], # The version of the SDSP converter.\n", |
386 | | - " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n", |
387 | | - " ['weights_bias_correction', True], # Enable bias correction for weights.\n", |
388 | | - " ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n", |
389 | | - " ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n", |
390 | | - " ['residual_collapsing', True], # Enable residual connection collapsing.\n", |
391 | | - " ['distance_weighting_method', None], # Distance weighting method for GPTQ (low priority).\n", |
392 | | - " ['num_of_images', 5], # Number of epochs for gradient-based fine-tuning.\n", |
393 | | - " ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance.\n", |
| 386 | + " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n", |
| 387 | + " ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n", |
| 388 | + " ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n", |
| 389 | + " ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n", |
| 390 | + " ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n", |
| 391 | + " ['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).\n", |
| 392 | + " ['num_of_images', 5], # Number of images for mixed precision.\n", |
| 393 | + " ['use_hessian_based_scores', False], # Use Hessian-based sensitivity scores for layer importance (low priority).\n", |
394 | 394 | " ['weights_compression_ratio', 0.75], # Target compression ratio for model weights (75% of original size).\n", |
395 | 395 | " ['save_model_path', './qmodel_PTQ_Keras_mixed_precision.keras'] # Path to save the quantized model.\n", |
396 | 396 | " ]\n", |
|
453 | 453 | " param_items = [\n", |
454 | 454 | " # Platform configuration\n", |
455 | 455 | " ['sdsp_version', '3.14'], # The version of the SDSP converter.\n", |
456 | | - " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n", |
457 | | - " ['weights_bias_correction', True], # Enable bias correction for weights.\n", |
458 | | - " ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n", |
459 | | - " ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n", |
460 | | - " ['residual_collapsing', True], # Enable residual connection collapsing. \n", |
| 456 | + " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n", |
| 457 | + " ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n", |
| 458 | + " ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n", |
| 459 | + " ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n", |
| 460 | + " ['residual_collapsing', True], # Enable residual connection collapsing (low priority). \n", |
461 | 461 | " ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n", |
462 | | - " ['optimizer', None], # Optimizer to use during fine-tuning.\n", |
| 462 | + " ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n", |
463 | 463 | " ['save_model_path', './qmodel_GPTQ_Keras.keras'] # Path to save the quantized model.\n", |
464 | 464 | " ]\n", |
465 | 465 | "\n", |
|
524 | 524 | " param_items = [\n", |
525 | 525 | " # Platform configuration\n", |
526 | 526 | " ['sdsp_version', '3.14'], # The version of the SDSP converter.\n", |
527 | | - " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation.\n", |
528 | | - " ['weights_bias_correction', True], # Enable bias correction for weights.\n", |
529 | | - " ['z_threshold', float('inf')], # Threshold for zero-point quantization.\n", |
530 | | - " ['linear_collapsing', True], # Enable linear layer collapsing optimization.\n", |
531 | | - " ['residual_collapsing', True], # Enable residual connection collapsing.\n", |
| 527 | + " ['activation_error_method', QuantizationErrorMethod.MSE], # Error metric for activation (low priority).\n", |
| 528 | + " ['weights_bias_correction', True], # Enable bias correction for weights (low priority).\n", |
| 529 | + " ['z_threshold', float('inf')], # Threshold for zero-point quantization (low priority).\n", |
| 530 | + " ['linear_collapsing', True], # Enable linear layer collapsing optimization (low priority).\n", |
| 531 | + " ['residual_collapsing', True], # Enable residual connection collapsing (low priority).\n", |
532 | 532 | " ['weights_compression_ratio', 0.75], # Compression ratio for weights.\n", |
533 | 533 | " ['n_epochs', 5], # Number of epochs for gradient-based fine-tuning.\n", |
534 | | - " ['optimizer', None], # Optimizer to use during fine-tuning.\n", |
| 534 | + " ['optimizer', None], # Optimizer to use during fine-tuning (low priority).\n", |
535 | 535 | " ['distance_weighting_method', None], # Distance weighting method for mixed precision (low priority).\n", |
536 | 536 | " ['num_of_images', 5], # Number of images for mixed precision.\n", |
537 | 537 | " ['use_hessian_based_scores', False], # Whether to use Hessian-based scores for layer importance.\n", |
|
0 commit comments