diff --git a/docs/api/api_docs/classes/BitWidthConfig.html b/docs/api/api_docs/classes/BitWidthConfig.html index 34ff1f8af..83473d0f6 100644 --- a/docs/api/api_docs/classes/BitWidthConfig.html +++ b/docs/api/api_docs/classes/BitWidthConfig.html @@ -7,7 +7,7 @@
Get the value of the inner dictionary by the given key, If key is not in dictionary, it uses the default_factory to return a default value.
Any
key – Key to use in inner dictionary.
+key – Key to use in inner dictionary.
Value of the inner dictionary by the given key, or a default value if not exist. +
Value of the inner dictionary by the given key, or a default value if it does not exist. If default_factory was not passed at initialization, it returns None.
Any
Examples
When quantizing a Keras model, if we want to quantize the kernels of Conv2D layers only, we can set, and we know its kernel out/in channel indices are (3, 2) respectively:
->>> import tensorflow as tf
+>>> import tensorflow as tf
>>> kernel_ops = [tf.keras.layers.Conv2D]
>>> kernel_channels_mapping = DefaultDict({tf.keras.layers.Conv2D: (3,2)})
diff --git a/docs/api/api_docs/classes/GradientPTQConfig.html b/docs/api/api_docs/classes/GradientPTQConfig.html
index f8c3485cc..c31461a72 100644
--- a/docs/api/api_docs/classes/GradientPTQConfig.html
+++ b/docs/api/api_docs/classes/GradientPTQConfig.html
@@ -7,7 +7,7 @@
GradientPTQConfig Class — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html
index 8c2dfca9d..7ddeea6c4 100644
--- a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html
+++ b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html
@@ -7,7 +7,7 @@
MixedPrecisionQuantizationConfig — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/classes/PruningConfig.html b/docs/api/api_docs/classes/PruningConfig.html
index 1abe2e370..aeb06f672 100644
--- a/docs/api/api_docs/classes/PruningConfig.html
+++ b/docs/api/api_docs/classes/PruningConfig.html
@@ -7,7 +7,7 @@
Pruning Configuration — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/classes/PruningInfo.html b/docs/api/api_docs/classes/PruningInfo.html
index b66597303..962091a6d 100644
--- a/docs/api/api_docs/classes/PruningInfo.html
+++ b/docs/api/api_docs/classes/PruningInfo.html
@@ -7,7 +7,7 @@
Pruning Information — MCT Documentation: ver 2.6.0
-
+
@@ -65,6 +65,9 @@ Navigation
Return type:
Dict[BaseNode, np.ndarray]
+Return type:
+Dict[BaseNode, ndarray]
+
@@ -79,6 +82,9 @@ Navigation
Return type:
Dict[BaseNode, np.ndarray]
+Return type:
+Dict[BaseNode, ndarray]
+
diff --git a/docs/api/api_docs/classes/QuantizationConfig.html b/docs/api/api_docs/classes/QuantizationConfig.html
index dfc3ab3a5..4eab2f6ad 100644
--- a/docs/api/api_docs/classes/QuantizationConfig.html
+++ b/docs/api/api_docs/classes/QuantizationConfig.html
@@ -7,7 +7,7 @@
QuantizationConfig — MCT Documentation: ver 2.6.0
-
+
@@ -50,7 +50,7 @@ Navigation
activations using thresholds, with weight threshold selection based on MSE and activation threshold selection
using NOCLIPPING (min/max), while enabling relu_bound_to_power_of_2 and weights_bias_correction,
you can instantiate a quantization configuration like this:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
>>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
diff --git a/docs/api/api_docs/classes/QuantizationErrorMethod.html b/docs/api/api_docs/classes/QuantizationErrorMethod.html
index f6685e620..a3d3b092a 100644
--- a/docs/api/api_docs/classes/QuantizationErrorMethod.html
+++ b/docs/api/api_docs/classes/QuantizationErrorMethod.html
@@ -7,7 +7,7 @@
QuantizationErrorMethod — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/classes/ResourceUtilization.html b/docs/api/api_docs/classes/ResourceUtilization.html
index 9e4ea601c..9c0fe05c3 100644
--- a/docs/api/api_docs/classes/ResourceUtilization.html
+++ b/docs/api/api_docs/classes/ResourceUtilization.html
@@ -7,7 +7,7 @@
ResourceUtilization — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/classes/Wrapper.html b/docs/api/api_docs/classes/Wrapper.html
index 36a729fc8..198ef4b24 100644
--- a/docs/api/api_docs/classes/Wrapper.html
+++ b/docs/api/api_docs/classes/Wrapper.html
@@ -7,7 +7,7 @@
wrapper — MCT Documentation: ver 2.6.0
-
+
@@ -57,11 +57,8 @@ Navigation
quantize_and_export(float_model, representative_dataset, framework='pytorch', method='PTQ', use_mixed_precision=False, param_items=None)¶
Main function to perform model quantization and export.
-- Return type:
-Tuple[bool, Any]
-
-- Parameters:
-
+- Parameters:
+
float_model – The float model to be quantized.
representative_dataset (Callable, np.array, tf.Tensor) – Representative dataset for calibration.
framework (str) – ‘tensorflow’ or ‘pytorch’.
@@ -74,13 +71,13 @@
Navigation
[[key,value],…]. Default: None
-- Returns:
-tuple (quantization success flag, quantized model)
+- Returns:
+tuple (quantization success flag, quantized model)
Examples
Import MCT
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Prepare the float model and dataset
@@ -345,6 +342,11 @@ Navigation
+
+- Return type:
+Tuple[bool, Any]
+
+
diff --git a/docs/api/api_docs/classes/XQuantConfig.html b/docs/api/api_docs/classes/XQuantConfig.html
index 68b667bf7..19fc4fbef 100644
--- a/docs/api/api_docs/classes/XQuantConfig.html
+++ b/docs/api/api_docs/classes/XQuantConfig.html
@@ -7,7 +7,7 @@
XQuant Configuration — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/index.html b/docs/api/api_docs/index.html
index 7165a2c56..5c7693b1c 100644
--- a/docs/api/api_docs/index.html
+++ b/docs/api/api_docs/index.html
@@ -7,7 +7,7 @@
API Docs — MCT Documentation: ver 2.6.0
-
+
@@ -45,7 +45,7 @@ Navigation
API Docs¶
Init module for MCT API.
-import model_compression_toolkit as mct
+import model_compression_toolkit as mct
diff --git a/docs/api/api_docs/methods/get_keras_data_generation_config.html b/docs/api/api_docs/methods/get_keras_data_generation_config.html
index 394e33802..88918209e 100644
--- a/docs/api/api_docs/methods/get_keras_data_generation_config.html
+++ b/docs/api/api_docs/methods/get_keras_data_generation_config.html
@@ -7,7 +7,7 @@
Get DataGenerationConfig for Keras Models — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.data_generation.get_keras_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=Adam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_KERAS_INITIAL_LR, output_loss_multiplier=DEFAULT_KERAS_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.REGULARIZED_MIN_MAX_DIFF, data_init_type=DataInitType.Gaussian, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.BatchWise, image_pipeline_type=ImagePipelineType.SMOOTHING_AND_AUGMENTATION, image_normalization_type=ImageNormalizationType.KERAS_APPLICATIONS, extra_pixels=DEFAULT_KERAS_EXTRA_PIXELS, bn_layer_types=[BatchNormalization], image_clipping=False)¶
Function to create a DataGenerationConfig object with the specified configuration parameters.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
n_iter (int) – Number of iterations for the data generation process.
optimizer (Optimizer) – The optimizer to use for the data generation process.
data_gen_batch_size (int) – Batch size for data generation.
@@ -68,11 +65,14 @@ Navigation
image_clipping (bool) – Whether to clip images during optimization.
-- Returns:
-Data generation configuration object.
+- Returns:
+Data generation configuration object.
+
+- Return type:
+-
- Return type:
--
+
-
diff --git a/docs/api/api_docs/methods/get_keras_gptq_config.html b/docs/api/api_docs/methods/get_keras_gptq_config.html
index bde134fb8..36099273c 100644
--- a/docs/api/api_docs/methods/get_keras_gptq_config.html
+++ b/docs/api/api_docs/methods/get_keras_gptq_config.html
@@ -7,7 +7,7 @@
Get GradientPTQConfig for Keras Models — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.gptq.get_keras_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True)¶
Create a GradientPTQConfig instance for Keras models.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.
optimizer (OptimizerV2) – Keras optimizer to use for fine-tuning for auxiliary variable. Default: Adam(learning rate set to 3e-2).
optimizer_rest (OptimizerV2) – Keras optimizer to use for fine-tuning of the bias variable. Default: Adam(learning rate set to 1e-4).
@@ -62,14 +59,14 @@ Navigation
gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.
-- Returns:
-a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
+- Returns:
+a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
Examples
Import MCT and TensorFlow:
->>> import model_compression_toolkit as mct
->>> import tensorflow as tf
+>>> import model_compression_toolkit as mct
+>>> import tensorflow as tf
Create a GradientPTQConfig to run for 5 epochs:
@@ -81,6 +78,11 @@ Navigation
The configuration can be passed to keras_gradient_post_training_quantization() in order to quantize a keras model using gptq.
+
+- Return type:
+-
+
+
diff --git a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html
index 9bd99a6b2..dae05e83c 100644
--- a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html
+++ b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html
@@ -7,7 +7,7 @@
Get DataGenerationConfig for Pytorch Models — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.data_generation.get_pytorch_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=RAdam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_PYTORCH_INITIAL_LR, output_loss_multiplier=DEFAULT_PYTORCH_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU_WITH_RESET, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.NEGATIVE_MIN_MAX_DIFF, data_init_type=DataInitType.Gaussian, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.AllImages, image_pipeline_type=ImagePipelineType.SMOOTHING_AND_AUGMENTATION, image_normalization_type=ImageNormalizationType.TORCHVISION, extra_pixels=DEFAULT_PYTORCH_EXTRA_PIXELS, bn_layer_types=DEFAULT_PYTORCH_BN_LAYER_TYPES, last_layer_types=DEFAULT_PYTORCH_LAST_LAYER_TYPES, image_clipping=True)¶
Function to create a DataGenerationConfig object with the specified configuration parameters.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
n_iter (int) – Number of iterations for the data generation process.
optimizer (Optimizer) – The optimizer to use for the data generation process.
data_gen_batch_size (int) – Batch size for data generation.
@@ -69,11 +66,14 @@ Navigation
image_clipping (bool) – Whether to clip images during optimization.
-- Returns:
-Data generation configuration object.
+- Returns:
+Data generation configuration object.
+
+- Return type:
+-
- Return type:
--
+
-
diff --git a/docs/api/api_docs/methods/get_pytroch_gptq_config.html b/docs/api/api_docs/methods/get_pytroch_gptq_config.html
index 4b6893053..a27547800 100644
--- a/docs/api/api_docs/methods/get_pytroch_gptq_config.html
+++ b/docs/api/api_docs/methods/get_pytroch_gptq_config.html
@@ -7,7 +7,7 @@
Get GradientPTQConfig for Pytorch Models — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.gptq.get_pytorch_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True)¶
Create a GradientPTQConfig instance for Pytorch models.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.
optimizer (Optimizer) – Pytorch optimizer to use for fine-tuning for auxiliary variable. Default: Adam(learning rate set to 3e-2).
optimizer_rest (Optimizer) – Pytorch optimizer to use for fine-tuning of the bias variable. Default: Adam(learning rate set to 1e-4).
@@ -62,22 +59,27 @@ Navigation
gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.
-- Returns:
-a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
+- Returns:
+a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
Examples
Import MCT and Create a GradientPTQConfig to run for 5 epochs:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
>>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
Other PyTorch optimizers can be passed with dummy params:
->>> import torch
+>>> import torch
>>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
The configuration can be passed to pytorch_gradient_post_training_quantization() in order to quantize a pytorch model using gptq.
+
+- Return type:
+-
+
+
diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities.html b/docs/api/api_docs/methods/get_target_platform_capabilities.html
index d981a4e14..e846a07c3 100644
--- a/docs/api/api_docs/methods/get_target_platform_capabilities.html
+++ b/docs/api/api_docs/methods/get_target_platform_capabilities.html
@@ -7,7 +7,7 @@
Get TargetPlatformCapabilities for tpc version — MCT Documentation: ver 2.6.0
-
+
@@ -45,17 +45,17 @@ Navigation
model_compression_toolkit.get_target_platform_capabilities(tpc_version=TPC_V1_0, device_type=IMX500_TP_MODEL)¶
Retrieves target platform capabilities model based on tpc version and the specified device type.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
tpc_version (str) – Target platform capabilities version.
device_type (str) – The type of device for the target platform.
-- Returns:
-The TargetPlatformCapabilities object matching the tpc version.
+- Returns:
+The TargetPlatformCapabilities object matching the tpc version.
+
+- Return type:
+-
diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html b/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html
index 8ab54b9ae..55a38f3f7 100644
--- a/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html
+++ b/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html
@@ -7,7 +7,7 @@
Get TargetPlatformCapabilities for sdsp converter version — MCT Documentation: ver 2.6.0
-
+
@@ -45,14 +45,14 @@ Navigation
model_compression_toolkit.get_target_platform_capabilities_sdsp(sdsp_version=SDSP_V3_14)¶
Retrieves target platform capabilities model based on sdsp converter version.
-- Return type:
--
+
- Parameters:
+sdsp_version (str) – Sdsp converter version.
-- Parameters:
-sdsp_version (str) – Sdsp converter version.
+- Returns:
+The TargetPlatformCapabilities object matching the sdsp converter version.
-- Returns:
-The TargetPlatformCapabilities object matching the sdsp converter version.
+- Return type:
+-
diff --git a/docs/api/api_docs/methods/keras_data_generation_experimental.html b/docs/api/api_docs/methods/keras_data_generation_experimental.html
index 3ecd40705..8a77338b8 100644
--- a/docs/api/api_docs/methods/keras_data_generation_experimental.html
+++ b/docs/api/api_docs/methods/keras_data_generation_experimental.html
@@ -7,7 +7,7 @@
Keras Data Generation — MCT Documentation: ver 2.6.0
-
+
@@ -45,30 +45,27 @@ Navigation
model_compression_toolkit.data_generation.keras_data_generation_experimental(model, n_images, output_image_size, data_generation_config)¶
Function to perform data generation using the provided Keras model and data generation configuration.
-- Return type:
-Tensor
-
-- Parameters:
-
+- Parameters:
+
model (Model) – Keras model to generate data for.
n_images (int) – Number of images to generate.
output_image_size (Union[int, Tuple[int, int]]) – Size of the output images.
data_generation_config (DataGenerationConfig) – Configuration for data generation.
-- Returns:
-Finalized list containing generated images.
+- Returns:
+Finalized list containing generated images.
-- Return type:
-List[tf.Tensor]
+- Return type:
+List[tf.Tensor]
Examples
In this example, we’ll walk through generating images using a simple Keras model and a data generation configuration. The process involves creating a model, setting up a data generation configuration, and finally generating images with specified parameters.
Start by importing the Model Compression Toolkit (MCT), TensorFlow, and some layers from tensorflow.keras:
->>> import model_compression_toolkit as mct
->>> from tensorflow.keras.models import Sequential
->>> from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Reshape
+>>> import model_compression_toolkit as mct
+>>> from tensorflow.keras.models import Sequential
+>>> from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Reshape
Next, define a simple Keras model:
@@ -86,6 +83,11 @@ Navigation
The generated images can then be used for various purposes, such as data-free quantization.
+
+- Return type:
+Tensor
+
+
diff --git a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html
index c87c98636..c80b34b77 100644
--- a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html
+++ b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html
@@ -7,7 +7,7 @@
Keras Gradient Based Post Training Quantization — MCT Documentation: ver 2.6.0
-
+
@@ -58,11 +58,8 @@ Navigation
training quantization by comparing points between the float and quantized models, and minimizing the observed
loss.
-- Return type:
-Tuple[Model, Optional[UserInformation]]
-
-- Parameters:
-
+- Parameters:
+
in_model (Model) – Keras model to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
gptq_config (GradientPTQConfig) – Configuration for using gptq (e.g. optimizer).
@@ -72,21 +69,21 @@ Navigation
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.
-- Returns:
-A quantized model and information the user may need to handle the quantized model.
+- Returns:
+A quantized model and information the user may need to handle the quantized model.
Examples
Import a Keras model:
->>> from tensorflow.keras.applications.mobilenet import MobileNet
+>>> from tensorflow.keras.applications.mobilenet import MobileNet
>>> model = MobileNet()
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
In this example a random dataset of 10 batches each containing 4 images is used.
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 224, 224, 3))]
@@ -116,6 +113,11 @@ Navigation
>>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(model, repr_datagen, gptq_config, target_resource_utilization=ru, core_config=config)
+
+- Return type:
+Tuple[Model, Optional[UserInformation]]
+
+
diff --git a/docs/api/api_docs/methods/keras_kpi_data.html b/docs/api/api_docs/methods/keras_kpi_data.html
index 3bb212d12..b9d168cc8 100644
--- a/docs/api/api_docs/methods/keras_kpi_data.html
+++ b/docs/api/api_docs/methods/keras_kpi_data.html
@@ -7,7 +7,7 @@
Get Resource Utilization information for Keras Models — MCT Documentation: ver 2.6.0
-
+
@@ -48,37 +48,39 @@ Navigation
Builds the computation graph from the given model and hw modeling, and uses it to compute the
resource utilization data.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
in_model (Model) – Keras model to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision of how the model should be quantized.
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the Keras model according to.
-- Returns:
-A ResourceUtilization object with total weights parameters sum and max activation tensor.
+- Returns:
+A ResourceUtilization object with total weights parameters sum and max activation tensor.
Examples
Import a Keras model:
->>> from tensorflow.keras.applications.mobilenet import MobileNet
+>>> from tensorflow.keras.applications.mobilenet import MobileNet
>>> model = MobileNet()
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
Import MCT and call for resource utilization data calculation:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
>>> ru_data = mct.core.keras_resource_utilization_data(model, repr_datagen)
+
+- Return type:
+-
+
+
diff --git a/docs/api/api_docs/methods/keras_load_quantizad_model.html b/docs/api/api_docs/methods/keras_load_quantizad_model.html
index ed3b04e9a..8397bdd66 100644
--- a/docs/api/api_docs/methods/keras_load_quantizad_model.html
+++ b/docs/api/api_docs/methods/keras_load_quantizad_model.html
@@ -7,7 +7,7 @@
Load Quantized Keras Model — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/methods/keras_post_training_quantization.html b/docs/api/api_docs/methods/keras_post_training_quantization.html
index 15ce8354b..b42467913 100644
--- a/docs/api/api_docs/methods/keras_post_training_quantization.html
+++ b/docs/api/api_docs/methods/keras_post_training_quantization.html
@@ -7,7 +7,7 @@
Keras Post Training Quantization — MCT Documentation: ver 2.6.0
-
+
@@ -55,11 +55,8 @@ Navigation
In order to limit the maximal model’s size, a target ResourceUtilization need to be passed after weights_memory
is set (in bytes).
-- Return type:
-Tuple[Model, Optional[UserInformation]]
-
-- Parameters:
-
+- Parameters:
+
in_model (Model) – Keras model to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
@@ -67,25 +64,25 @@ Navigation
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.
-- Returns:
-A quantized model and information the user may need to handle the quantized model.
+- Returns:
+A quantized model and information the user may need to handle the quantized model.
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Keras model:
->>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
>>> model = MobileNetV2()
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
In this example a random dataset of 10 batches each containing 4 images is used.
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 224, 224, 3))]
@@ -113,6 +110,11 @@ Navigation
For more configuration options, please take a look at our API documentation.
+
+- Return type:
+Tuple[Model, Optional[UserInformation]]
+
+
diff --git a/docs/api/api_docs/methods/keras_pruning_experimental.html b/docs/api/api_docs/methods/keras_pruning_experimental.html
index be1cb8550..4732e318a 100644
--- a/docs/api/api_docs/methods/keras_pruning_experimental.html
+++ b/docs/api/api_docs/methods/keras_pruning_experimental.html
@@ -7,7 +7,7 @@
Keras Structured Pruning — MCT Documentation: ver 2.6.0
-
+
@@ -53,11 +53,8 @@ Navigation
identify groups of channels that can be removed with minimal impact on performance.
Notice that the pruned model must be retrained to recover the compressed model’s performance.
-- Return type:
-Tuple[Model, PruningInfo]
-
-- Parameters:
-
+- Parameters:
+
model (Model) – The original Keras model to be pruned.
target_resource_utilization (ResourceUtilization) – The target Key Performance Indicators to be achieved through pruning.
representative_data_gen (Callable) – A function to generate representative data for pruning analysis.
@@ -65,11 +62,11 @@ Navigation
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC.
-- Returns:
-A tuple containing the pruned Keras model and associated pruning information.
+- Returns:
+A tuple containing the pruned Keras model and associated pruning information.
-- Return type:
-Tuple[Model, PruningInfo]
+- Return type:
+Tuple[Model, PruningInfo]
@@ -78,17 +75,17 @@ Navigation
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Keras model:
->>> from tensorflow.keras.applications.resnet50 import ResNet50
+>>> from tensorflow.keras.applications.resnet50 import ResNet50
>>> model = ResNet50()
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
Define a target resource utilization for pruning.
@@ -109,6 +106,11 @@
Navigation
>>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
+
+- Return type:
+Tuple[Model, PruningInfo]
+
+
diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html
index a468b0226..4f7c0129c 100644
--- a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html
+++ b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html
@@ -7,7 +7,7 @@
Keras Quantization Aware Training Model Finalize — MCT Documentation: ver 2.6.0
-
+
@@ -45,29 +45,26 @@ Navigation
model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental(in_model)¶
Convert a model fine-tuned by the user (Trainable quantizers) to a model with Inferable quantizers.
-- Return type:
-Model
-
-- Parameters:
-in_model (Model) – Keras model to replace TrainableQuantizer with InferableQuantizer
+- Parameters:
+in_model (Model) – Keras model to replace TrainableQuantizer with InferableQuantizer
-- Returns:
-A quantized model with Inferable quantizers
+- Returns:
+A quantized model with Inferable quantizers
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Keras model:
->>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
>>> model = MobileNetV2()
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
Create a MCT core config, containing the quantization configuration:
@@ -96,6 +93,11 @@ Navigation
>>> quantized_model = mct.qat.keras_quantization_aware_training_finalize_experimental(quantized_model)
+
+- Return type:
+Model
+
+
diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html
index c915dc2b8..6c0ff4112 100644
--- a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html
+++ b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html
@@ -7,7 +7,7 @@
Keras Quantization Aware Training Model Init — MCT Documentation: ver 2.6.0
-
+
@@ -75,19 +75,19 @@ Navigation
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Keras model:
->>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
>>> model = MobileNetV2()
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
In this example a random dataset of 10 batches each containing 4 images is used.
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 224, 224, 3))]
diff --git a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html
index ba62ecb96..990280a9e 100644
--- a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html
+++ b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html
@@ -7,7 +7,7 @@
Pytorch Data Generation — MCT Documentation: ver 2.6.0
-
+
@@ -45,30 +45,27 @@ Navigation
model_compression_toolkit.data_generation.pytorch_data_generation_experimental(model, n_images, output_image_size, data_generation_config)¶
Function to perform data generation using the provided model and data generation configuration.
-- Return type:
-List[Tensor]
-
-- Parameters:
-
+- Parameters:
+
model (Module) – PyTorch model to generate data for.
n_images (int) – Number of images to generate.
output_image_size (Union[int, Tuple[int, int]]) – The height and width of the output images.
data_generation_config (DataGenerationConfig) – Configuration for data generation.
-- Returns:
-Finalized list containing generated images.
+- Returns:
+Finalized list containing generated images.
-- Return type:
-List[Tensor]
+- Return type:
+List[Tensor]
Examples
In this example, we’ll walk through generating images using a simple PyTorch model and a data generation configuration. The process involves creating a model, setting up a data generation configuration, and finally generating images with specified parameters.
Start by importing the Model Compression Toolkit (MCT), PyTorch, and some modules from torch.nn:
->>> import model_compression_toolkit as mct
->>> import torch.nn as nn
->>> from torch.nn import Conv2d, BatchNorm2d, Flatten, Linear
+>>> import model_compression_toolkit as mct
+>>> import torch.nn as nn
+>>> from torch.nn import Conv2d, BatchNorm2d, Flatten, Linear
Next, define a simple PyTorch model:
@@ -86,6 +83,11 @@ Navigation
The generated images can then be used for various purposes, such as data-free quantization.
+
+- Return type:
+List[Tensor]
+
+
diff --git a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html
index 572b6b545..4180ce4b4 100644
--- a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html
+++ b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html
@@ -7,7 +7,7 @@
Pytorch Gradient Based Post Training Quantization — MCT Documentation: ver 2.6.0
-
+
@@ -58,11 +58,8 @@ Navigation
training quantization by comparing points between the float and quantized models, and minimizing the observed
loss.
-- Return type:
-Tuple[Module, Optional[UserInformation]]
-
-- Parameters:
-
+- Parameters:
+
model (Module) – Pytorch model to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
@@ -72,25 +69,25 @@ Navigation
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.
-- Returns:
-A quantized module and information the user may need to handle the quantized module.
+- Returns:
+A quantized module and information the user may need to handle the quantized module.
Examples
Import Model Compression Toolkit:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Pytorch module:
->>> from torchvision import models
+>>> from torchvision import models
>>> module = models.mobilenet_v2()
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
In this example a random dataset of 10 batches each containing 4 images is used.
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 3, 224, 224))]
@@ -103,6 +100,11 @@ Navigation
>>> quantized_module, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(module, repr_datagen, core_config=config, gptq_config=gptq_conf)
+
+- Return type:
+Tuple[Module, Optional[UserInformation]]
+
+
diff --git a/docs/api/api_docs/methods/pytorch_kpi_data.html b/docs/api/api_docs/methods/pytorch_kpi_data.html
index a566b86d8..c6f2cc235 100644
--- a/docs/api/api_docs/methods/pytorch_kpi_data.html
+++ b/docs/api/api_docs/methods/pytorch_kpi_data.html
@@ -7,7 +7,7 @@
Get Resource Utilization information for PyTorch Models — MCT Documentation: ver 2.6.0
-
+
@@ -46,37 +46,39 @@ Navigation
Computes resource utilization data that can be used to calculate the desired target resource utilization for mixed-precision quantization.
Builds the computation graph from the given model and target platform capabilities, and uses it to compute the resource utilization data.
-- Return type:
--
-
-- Parameters:
-
+- Parameters:
+
in_model (Model) – PyTorch model to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the PyTorch model according to.
-- Returns:
-A ResourceUtilization object with total weights parameters sum and max activation tensor.
+- Returns:
+A ResourceUtilization object with total weights parameters sum and max activation tensor.
Examples
Import a Pytorch model:
->>> from torchvision import models
+>>> from torchvision import models
>>> module = models.mobilenet_v2()
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
Import mct and call for resource utilization data calculation:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
>>> ru_data = mct.core.pytorch_resource_utilization_data(module, repr_datagen)
+
+- Return type:
+-
+
+
diff --git a/docs/api/api_docs/methods/pytorch_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_post_training_quantization.html
index ac93413e2..0773fb24b 100644
--- a/docs/api/api_docs/methods/pytorch_post_training_quantization.html
+++ b/docs/api/api_docs/methods/pytorch_post_training_quantization.html
@@ -7,7 +7,7 @@
Pytorch Post Training Quantization — MCT Documentation: ver 2.6.0
-
+
@@ -55,11 +55,8 @@ Navigation
training quantization by comparing points between the float and quantized modules, and minimizing the
observed loss.
-- Return type:
-Tuple[Module, Optional[UserInformation]]
-
-- Parameters:
-
+- Parameters:
+
in_module (Module) – Pytorch module to quantize.
representative_data_gen (Callable) – Dataset used for calibration.
target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
@@ -67,31 +64,36 @@ Navigation
target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.
-- Returns:
-A quantized module and information the user may need to handle the quantized module.
+- Returns:
+A quantized module and information the user may need to handle the quantized module.
Examples
Import a Pytorch module:
->>> from torchvision import models
+>>> from torchvision import models
>>> module = models.mobilenet_v2()
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
In this example a random dataset of 10 batches each containing 4 images is used.
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 3, 224, 224))]
Import MCT and pass the module with the representative dataset generator to get a quantized module
Set number of calibration iterations to 1:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
>>> quantized_module, quantization_info = mct.ptq.pytorch_post_training_quantization(module, repr_datagen)
+
+- Return type:
+Tuple[Module, Optional[UserInformation]]
+
+
diff --git a/docs/api/api_docs/methods/pytorch_pruning_experimental.html b/docs/api/api_docs/methods/pytorch_pruning_experimental.html
index b4e43bc86..706ec4862 100644
--- a/docs/api/api_docs/methods/pytorch_pruning_experimental.html
+++ b/docs/api/api_docs/methods/pytorch_pruning_experimental.html
@@ -7,7 +7,7 @@
Pytorch Structured Pruning — MCT Documentation: ver 2.6.0
-
+
@@ -53,11 +53,8 @@ Navigation
identify groups of channels that can be removed with minimal impact on performance.
Notice that the pruned model must be retrained to recover the compressed model’s performance.
-- Return type:
-Tuple[Module, PruningInfo]
-
-- Parameters:
-
+- Parameters:
+
model (Module) – The PyTorch model to be pruned.
target_resource_utilization (ResourceUtilization) – Key Performance Indicators specifying the pruning targets.
representative_data_gen (Callable) – A function to generate representative data for pruning analysis.
@@ -66,11 +63,11 @@ Navigation
Defaults to DEFAULT_PYTORCH_TPC.
-- Returns:
-A tuple containing the pruned Pytorch model and associated pruning information.
+- Returns:
+A tuple containing the pruned Pytorch model and associated pruning information.
-- Return type:
-Tuple[Model, PruningInfo]
+- Return type:
+Tuple[Model, PruningInfo]
@@ -79,17 +76,17 @@ Navigation
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Pytorch model:
->>> from torchvision.models import resnet50, ResNet50_Weights
+>>> from torchvision.models import resnet50, ResNet50_Weights
>>> model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
Define a target resource utilization for pruning.
@@ -110,6 +107,11 @@
Navigation
>>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
+
+- Return type:
+Tuple[Module, PruningInfo]
+
+
diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html
index a8a81cf18..1365711fa 100644
--- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html
+++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html
@@ -7,7 +7,7 @@
PyTorch Quantization Aware Training Model Finalize — MCT Documentation: ver 2.6.0
-
+
@@ -55,17 +55,17 @@ Navigation
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Pytorch model:
->>> from torchvision.models import mobilenet_v2
+>>> from torchvision.models import mobilenet_v2
>>> model = mobilenet_v2(pretrained=True)
Create a random dataset generator:
->>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
Create a MCT core config, containing the quantization configuration:
diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html
index c1e3a19f2..a58199df2 100644
--- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html
+++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html
@@ -7,7 +7,7 @@
PyTorch Quantization Aware Training Model Init — MCT Documentation: ver 2.6.0
-
+
@@ -74,18 +74,18 @@ Navigation
Examples
Import MCT:
->>> import model_compression_toolkit as mct
+>>> import model_compression_toolkit as mct
Import a Pytorch model:
->>> from torchvision.models import mobilenet_v2
+>>> from torchvision.models import mobilenet_v2
>>> model = mobilenet_v2(pretrained=True)
Create a random dataset generator, for required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches each containing 4 images is used:
->>> import numpy as np
+>>> import numpy as np
>>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
>>> for _ in range(num_calibration_batches):
>>> yield [np.random.random((4, 3, 224, 224))]
diff --git a/docs/api/api_docs/methods/set_logger_path.html b/docs/api/api_docs/methods/set_logger_path.html
index 66272e074..0f14537ca 100644
--- a/docs/api/api_docs/methods/set_logger_path.html
+++ b/docs/api/api_docs/methods/set_logger_path.html
@@ -7,7 +7,7 @@
Enable a Logger — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.set_log_folder(folder, level=logging.INFO)¶
Set a directory path for saving a log file.
-- Return type:
-None
-
-- Parameters:
-
+- Parameters:
+
folder (str) – Folder path to save the log file.
level (int) – Level of verbosity to set to the logger and handlers.
@@ -61,6 +58,11 @@ Navigation
to set up logging.
Don’t use Python’s original logger.
+
+- Return type:
+None
+
+
diff --git a/docs/api/api_docs/methods/xquant_report_keras_experimental.html b/docs/api/api_docs/methods/xquant_report_keras_experimental.html
index 2feee9bb7..65d9d733a 100644
--- a/docs/api/api_docs/methods/xquant_report_keras_experimental.html
+++ b/docs/api/api_docs/methods/xquant_report_keras_experimental.html
@@ -7,7 +7,7 @@
XQuant Report Keras — MCT Documentation: ver 2.6.0
-
+
@@ -45,11 +45,8 @@ Navigation
model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental(float_model, quantized_model, repr_dataset, validation_dataset, xquant_config)¶
Generate an explainable quantization report for a quantized Keras model.
-- Return type:
-Dict[str, Any]
-
-- Parameters:
-
+- Parameters:
+
float_model (keras.Model) – The original floating-point Keras model.
quantized_model (keras.Model) – The quantized Keras model.
repr_dataset (Callable) – The representative dataset used during quantization for similarity metrics computation.
@@ -57,11 +54,14 @@ Navigation
xquant_config (XQuantConfig) – Configuration settings for explainable quantization.
-- Returns:
-A dictionary containing the collected similarity metrics and report data.
+- Returns:
+A dictionary containing the collected similarity metrics and report data.
+
+- Return type:
+Dict[str, Any]
- Return type:
-Dict[str, Any]
+Dict[str, Any]
diff --git a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html
index 696d88e5e..8388913a1 100644
--- a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html
+++ b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html
@@ -7,7 +7,7 @@
XQuant Report Pytorch — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html b/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html
index 0f3b1c42b..6817c7f5e 100644
--- a/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html
+++ b/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html
@@ -7,7 +7,7 @@
XQuant Report Troubleshoot Pytorch — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/modules/core_config.html b/docs/api/api_docs/modules/core_config.html
index a114722ad..f83bf0b59 100644
--- a/docs/api/api_docs/modules/core_config.html
+++ b/docs/api/api_docs/modules/core_config.html
@@ -7,7 +7,7 @@
CoreConfig — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/modules/debug_config.html b/docs/api/api_docs/modules/debug_config.html
index 9cce28a75..56b140522 100644
--- a/docs/api/api_docs/modules/debug_config.html
+++ b/docs/api/api_docs/modules/debug_config.html
@@ -7,7 +7,7 @@
debug_config Module — MCT Documentation: ver 2.6.0
-
+
@@ -45,7 +45,7 @@ DebugConfig
-class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=<factory>, simulate_scheduler=False, bypass=False)¶
+class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=<factory>, simulate_scheduler=False, bypass=False, progress_info_callback=None)¶
A dataclass for MCT core debug information.
- Parameters:
@@ -55,9 +55,85 @@ DebugConfigEditRule]) – A list of rules and actions to edit the network for quantization.
simulate_scheduler (bool) – Simulate scheduler behavior to compute operators’ order and cuts.
bypass (bool) – A flag to enable MCT bypass, which skips MCT runner and returns the input model unchanged.
+progress_info_callback (Callable) – A user-defined callback function for retrieving progress information.
+
About progress_info_callback
+
+The progress_info_callback parameter in DebugConfig enables the following features and allows users to retrieve progress information when a callback function is configured:
+
+The callback function can receive MCT progress information.
+A progress bar is displayed in the CUI, allowing users to visualize how much processing has been completed while MCT is running.
+
+If no callback function is set, these features are disabled and the behavior and output remain unchanged.
+Examples of how to create a callback function to enable these features are provided in the Examples section.
+
+Examples
+Create a callable callback function.
+When defining the callback, make sure it accepts a dictionary representing the current processing state as an argument.
+Example 1: Use a class to keep track of the processing history.
+>>> class ProgressInfoCallback:
+... def __init__(self):
+... self.history = []
+...
+... def __call__(self, info):
+... current = info["currentComponent"]
+... total = info["totalComponents"]
+... component_name = info["completedComponents"]
+...
+... self.history.append({
+... "component_name": component_name,
+... "current": current,
+... "total": total
+... })
+...
+>>> progress_info_callback = ProgressInfoCallback()
+
+
+Example 2: Use a function to output the progress percentage and processing name to standard error (stderr).
+>>> def progress_info_callback(info):
+... current = info["currentComponent"]
+... total = info["totalComponents"]
+... component_name = info["completedComponents"]
+...
+... progress_percent = (current / total * 100.0)
+...
+... print(f"[{current}/{total}] {progress_percent:6.2f}% {component_name}",
+... file=__import__('sys').stderr, flush=True)
+
+
+From the processing state dictionary, you can retrieve information using the following keys:
+
+Keys in the processing state dictionary¶
+
+Parameter Key
+Value Type
+Description
+
+
+
+“currentComponent”
+int
+Current processing step
+
+“totalComponents”
+int
+Total number of processing steps
+
+“completedComponents”
+str
+Name of the component currently being processed
+
+
+
+Import MCT and configure DebugConfig with the callback function you created.
+Configure CoreConfig with this DebugConfig and use it.
+>>> import model_compression_toolkit as mct
+>>> debug_config = mct.core.DebugConfig(progress_info_callback=progress_info_callback)
+>>> core_config = mct.core.CoreConfig(debug_config=debug_config)
+
+
diff --git a/docs/api/api_docs/modules/exporter.html b/docs/api/api_docs/modules/exporter.html
index 435f7315a..2399c6b1b 100644
--- a/docs/api/api_docs/modules/exporter.html
+++ b/docs/api/api_docs/modules/exporter.html
@@ -7,7 +7,7 @@
exporter Module — MCT Documentation: ver 2.6.0
-
+
@@ -78,11 +78,8 @@ keras_export_model
-Return type:
-Dict[str, type]
-
-Parameters:
-
+- Parameters:
+
model – Model to export.
save_model_path – Path to save the model.
is_layer_exportable_fn – Callable to check whether a layer can be exported or not.
@@ -90,8 +87,11 @@ keras_export_modelReturns:
-Custom objects dictionary needed to load the model.
+- Returns:
+Custom objects dictionary needed to load the model.
+
+- Return type:
+Dict[str, type]
@@ -101,9 +101,9 @@ keras_export_model¶
To export a TensorFlow model as a quantized model, it is necessary to first apply quantization
to the model using MCT:
-import numpy as np
-from keras.applications import ResNet50
-import model_compression_toolkit as mct
+import numpy as np
+from keras.applications import ResNet50
+import model_compression_toolkit as mct
# Create a model
float_model = ResNet50()
@@ -122,7 +122,7 @@ keras serialization format¶
By default, mct.exporter.keras_export_model will export the quantized Keras model to
a .keras model with custom quantizers from mct_quantizers module.
-import tempfile
+import tempfile
# Path of exported model
_, keras_file_path = tempfile.mkstemp('.keras')
@@ -160,11 +160,8 @@ pytorch_export_model
-- Return type:
-None
-
-- Parameters:
-
+- Parameters:
+
model (Module) – Model to export.
save_model_path (str) – Path to save the model.
repr_dataset (Callable) – Representative dataset for tracing the pytorch model (mandatory for exporting it).
@@ -175,6 +172,9 @@ pytorch_export_modeloutput_names (Optional[List[str]]) – Optional list of output node names for export compatibility. This argument is relevant only when using PytorchExportSerializationFormat.ONNX.
+- Return type:
+None
+
@@ -186,17 +186,17 @@ Pytorch Tutorialimport model_compression_toolkit as mct
-import numpy as np
-import torch
-from torchvision.models.mobilenetv2 import mobilenet_v2
+import model_compression_toolkit as mct
+import numpy as np
+import torch
+from torchvision.models.mobilenetv2 import mobilenet_v2
# Create a model
float_model = mobilenet_v2()
# Notice that here the representative dataset is random for demonstration only.
-def representative_data_gen():
+def representative_data_gen():
yield [np.random.random((1, 3, 224, 224))]
@@ -254,8 +254,8 @@ ONNX model output names
Use exported model for inference¶
To load and infer using the exported model, which was exported to an ONNX file in MCTQ format, we will use mct_quantizers method get_ort_session_options during onnxruntime session creation. Notice, inference on models exported in this format is slow and suffers from higher latency. However, inference of these models on IMX500 will not suffer from this issue.
-import mct_quantizers as mctq
-import onnxruntime as ort
+import mct_quantizers as mctq
+import onnxruntime as ort
sess = ort.InferenceSession(onnx_file_path,
mctq.get_ort_session_options(),
diff --git a/docs/api/api_docs/modules/layer_filters.html b/docs/api/api_docs/modules/layer_filters.html
index 941a9ebaa..da4e14b6e 100644
--- a/docs/api/api_docs/modules/layer_filters.html
+++ b/docs/api/api_docs/modules/layer_filters.html
@@ -7,7 +7,7 @@
Layer Attributes Filters — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/modules/network_editor.html b/docs/api/api_docs/modules/network_editor.html
index 54006edb3..56cd07d0d 100644
--- a/docs/api/api_docs/modules/network_editor.html
+++ b/docs/api/api_docs/modules/network_editor.html
@@ -7,7 +7,7 @@
network_editor Module — MCT Documentation: ver 2.6.0
-
+
@@ -50,9 +50,9 @@ EditRule
and the action is applied on these nodes during the quantization process.
Examples
Create an EditRule to quantize all Conv2D kernel attribute weights using 9 bits:
->>> import model_compression_toolkit as mct
->>> from model_compression_toolkit.core.keras.constants import KERNEL
->>> from tensorflow.keras.layers import Conv2D
+>>> import model_compression_toolkit as mct
+>>> from model_compression_toolkit.core.keras.constants import KERNEL
+>>> from tensorflow.keras.layers import Conv2D
>>> er_list = [mct.core.network_editor.EditRule(filter=mct.core.network_editor.NodeTypeFilter(Conv2D), action=mct.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr(attr_name=KERNEL, weights_n_bits=9))]
diff --git a/docs/api/api_docs/modules/qat_config.html b/docs/api/api_docs/modules/qat_config.html
index da879fe16..a6ad8b503 100644
--- a/docs/api/api_docs/modules/qat_config.html
+++ b/docs/api/api_docs/modules/qat_config.html
@@ -7,7 +7,7 @@
qat_config Module — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/modules/target_platform_capabilities.html b/docs/api/api_docs/modules/target_platform_capabilities.html
index d6b785708..76f1524bb 100644
--- a/docs/api/api_docs/modules/target_platform_capabilities.html
+++ b/docs/api/api_docs/modules/target_platform_capabilities.html
@@ -7,7 +7,7 @@
target_platform_capabilities Module — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/api/api_docs/modules/trainable_infrastructure.html b/docs/api/api_docs/modules/trainable_infrastructure.html
index 52e173b98..15de91ad3 100644
--- a/docs/api/api_docs/modules/trainable_infrastructure.html
+++ b/docs/api/api_docs/modules/trainable_infrastructure.html
@@ -7,7 +7,7 @@
trainable_infrastructure Module — MCT Documentation: ver 2.6.0
-
+
@@ -128,8 +128,8 @@ TrainableQuantizerWeightsConfigfrom model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
-from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
+from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
+from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
TrainableQuantizerWeightsConfig(weights_quantization_method=QuantizationMethod.SYMMETRIC,
weights_n_bits=8,
@@ -165,8 +165,8 @@ TrainableQuantizerActivationConfigfrom model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
-from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
+from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
+from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
TrainableQuantizerActivationConfig(activation_quantization_method=QuantizationMethod.UNIFORM,
activation_n_bits=8,
diff --git a/docs/api/api_docs/notes/tpc_note.html b/docs/api/api_docs/notes/tpc_note.html
index a93c45d53..885ac18cf 100644
--- a/docs/api/api_docs/notes/tpc_note.html
+++ b/docs/api/api_docs/notes/tpc_note.html
@@ -7,7 +7,7 @@
<no title> — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/docs_troubleshoot/genindex.html b/docs/docs_troubleshoot/genindex.html
index e2f12927f..1350a4150 100644
--- a/docs/docs_troubleshoot/genindex.html
+++ b/docs/docs_troubleshoot/genindex.html
@@ -6,7 +6,7 @@
Index — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/index.html b/docs/docs_troubleshoot/index.html
index 73d959f8c..0ddb16140 100644
--- a/docs/docs_troubleshoot/index.html
+++ b/docs/docs_troubleshoot/index.html
@@ -7,7 +7,7 @@
TroubleShooting Manual (MCT XQuant Extension Tool) — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/search.html b/docs/docs_troubleshoot/search.html
index 7cda3fba8..16ece6516 100644
--- a/docs/docs_troubleshoot/search.html
+++ b/docs/docs_troubleshoot/search.html
@@ -6,7 +6,7 @@
Search — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/static/pygments.css b/docs/docs_troubleshoot/static/pygments.css
index 5f2b0a250..0d49244ed 100644
--- a/docs/docs_troubleshoot/static/pygments.css
+++ b/docs/docs_troubleshoot/static/pygments.css
@@ -6,26 +6,26 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
.highlight .hll { background-color: #ffffcc }
.highlight { background: #eeffcc; }
.highlight .c { color: #408090; font-style: italic } /* Comment */
-.highlight .err { border: 1px solid #F00 } /* Error */
+.highlight .err { border: 1px solid #FF0000 } /* Error */
.highlight .k { color: #007020; font-weight: bold } /* Keyword */
-.highlight .o { color: #666 } /* Operator */
+.highlight .o { color: #666666 } /* Operator */
.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #007020 } /* Comment.Preproc */
.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
-.highlight .cs { color: #408090; background-color: #FFF0F0 } /* Comment.Special */
+.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
-.highlight .gr { color: #F00 } /* Generic.Error */
+.highlight .gr { color: #FF0000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
-.highlight .go { color: #333 } /* Generic.Output */
-.highlight .gp { color: #C65D09; font-weight: bold } /* Generic.Prompt */
+.highlight .go { color: #333333 } /* Generic.Output */
+.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
-.highlight .gt { color: #04D } /* Generic.Traceback */
+.highlight .gt { color: #0044DD } /* Generic.Traceback */
.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
@@ -33,43 +33,43 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #902000 } /* Keyword.Type */
.highlight .m { color: #208050 } /* Literal.Number */
-.highlight .s { color: #4070A0 } /* Literal.String */
-.highlight .na { color: #4070A0 } /* Name.Attribute */
+.highlight .s { color: #4070a0 } /* Literal.String */
+.highlight .na { color: #4070a0 } /* Name.Attribute */
.highlight .nb { color: #007020 } /* Name.Builtin */
-.highlight .nc { color: #0E84B5; font-weight: bold } /* Name.Class */
-.highlight .no { color: #60ADD5 } /* Name.Constant */
-.highlight .nd { color: #555; font-weight: bold } /* Name.Decorator */
-.highlight .ni { color: #D55537; font-weight: bold } /* Name.Entity */
+.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
+.highlight .no { color: #60add5 } /* Name.Constant */
+.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
+.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
.highlight .ne { color: #007020 } /* Name.Exception */
-.highlight .nf { color: #06287E } /* Name.Function */
+.highlight .nf { color: #06287e } /* Name.Function */
.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
-.highlight .nn { color: #0E84B5; font-weight: bold } /* Name.Namespace */
+.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
-.highlight .nv { color: #BB60D5 } /* Name.Variable */
+.highlight .nv { color: #bb60d5 } /* Name.Variable */
.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
-.highlight .w { color: #BBB } /* Text.Whitespace */
+.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #208050 } /* Literal.Number.Bin */
.highlight .mf { color: #208050 } /* Literal.Number.Float */
.highlight .mh { color: #208050 } /* Literal.Number.Hex */
.highlight .mi { color: #208050 } /* Literal.Number.Integer */
.highlight .mo { color: #208050 } /* Literal.Number.Oct */
-.highlight .sa { color: #4070A0 } /* Literal.String.Affix */
-.highlight .sb { color: #4070A0 } /* Literal.String.Backtick */
-.highlight .sc { color: #4070A0 } /* Literal.String.Char */
-.highlight .dl { color: #4070A0 } /* Literal.String.Delimiter */
-.highlight .sd { color: #4070A0; font-style: italic } /* Literal.String.Doc */
-.highlight .s2 { color: #4070A0 } /* Literal.String.Double */
-.highlight .se { color: #4070A0; font-weight: bold } /* Literal.String.Escape */
-.highlight .sh { color: #4070A0 } /* Literal.String.Heredoc */
-.highlight .si { color: #70A0D0; font-style: italic } /* Literal.String.Interpol */
-.highlight .sx { color: #C65D09 } /* Literal.String.Other */
+.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
+.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
+.highlight .sc { color: #4070a0 } /* Literal.String.Char */
+.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
+.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
+.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
+.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
+.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
+.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
+.highlight .sx { color: #c65d09 } /* Literal.String.Other */
.highlight .sr { color: #235388 } /* Literal.String.Regex */
-.highlight .s1 { color: #4070A0 } /* Literal.String.Single */
+.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
.highlight .ss { color: #517918 } /* Literal.String.Symbol */
.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #06287E } /* Name.Function.Magic */
-.highlight .vc { color: #BB60D5 } /* Name.Variable.Class */
-.highlight .vg { color: #BB60D5 } /* Name.Variable.Global */
-.highlight .vi { color: #BB60D5 } /* Name.Variable.Instance */
-.highlight .vm { color: #BB60D5 } /* Name.Variable.Magic */
+.highlight .fm { color: #06287e } /* Name.Function.Magic */
+.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
+.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
+.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
+.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/docs/docs_troubleshoot/troubleshoots/bias_correction.html b/docs/docs_troubleshoot/troubleshoots/bias_correction.html
index 14ba9f6be..839a46043 100644
--- a/docs/docs_troubleshoot/troubleshoots/bias_correction.html
+++ b/docs/docs_troubleshoot/troubleshoots/bias_correction.html
@@ -7,7 +7,7 @@
Bias Correction — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html b/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
index a1aa13592..9e72dbe68 100644
--- a/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
+++ b/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
@@ -7,7 +7,7 @@
Enabling Hessian-based Mixed Precision — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html b/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
index acd49af61..263064897 100644
--- a/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
+++ b/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
@@ -7,7 +7,7 @@
GPTQ - Gradient-Based Post Training Quantization — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html b/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
index ad0957cbb..7286296f5 100644
--- a/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
+++ b/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
@@ -7,7 +7,7 @@
Mixed Precision with model output loss objective — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
@@ -62,7 +62,7 @@ Solution
MCT offers an API to adjust the Mixed Precision objective method (MpDistanceWeighting).
Set the distance_weighting_method attribute to MpDistanceWeighting.LAST_LAYER in the MixedPrecisionQuantizationConfig of the CoreConfig.
By emphasizing a loss function that places greater importance on enhancing the model’s quantized output, users can mitigate the risk of detrimental precision reductions in the last layer.
-from model_compression_toolkit.core.common.mixed_precision import MpDistanceWeighting
+from model_compression_toolkit.core.common.mixed_precision import MpDistanceWeighting
mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(distance_weighting_method=MpDistanceWeighting.LAST_LAYER)
core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)
diff --git a/docs/docs_troubleshoot/troubleshoots/outlier_removal.html b/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
index fd731cd2d..ef4b50923 100644
--- a/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
+++ b/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
@@ -7,7 +7,7 @@
Outlier Removal — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html b/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
index bdab2b0fb..0efd370fd 100644
--- a/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
+++ b/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
@@ -7,7 +7,7 @@
Representative and Validation Dataset Mismatch — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html b/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
index ee4a881f8..f901696c8 100644
--- a/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
+++ b/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
@@ -7,7 +7,7 @@
Representative Dataset size and diversity — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html b/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
index 7d7a2338b..fd5fd1d39 100644
--- a/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
+++ b/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
@@ -7,7 +7,7 @@
Shift Negative Activation — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html b/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
index 8b3059191..2b2fb42df 100644
--- a/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
+++ b/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
@@ -7,7 +7,7 @@
Threshold selection error method — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html b/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
index c95c95fa6..7f38ead4d 100644
--- a/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
+++ b/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
@@ -7,7 +7,7 @@
Unbalanced “concatenation” — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html b/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
index 50db9dad6..e1a640647 100644
--- a/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
+++ b/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
@@ -7,7 +7,7 @@
Using more samples in Mixed Precision quantization — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
-
+
diff --git a/docs/genindex.html b/docs/genindex.html
index e1f138f60..4b0eddcc2 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -6,7 +6,7 @@
Index — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/guidelines/XQuant_Extension_Tool.html b/docs/guidelines/XQuant_Extension_Tool.html
index 84606063a..5970df91c 100644
--- a/docs/guidelines/XQuant_Extension_Tool.html
+++ b/docs/guidelines/XQuant_Extension_Tool.html
@@ -7,7 +7,7 @@
XQuant Extension Tool — MCT Documentation: ver 2.6.0
-
+
@@ -86,7 +86,7 @@ How to Runthe XQuant tutorial with xquant_report_troubleshoot_pytorch_experimental.
-from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
+from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
# xquant_report_pytorch_experimental --> xquant_report_troubleshoot_pytorch_experimental
result = xquant_report_troubleshoot_pytorch_experimental(
float_model,
@@ -111,7 +111,7 @@ How to Runxquant_config = XQuantConfig(report_dir='./log_tensorboard_xquant')
-from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
+from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
result = xquant_report_troubleshoot_pytorch_experimental(
float_model,
quantized_model,
@@ -208,7 +208,7 @@ Understanding the Judgeable Troubleshoots
-WARNING:Model Compression Toolkit:There are output values that deviate significantly from the average. Refer to the following images and the TroubleShooting Documentation (MCT XQuant Extension Tool) of 'Outlier Removal'.
+WARNING:Model Compression Toolkit:There are output values that deviate significantly from the average. Refer to the following images and the TroubleShooting Documentation (MCT XQuant Extension Tool) of 'Outlier Removal'.
diff --git a/docs/guidelines/visualization.html b/docs/guidelines/visualization.html
index 5783d82f7..e713d300b 100644
--- a/docs/guidelines/visualization.html
+++ b/docs/guidelines/visualization.html
@@ -7,7 +7,7 @@
Visualization within TensorBoard — MCT Documentation: ver 2.6.0
-
+
@@ -50,7 +50,7 @@ Navigation
Visualization within TensorBoard¶
One may log various graphs and data collected in different phases of the model quantization and display them within the Tensorboard UI.
To use it, all you have to do is to set a logger path. Setting a path is done by calling set_log_folder.
-import model_compression_toolkit as mct
+import model_compression_toolkit as mct
mct.set_log_folder('/logger/dir/path')
diff --git a/docs/index.html b/docs/index.html
index 2ec291518..0a5b860a4 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -7,7 +7,7 @@
Model Compression Toolkit User Guide — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/search.html b/docs/search.html
index 740d162e7..37d780dc0 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -6,7 +6,7 @@
Search — MCT Documentation: ver 2.6.0
-
+
diff --git a/docs/searchindex.js b/docs/searchindex.js
index 0bba14246..dac0e8f35 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"API Docs": [[13, null]], "API Documentation": [[50, "api-documentation"]], "About XQuant Extension Tool": [[48, "about-xquant-extension-tool"]], "Actions": [[43, "actions"]], "Attribute Filters": [[42, "attribute-filters"]], "AttributeQuantizationConfig": [[45, "attributequantizationconfig"]], "BNLayerWeightingType": [[1, "bnlayerweightingtype"]], "BaseKerasTrainableQuantizer": [[46, "basekerastrainablequantizer"]], "BasePytorchTrainableQuantizer": [[46, "basepytorchtrainablequantizer"]], "BatchNormAlignemntLossType": [[1, "batchnormalignemntlosstype"]], "BitWidthConfig": [[0, null]], "ChannelAxis": [[3, "channelaxis"]], "ChannelsFilteringStrategy": [[6, "channelsfilteringstrategy"]], "CoreConfig": [[39, null]], "Cosine Similarity Comparison": [[49, "cosine-similarity-comparison"]], "Data Generation Configuration": [[1, null]], "DataInitType": [[1, "datainittype"]], "DebugConfig": [[40, "debugconfig"]], "DefaultDict Class": [[2, null]], "EditRule": [[43, "editrule"]], "Enable a Logger": [[35, null]], "Filters": [[43, "filters"]], "FrameworkInfo Class": [[3, null]], "Fusing": [[45, "fusing"]], "GPTQHessianScoresConfig Class": [[4, "gptqhessianscoresconfig-class"]], "Get DataGenerationConfig for Keras Models": [[14, null]], "Get DataGenerationConfig for Pytorch Models": [[16, null]], "Get GradientPTQConfig for Keras Models": [[15, null]], "Get GradientPTQConfig for Pytorch Models": [[17, null]], "Get Resource Utilization information for Keras Models": [[22, null]], "Get Resource Utilization information for PyTorch Models": [[30, null]], "Get TargetPlatformCapabilities for sdsp converter version": [[19, null]], "Get TargetPlatformCapabilities for tpc version": [[18, null]], "GradientPTQConfig Class": [[4, null]], "GradualActivationQuantizationConfig": [[4, "gradualactivationquantizationconfig"]], "How to Run": [[48, "how-to-run"]], "ImageGranularity": [[1, "imagegranularity"]], "ImageNormalizationType": [[1, "imagenormalizationtype"]], 
"ImagePipelineType": [[1, "imagepipelinetype"]], "ImportanceMetric": [[6, "importancemetric"]], "Indices and tables": [[13, "indices-and-tables"]], "Install": [[50, "install"]], "Keras Data Generation": [[20, null]], "Keras Gradient Based Post Training Quantization": [[21, null]], "Keras Post Training Quantization": [[24, null]], "Keras Quantization Aware Training Model Finalize": [[26, null]], "Keras Quantization Aware Training Model Init": [[27, null]], "Keras Structured Pruning": [[25, null]], "Keras Tutorial": [[41, "keras-tutorial"]], "KerasExportSerializationFormat": [[41, "kerasexportserializationformat"]], "Layer Attributes Filters": [[42, null]], "Load Quantized Keras Model": [[23, null]], "MCTQ": [[41, "mctq"]], "MCTQ Quantization Format": [[41, "mctq-quantization-format"]], "ManualBitWidthSelection": [[0, "manualbitwidthselection"]], "Mixed-precision Configuration Bit-width": [[49, "mixed-precision-configuration-bit-width"]], "MixedPrecisionQuantizationConfig": [[5, null]], "Model Compression Toolkit User Guide": [[50, null]], "MpDistanceWeighting": [[5, "mpdistanceweighting"]], "MpMetricNormalization": [[5, "mpmetricnormalization"]], "ONNX": [[41, "onnx"]], "ONNX model output names": [[41, "onnx-model-output-names"]], "ONNX opset version": [[41, "onnx-opset-version"]], "OpQuantizationConfig": [[45, "opquantizationconfig"]], "OperatorSetGroup": [[45, "operatorsetgroup"]], "OperatorsSet": [[45, "operatorsset"]], "OutputLossType": [[1, "outputlosstype"]], "Overall Process Flow": [[48, "overall-process-flow"]], "Overview": [[50, "overview"]], "Pruning Configuration": [[6, null]], "Pruning Information": [[7, null]], "PyTorch Quantization Aware Training Model Finalize": [[33, null]], "PyTorch Quantization Aware Training Model Init": [[34, null]], "Pytorch Data Generation": [[28, null]], "Pytorch Gradient Based Post Training Quantization": [[29, null]], "Pytorch Post Training Quantization": [[31, null]], "Pytorch Structured Pruning": [[32, null]], "Pytorch 
Tutorial": [[41, "pytorch-tutorial"]], "PytorchExportSerializationFormat": [[41, "pytorchexportserializationformat"]], "QATConfig": [[44, "qatconfig"]], "QFractionLinearAnnealingConfig": [[4, "qfractionlinearannealingconfig"]], "QuantizationConfig": [[8, null]], "QuantizationConfigOptions": [[45, "quantizationconfigoptions"]], "QuantizationErrorMethod": [[9, null]], "QuantizationFormat": [[41, "quantizationformat"]], "QuantizationMethod": [[45, "quantizationmethod"]], "Quickstart": [[50, "quickstart"]], "References": [[50, "references"]], "ResourceUtilization": [[10, null]], "RoundingType": [[4, "roundingtype"]], "SchedulerType": [[1, "schedulertype"]], "Supported Features": [[50, "supported-features"]], "TargetPlatformCapabilities": [[45, "targetplatformcapabilities"]], "Technical Constraints": [[50, "technical-constraints"]], "TrainableQuantizerActivationConfig": [[46, "trainablequantizeractivationconfig"]], "TrainableQuantizerWeightsConfig": [[46, "trainablequantizerweightsconfig"]], "TrainingMethod": [[44, "trainingmethod"], [46, "trainingmethod"]], "Understanding the General Troubleshoots": [[48, "understanding-the-general-troubleshoots"]], "Understanding the Judgeable Troubleshoots": [[48, "understanding-the-judgeable-troubleshoots"]], "Understanding the Quantization Error Graph": [[48, "understanding-the-quantization-error-graph"]], "Use exported model for inference": [[41, "use-exported-model-for-inference"]], "Visualization within TensorBoard": [[49, null]], "XQuant Configuration": [[12, null]], "XQuant Extension Tool": [[48, null]], "XQuant Report Keras": [[36, null]], "XQuant Report Pytorch": [[37, null]], "XQuant Report Troubleshoot Pytorch": [[38, null]], "XQuantConfig Format and Examples": [[48, "xquantconfig-format-and-examples"]], "XQuantConfig parameter": [[48, "id3"]], "core": [[13, "core"]], "data_generation": [[13, "data-generation"]], "debug_config Module": [[40, null]], "exporter": [[13, "exporter"]], "exporter Module": [[41, null]], "gptq": 
[[13, "gptq"]], "keras serialization format": [[41, "keras-serialization-format"]], "keras_export_model": [[41, "keras-export-model"]], "keras_load_quantized_model": [[13, "keras-load-quantized-model"]], "network_editor Module": [[43, null]], "pruning": [[13, "pruning"]], "ptq": [[13, "ptq"]], "pytorch_export_model": [[41, "pytorch-export-model"]], "qat": [[13, "qat"]], "qat_config Module": [[44, null]], "set_log_folder": [[13, "set-log-folder"]], "target_platform_capabilities": [[13, "target-platform-capabilities"]], "target_platform_capabilities Module": [[45, null]], "trainable_infrastructure": [[13, "trainable-infrastructure"]], "trainable_infrastructure Module": [[46, null]], "wrapper": [[11, null], [13, "wrapper"]], "xquant": [[13, "xquant"]]}, "docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", "api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/Wrapper", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", "api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/get_target_platform_capabilities_sdsp", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", "api/api_docs/methods/keras_pruning_experimental", 
"api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", "api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental", "api/api_docs/modules/core_config", "api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform_capabilities", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/XQuant_Extension_Tool", "guidelines/visualization", "index"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", "api/api_docs/classes/QuantizationConfig.rst", 
"api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/Wrapper.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/get_target_platform_capabilities_sdsp.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", "api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", "api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", "api/api_docs/modules/network_editor.rst", 
"api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform_capabilities.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/XQuant_Extension_Tool.rst", "guidelines/visualization.rst", "index.rst"], "indexentries": {"add_metadata (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.add_metadata", false]], "attributefilter (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.AttributeFilter", false]], "attributequantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig", false]], "base_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.base_config", false]], "basekerastrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer", false]], "basepytorchtrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer", false]], "batchnormalignemntlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BatchNormAlignemntLossType", false]], "bit_width (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, 
"model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.bit_width", false]], "bitwidthconfig (class in model_compression_toolkit.core)": [[0, "model_compression_toolkit.core.BitWidthConfig", false]], "bnlayerweightingtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BNLayerWeightingType", false]], "changecandidatesactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr", false]], "changecandidatesactivationquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod", false]], "changecandidatesweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr", false]], "changecandidatesweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod", false]], "changefinalactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr", false]], "changefinalweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr", false]], "changefinalweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod", false]], "changequantizationparamfunction (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction", 
false]], "channelaxis (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.ChannelAxis", false]], "channels_filtering_strategy (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.channels_filtering_strategy", false]], "channelsfilteringstrategy (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ChannelsFilteringStrategy", false]], "coreconfig (class in model_compression_toolkit.core)": [[39, "model_compression_toolkit.core.CoreConfig", false]], "datagenerationconfig (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataGenerationConfig", false]], "datainittype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataInitType", false]], "debugconfig (class in model_compression_toolkit.core)": [[40, "model_compression_toolkit.core.DebugConfig", false]], "default_qco (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.default_qco", false]], "defaultdict (class in model_compression_toolkit)": [[2, "model_compression_toolkit.DefaultDict", false]], "editrule (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.EditRule", false]], "enable_weights_quantization (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.enable_weights_quantization", false]], "eq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Eq", false]], "filter 
(model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.filter", false]], "frameworkinfo (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.FrameworkInfo", false]], "fuse_op_quantization_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.fuse_op_quantization_config", false]], "fusing (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing", false]], "fusing_patterns (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.fusing_patterns", false]], "get() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.get", false]], "get_keras_data_generation_config() (in module model_compression_toolkit.data_generation)": [[14, "model_compression_toolkit.data_generation.get_keras_data_generation_config", false]], "get_keras_gptq_config() (in module model_compression_toolkit.gptq)": [[15, "model_compression_toolkit.gptq.get_keras_gptq_config", false]], "get_pytorch_data_generation_config() (in module model_compression_toolkit.data_generation)": [[16, "model_compression_toolkit.data_generation.get_pytorch_data_generation_config", false]], "get_pytorch_gptq_config() (in module model_compression_toolkit.gptq)": [[17, "model_compression_toolkit.gptq.get_pytorch_gptq_config", false]], "get_target_platform_capabilities() (in module model_compression_toolkit)": [[18, 
"model_compression_toolkit.get_target_platform_capabilities", false]], "get_target_platform_capabilities_sdsp() (in module model_compression_toolkit)": [[19, "model_compression_toolkit.get_target_platform_capabilities_sdsp", false]], "gptqhessianscoresconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GPTQHessianScoresConfig", false]], "gradientptqconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradientPTQConfig", false]], "gradualactivationquantizationconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradualActivationQuantizationConfig", false]], "greater (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Greater", false]], "greatereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.GreaterEq", false]], "imagegranularity (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageGranularity", false]], "imagenormalizationtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageNormalizationType", false]], "imagepipelinetype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImagePipelineType", false]], "importance_metric (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.importance_metric", false]], "importance_scores (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.importance_scores", false]], "importancemetric (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ImportanceMetric", false]], "insert_preserving_quantizers 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.insert_preserving_quantizers", false]], "is_simd_padding (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.is_simd_padding", false]], "keras_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[20, "model_compression_toolkit.data_generation.keras_data_generation_experimental", false]], "keras_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.keras_export_model", false]], "keras_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[21, "model_compression_toolkit.gptq.keras_gradient_post_training_quantization", false]], "keras_load_quantized_model() (in module model_compression_toolkit)": [[23, "model_compression_toolkit.keras_load_quantized_model", false]], "keras_post_training_quantization() (in module model_compression_toolkit.ptq)": [[24, "model_compression_toolkit.ptq.keras_post_training_quantization", false]], "keras_pruning_experimental() (in module model_compression_toolkit.pruning)": [[25, "model_compression_toolkit.pruning.keras_pruning_experimental", false]], "keras_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[26, "model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental", false]], "keras_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[27, "model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental", false]], "keras_resource_utilization_data() (in module 
model_compression_toolkit.core)": [[22, "model_compression_toolkit.core.keras_resource_utilization_data", false]], "kerasexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.KerasExportSerializationFormat", false]], "keys() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.keys", false]], "lut_values_bitwidth (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.lut_values_bitwidth", false]], "manual_activation_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_activation_bit_width_selection_list", false]], "manual_weights_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_weights_bit_width_selection_list", false]], "manualbitwidthselection (class in model_compression_toolkit.core.common.quantization.bit_width_config)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection", false]], "mctwrapper (class in model_compression_toolkit.wrapper.mct_wrapper)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper", false]], "mixedprecisionquantizationconfig (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MixedPrecisionQuantizationConfig", false]], "mpdistanceweighting (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpDistanceWeighting", false]], "mpmetricnormalization (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpMetricNormalization", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": 
[[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.name", false]], "nodenamefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameFilter", false]], "nodenamescopefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameScopeFilter", false]], "nodetypefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeTypeFilter", false]], "noteq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.NotEq", false]], "num_score_approximations (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.num_score_approximations", false]], "operator_groups (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.operator_groups", false]], "operator_set 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.operator_set", false]], "operators_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.operators_set", false]], "operatorsetgroup (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup", false]], "operatorsset (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet", false]], "opquantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OpQuantizationConfig", false]], "outputlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.OutputLossType", false]], "pruning_masks (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.pruning_masks", false]], "pruningconfig (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.PruningConfig", false]], "pruninginfo (class in model_compression_toolkit.pruning)": [[7, "model_compression_toolkit.pruning.PruningInfo", false]], "pytorch_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[28, "model_compression_toolkit.data_generation.pytorch_data_generation_experimental", false]], "pytorch_export_model (class in 
model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.pytorch_export_model", false]], "pytorch_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[29, "model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization", false]], "pytorch_post_training_quantization() (in module model_compression_toolkit.ptq)": [[31, "model_compression_toolkit.ptq.pytorch_post_training_quantization", false]], "pytorch_pruning_experimental() (in module model_compression_toolkit.pruning)": [[32, "model_compression_toolkit.pruning.pytorch_pruning_experimental", false]], "pytorch_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[33, "model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental", false]], "pytorch_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[34, "model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental", false]], "pytorch_resource_utilization_data() (in module model_compression_toolkit.core)": [[30, "model_compression_toolkit.core.pytorch_resource_utilization_data", false]], "pytorchexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.PytorchExportSerializationFormat", false]], "qatconfig (class in model_compression_toolkit.qat)": [[44, "model_compression_toolkit.qat.QATConfig", false]], "qc_options (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.qc_options", false]], "qfractionlinearannealingconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.QFractionLinearAnnealingConfig", false]], "quantization_configurations 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.quantization_configurations", false]], "quantizationconfig (class in model_compression_toolkit.core)": [[8, "model_compression_toolkit.core.QuantizationConfig", false]], "quantizationconfigoptions (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions", false]], "quantizationerrormethod (class in model_compression_toolkit.core)": [[9, "model_compression_toolkit.core.QuantizationErrorMethod", false]], "quantizationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.QuantizationFormat", false]], "quantizationmethod (class in model_compression_toolkit.target_platform_capabilities)": [[45, "model_compression_toolkit.target_platform_capabilities.QuantizationMethod", false]], "quantize_and_export() (model_compression_toolkit.wrapper.mct_wrapper.mctwrapper method)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export", false]], "resourceutilization (class in model_compression_toolkit.core)": [[10, "model_compression_toolkit.core.ResourceUtilization", false]], "roundingtype (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.RoundingType", false]], "schedulertype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.SchedulerType", false]], "schema_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.SCHEMA_VERSION", false]], "set_log_folder() (in 
module model_compression_toolkit)": [[35, "model_compression_toolkit.set_log_folder", false]], "smaller (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Smaller", false]], "smallereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.SmallerEq", false]], "targetplatformcapabilities (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities", false]], "tpc_minor_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_minor_version", false]], "tpc_patch_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_patch_version", false]], "tpc_platform_type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_platform_type", false]], "trainablequantizeractivationconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig", false]], "trainablequantizerweightsconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig", false]], "trainingmethod (class in model_compression_toolkit.trainable_infrastructure)": 
[[46, "model_compression_toolkit.trainable_infrastructure.TrainingMethod", false]], "type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.type", false]], "weights_n_bits (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_n_bits", false]], "weights_per_channel_threshold (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_per_channel_threshold", false]], "weights_quantization_method (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_quantization_method", false]], "xquant_report_keras_experimental() (in module model_compression_toolkit.xquant.keras.facade_xquant_report)": [[36, "model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental", false]], "xquant_report_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[37, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_pytorch_experimental", false]], "xquant_report_troubleshoot_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[38, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_troubleshoot_pytorch_experimental", false]], "xquantconfig (class in model_compression_toolkit.xquant.common.xquant_config)": 
[[12, "model_compression_toolkit.xquant.common.xquant_config.XQuantConfig", false]]}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [19, 3, 1, "", "get_target_platform_capabilities_sdsp"], [23, 3, 1, "", "keras_load_quantized_model"], [35, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", "keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [39, 0, 1, "", "CoreConfig"], [40, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [5, 0, 1, "", "MpDistanceWeighting"], [5, 0, 1, "", "MpMetricNormalization"], [8, 0, 1, "", "QuantizationConfig"], [9, 0, 1, "", "QuantizationErrorMethod"], [10, 0, 1, "", "ResourceUtilization"], [22, 3, 1, "", "keras_resource_utilization_data"], [30, 3, 1, "", "pytorch_resource_utilization_data"]], "model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"], [0, 2, 1, "", "manual_weights_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[43, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesActivationQuantizationMethod"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeQuantizationParamFunction"], [43, 0, 1, "", "EditRule"], [43, 0, 1, "", "NodeNameFilter"], [43, 0, 1, 
"", "NodeNameScopeFilter"], [43, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", "get_pytorch_data_generation_config"], [20, 3, 1, "", "keras_data_generation_experimental"], [28, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[41, 0, 1, "", "KerasExportSerializationFormat"], [41, 0, 1, "", "PytorchExportSerializationFormat"], [41, 0, 1, "", "QuantizationFormat"], [41, 0, 1, "", "keras_export_model"], [41, 0, 1, "", "pytorch_export_model"]], "model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, "", "GradualActivationQuantizationConfig"], [4, 0, 1, "", "QFractionLinearAnnealingConfig"], [4, 0, 1, "", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [21, 3, 1, "", "keras_gradient_post_training_quantization"], [29, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[6, 0, 1, "", "ChannelsFilteringStrategy"], [6, 0, 1, "", "ImportanceMetric"], [6, 0, 1, "", "PruningConfig"], [7, 0, 1, "", "PruningInfo"], [25, 3, 1, "", "keras_pruning_experimental"], [32, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[6, 2, 1, "", "channels_filtering_strategy"], [6, 2, 1, "", "importance_metric"], [6, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[7, 4, 1, "", "importance_scores"], [7, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[24, 3, 1, "", 
"keras_post_training_quantization"], [31, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[44, 0, 1, "", "QATConfig"], [26, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [27, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [34, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform_capabilities": [[42, 0, 1, "", "AttributeFilter"], [42, 0, 1, "", "Eq"], [42, 0, 1, "", "Greater"], [42, 0, 1, "", "GreaterEq"], [42, 0, 1, "", "NotEq"], [45, 0, 1, "", "QuantizationMethod"], [42, 0, 1, "", "Smaller"], [42, 0, 1, "", "SmallerEq"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema": [[45, 0, 1, "", "AttributeQuantizationConfig"], [45, 0, 1, "", "Fusing"], [45, 0, 1, "", "OpQuantizationConfig"], [45, 0, 1, "", "OperatorSetGroup"], [45, 0, 1, "", "OperatorsSet"], [45, 0, 1, "", "QuantizationConfigOptions"], [45, 0, 1, "", "TargetPlatformCapabilities"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig": [[45, 2, 1, "", "enable_weights_quantization"], [45, 2, 1, "", "lut_values_bitwidth"], [45, 2, 1, "", "weights_n_bits"], [45, 2, 1, "", "weights_per_channel_threshold"], [45, 2, 1, "", "weights_quantization_method"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing": [[45, 2, 1, "", "fuse_op_quantization_config"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_groups"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup": [[45, 2, 1, "", "name"], [45, 2, 1, "", "operators_set"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet": [[45, 2, 1, "", "name"], [45, 2, 1, "", "qc_options"], [45, 2, 1, "", "type"]], 
"model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions": [[45, 2, 1, "", "base_config"], [45, 2, 1, "", "quantization_configurations"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities": [[45, 2, 1, "", "SCHEMA_VERSION"], [45, 2, 1, "", "add_metadata"], [45, 2, 1, "", "default_qco"], [45, 2, 1, "", "fusing_patterns"], [45, 2, 1, "", "insert_preserving_quantizers"], [45, 2, 1, "", "is_simd_padding"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_set"], [45, 2, 1, "", "tpc_minor_version"], [45, 2, 1, "", "tpc_patch_version"], [45, 2, 1, "", "tpc_platform_type"]], "model_compression_toolkit.trainable_infrastructure": [[46, 0, 1, "", "BaseKerasTrainableQuantizer"], [46, 0, 1, "", "BasePytorchTrainableQuantizer"], [46, 0, 1, "", "TrainableQuantizerActivationConfig"], [46, 0, 1, "", "TrainableQuantizerWeightsConfig"], [46, 0, 1, "", "TrainingMethod"]], "model_compression_toolkit.wrapper.mct_wrapper": [[11, 0, 1, "", "MCTWrapper"]], "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper": [[11, 1, 1, "", "quantize_and_export"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[36, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[37, 3, 1, "", "xquant_report_pytorch_experimental"], [38, 3, 1, "", "xquant_report_troubleshoot_pytorch_experimental"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "terms": {"": [3, 6, 8, 10, 21, 24, 25, 26, 27, 29, 31, 32, 34, 35, 41, 42, 43, 45, 46, 48, 50], "0": [1, 3, 
4, 5, 7, 8, 11, 12, 14, 16, 21, 24, 25, 26, 27, 32, 41, 46, 48], "05": 8, "06": 5, "08153": 46, "1": [1, 3, 4, 5, 7, 8, 11, 12, 17, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 41, 48, 50], "10": [20, 21, 24, 27, 28, 29, 31, 34], "10000000000": 5, "14": 11, "15": 41, "16": [12, 41, 48], "1902": 46, "1e": [5, 15, 17], "1st": 15, "2": [3, 8, 12, 15, 17, 20, 28, 45, 46, 48, 50], "20": 49, "2021": 50, "2023": 50, "224": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "2nd": 15, "3": [3, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 46], "32": [4, 5, 11], "3e": [15, 17], "3rd": 15, "4": [15, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 48], "4th": 15, "5": [11, 12, 15, 17, 25, 32, 48], "50": [25, 32], "52587890625e": 8, "6": 28, "75": [11, 21, 24, 26, 27], "8": [20, 21, 24, 26, 27, 28, 41, 45, 46], "9": 43, "A": [0, 3, 4, 5, 7, 8, 13, 15, 17, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 43, 44, 45, 50], "And": 48, "As": [5, 48, 49], "By": [4, 5, 11, 25, 29, 31, 32, 41, 49], "For": [3, 8, 12, 18, 19, 20, 21, 24, 26, 27, 28, 34, 41, 45, 46, 47, 48, 49, 50], "If": [2, 3, 4, 5, 12, 15, 17, 21, 24, 26, 27, 29, 31, 39, 41, 42, 45, 48], "In": [5, 20, 21, 24, 27, 28, 29, 31, 34, 41, 42, 44, 48], "It": [2, 11, 12, 45, 46, 48], "No": 1, "One": 49, "The": [0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 36, 37, 38, 41, 43, 45, 46, 48, 49], "Then": [3, 21, 24, 27, 29, 31, 34, 43, 49], "There": [41, 48, 49], "These": [48, 49], "To": [41, 48, 49], "With": 48, "_": [21, 24, 27, 29, 31, 34, 41], "_input_data": 41, "_model_input_nam": 41, "_model_output_nam": 41, "_with_model_output_loss_object": 48, "about": [3, 4, 7, 13, 15, 17, 21, 24, 26, 27, 41, 45, 46], "abov": [12, 48], "absolut": 9, "abstract": [13, 46], "accept": [15, 45], "access": 7, "accord": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 42], "accordingli": 45, "accuraci": [12, 48], "achiev": 25, "act": 7, 
"act_hessian_default_batch_s": [15, 17], "action": 40, "activ": [0, 3, 4, 5, 8, 10, 11, 21, 22, 24, 27, 29, 30, 31, 34, 41, 43, 44, 45, 46, 48, 49], "activation_bias_correct": 8, "activation_bias_correction_threshold": 8, "activation_channel_equ": 8, "activation_error_method": [8, 11], "activation_memori": 10, "activation_min_max_map": 3, "activation_n_bit": [45, 46], "activation_op": 3, "activation_quantization_candid": 46, "activation_quantization_method": [43, 45, 46], "activation_quantization_param": 46, "activation_quantization_params_fn": 43, "activation_quantizer_map": 3, "activation_quantizer_params_overrid": 44, "activation_training_method": 44, "ad": 45, "adam": [14, 15, 17], "add": [1, 3, 12, 14, 16, 23, 46], "add_metadata": 45, "addit": [23, 41, 48], "address": 45, "advanc": 3, "affect": [21, 24, 26, 27], "after": [13, 21, 23, 24, 27, 34, 48, 50], "aim": [25, 32], "algorithm": 5, "align": [1, 14, 16], "all": [1, 3, 4, 5, 8, 43, 46, 49], "allimag": [1, 16], "allow": [6, 12, 20, 28, 41, 45], "along": 49, "also": [25, 32, 45], "an": [1, 2, 3, 4, 7, 11, 13, 21, 24, 27, 34, 36, 37, 38, 41, 42, 43, 45, 46, 48, 50], "analysi": [25, 32], "analyz": [25, 32, 38], "analyze_similar": 40, "ani": [1, 2, 3, 5, 11, 36, 37, 38, 41, 42, 46], "anneal": 4, "api": [3, 4, 24, 27, 34, 44, 48], "appli": [0, 1, 5, 8, 13, 41, 42, 43, 45, 48], "applic": [21, 22, 24, 25, 26, 27, 41], "approach": 6, "appropri": 48, "approxim": [6, 25, 32], "ar": [3, 5, 12, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 41, 45, 46, 47, 48, 49], "architectur": [25, 32], "argument": [4, 41, 45], "arrai": [7, 11], "art": 50, "arxiv": [46, 50], "assess": [25, 32], "associ": [25, 32], "assum": [25, 32], "astyp": 41, "attent": [4, 15, 17, 46], "attirbut": 3, "attr": 42, "attr_nam": 43, "attr_valu": 43, "attr_weights_configs_map": 45, "attribut": [43, 45, 46], "attributefilt": 42, "auto": 13, "automat": 48, "auxiliari": [15, 17], "avail": 41, "averag": [1, 5, 14, 15, 16, 17, 48], "avg": 5, "awar": [13, 44, 46, 
50], "axi": [3, 46, 48], "backend": 45, "base": [1, 4, 5, 8, 9, 11, 13, 15, 17, 18, 19, 20, 25, 28, 31, 32, 46, 48, 50], "base_config": 45, "basenod": 7, "basenodematch": 0, "basic": 46, "batch": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 27, 28, 29, 31, 34], "batchnorm": [1, 14, 16, 20, 21, 24, 27, 29, 31, 34], "batchnorm2d": 28, "batchnormalignemntlosstyp": [14, 16], "batchwis": [1, 14], "been": 7, "begin": 4, "behavior": [40, 48], "being": [21, 24, 27, 29, 31, 34, 45, 46], "below": [12, 48], "between": [4, 5, 12, 21, 29, 31, 45, 48, 49], "bia": [4, 11, 15, 17, 21, 24, 26, 27], "bidwidth": 5, "bit": [0, 5, 10, 13, 21, 24, 26, 27, 34, 39, 41, 43, 45, 46, 50], "bit_width": 0, "bit_width_config": [0, 39], "bitwidth": [5, 12, 21, 24, 26, 27, 48], "bitwidthconfig": [13, 39], "block": [46, 49], "bn_alignment_loss_typ": [1, 14, 16], "bn_layer_typ": [1, 14, 16], "bnlayerweightingtyp": [14, 16], "bool": [1, 4, 5, 11, 12, 14, 15, 16, 17, 40, 45, 46], "boolean": 23, "bop": 10, "both": [11, 21, 24, 29, 31, 33, 46, 49], "build": [22, 30, 46, 50], "built": [27, 34, 46], "bypass": 40, "byte": [10, 21, 24, 25, 27, 32, 34, 49], "c": [12, 48], "calcul": [5, 6, 13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 48], "calibr": [11, 21, 22, 24, 27, 29, 30, 31, 34], "call": [22, 30, 35, 45, 49], "callabl": [3, 5, 11, 12, 15, 17, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 36, 37, 38, 41, 42], "can": [3, 4, 8, 11, 13, 15, 17, 20, 22, 25, 28, 30, 32, 40, 41, 43, 45, 46, 48, 49, 50], "candid": [5, 21, 24, 26, 27, 43], "cannot": 45, "capabl": [11, 18, 19, 25, 30, 32], "case": 5, "caus": [12, 13, 38, 48], "chang": [20, 28, 41, 43, 48, 49], "changecandidatesactivationquantconfigattr": 43, "changecandidatesactivationquantizationmethod": 43, "changecandidatesweightsquantconfigattr": 43, "changecandidatesweightsquantizationmethod": 43, "changefinalactivationquantconfigattr": 43, "changefinalweightsquantconfigattr": 43, "changefinalweightsquantizationmethod": 43, "changequantizationmethod": 43, 
"changequantizationparamfunct": 43, "channel": [3, 6, 7, 13, 25, 32, 45, 46, 49], "channels_filtering_strategi": 6, "check": [5, 41, 42, 43], "choos": [1, 4, 41], "chosen": 49, "circl": 48, "class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 39, 40, 41, 42, 43, 44, 45, 46], "clibrat": 31, "click": 49, "clip": [1, 14, 16], "clone": 50, "coeffici": [3, 21, 24, 26, 27, 29, 31, 45, 46], "cohen": 50, "collaps": 11, "collect": [3, 21, 24, 27, 29, 31, 34, 36, 37, 38, 49], "com": 50, "combin": 45, "common": [0, 12], "compar": [5, 21, 29, 31, 48, 49], "comparison": 50, "compat": 41, "compil": 23, "complet": [4, 11], "compon": [45, 46, 48], "compress": [11, 13, 20, 25, 28, 29, 32, 48], "comput": [3, 4, 5, 9, 12, 13, 15, 17, 22, 30, 36, 40, 49], "compute_distance_fn": 5, "concat_threshold_upd": 8, "concaten": [12, 45, 48], "concatn": [12, 48], "config": [4, 20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 39, 43, 46], "configur": [0, 4, 5, 8, 10, 11, 13, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50], "configuration_overwrit": 5, "confirm": 48, "connect": 11, "consid": [6, 14, 16, 25, 32, 45], "consol": 48, "constant": [6, 43, 46], "constraint": [21, 24, 25, 29, 31, 32], "contain": [7, 13, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 46, 48], "conv2d": [3, 20, 21, 24, 26, 27, 28, 43, 45], "conveni": 35, "convent": 48, "convert": [11, 13, 26, 33, 45], "core": [0, 3, 5, 8, 9, 10, 11, 21, 22, 24, 25, 26, 27, 29, 30, 32, 33, 34, 39, 40, 43], "core_config": [21, 22, 24, 26, 27, 29, 30, 31, 33, 34], "coreconfig": [13, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34], "correct": 11, "correspond": [7, 48], "cosin": [48, 50], "count_param": [21, 24, 25, 26, 27], "countermeasur": 48, "cpuexecutionprovid": 41, "creat": [3, 4, 8, 11, 13, 14, 15, 16, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 42, 43, 45, 48], "creation": 41, "crop": 1, "cudaexecutionprovid": 41, "current": [4, 41], "custom": [5, 
12, 20, 23, 27, 28, 41], "custom_metric_fn": 5, "custom_object": [23, 26, 27], "custom_similarity_metr": 12, "custom_tpc_opset_to_lay": 8, "cut": 40, "dash": 48, "data": [13, 14, 16, 22, 25, 30, 32, 36, 37, 38, 41, 45, 49, 50], "data_gen_batch_s": [1, 14, 16, 20, 28], "data_gener": [1, 14, 16, 20, 28], "data_generation_config": [20, 28], "data_init_typ": [1, 14, 16], "dataclass": [39, 40], "datagenerationconfig": [1, 13, 20, 28], "datainittyp": [14, 16], "dataset": [4, 11, 15, 17, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 41, 48, 49], "debug": [39, 40], "debug_config": 39, "debugconfig": 39, "deeper": 49, "def": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "default": [1, 2, 4, 5, 6, 11, 14, 15, 16, 17, 21, 24, 25, 29, 31, 32, 39, 41, 44, 45, 49], "default_data_gen_b": [14, 16], "default_factori": 2, "default_keras_extra_pixel": 14, "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "default_keras_tpc": [21, 24, 25, 27], "default_n_it": [14, 16], "default_onnx_opset_vers": 41, "default_pytorch_bn_layer_typ": 16, "default_pytorch_extra_pixel": 16, "default_pytorch_initial_lr": 16, "default_pytorch_last_layer_typ": 16, "default_pytorch_output_loss_multipli": 16, "default_pytorch_tpc": [29, 31, 32, 34], "default_qco": 45, "default_valu": 2, "default_weight_attr_config": 45, "defaultdict": [3, 13], "defin": [0, 4, 5, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 45, 46, 48], "degrad": [12, 13, 38, 48], "demonstr": [41, 45], "dens": [3, 20], "dense_nparam": [25, 32], "depend": [1, 21, 24, 27, 29, 31, 34], "describ": 48, "descript": 11, "desir": [13, 21, 22, 24, 26, 27, 29, 30, 31, 34], "detail": [41, 45, 48], "detect": [12, 13, 38, 48], "determin": [6, 25, 32, 45], "develop": 50, "deviat": 48, "devic": [13, 18], "device_typ": 18, "diagram": 45, "diamant": 50, "dict": [3, 7, 12, 36, 37, 38, 41, 45, 46, 48], "dictionari": [2, 3, 4, 12, 26, 27, 36, 37, 38, 41, 43, 44, 46], "differ": [1, 8, 13, 21, 24, 26, 27, 41, 45, 48, 
49], "dikstein": 50, "dir": [12, 48, 49], "directori": [12, 13, 35, 48], "disabl": [15, 17], "displai": [48, 49], "distanc": [5, 11], "distance_weighting_method": [5, 11], "distil": [4, 50], "distribut": 9, "diverg": [9, 49], "divers": 1, "divid": 3, "divis": 49, "dnn": 46, "do": [1, 48, 49], "document": [13, 24, 27, 34, 48], "doe": 48, "doesn": 50, "don": 35, "done": 49, "dot": 49, "dqa": 46, "dror": 50, "dtype": 41, "dummi": 17, "durat": [25, 32], "dure": [4, 13, 14, 15, 16, 17, 18, 19, 36, 37, 38, 41, 43, 45, 46, 47, 49], "e": [3, 5, 11, 21, 24, 27, 29, 31, 34, 50], "each": [5, 6, 7, 12, 21, 24, 25, 27, 29, 31, 32, 34, 43, 45, 46, 48, 49], "easi": 48, "easili": [13, 50], "edit": [39, 40, 43], "editrul": 40, "either": 45, "element": [7, 45], "empti": 2, "emul": 46, "enabl": [1, 5, 8, 11, 13, 15, 17, 40, 46, 50], "enable_activation_quant": [45, 46], "enable_weights_quant": [45, 46], "encapsul": [0, 8], "end_step": 4, "engin": 50, "enhanc": 50, "ensur": 5, "entir": 13, "enum": [1, 3, 4, 6, 9, 46], "epoch": [4, 11, 15, 17], "epsilon": 5, "eptq": 50, "eq": 42, "equal": 42, "er_list": 43, "error": [9, 11, 12], "estim": [4, 46], "etc": [3, 10, 13, 21, 24, 27, 29, 31, 34, 49], "euclidean": 49, "evalu": [5, 36, 37, 38], "even": 48, "exact": 17, "exampl": [3, 8, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 43, 45, 46, 50], "exceed": 48, "execut": 48, "exist": [2, 43, 48], "exp": 5, "exp_distance_weighting_sigma": 5, "expect": [4, 49], "experiment": [13, 20, 28, 50], "explain": [12, 13, 36, 37, 38, 46], "explicitli": 45, "expon": 5, "exponenti": 5, "export": 11, "extend": [25, 32], "extens": [11, 41, 50], "extra": [1, 14, 16], "extra_pixel": [1, 14, 16], "extrem": 48, "facade_xquant_report": [36, 37, 38], "factor": [4, 5, 9, 15, 17], "factori": [0, 4, 39, 40], "fake": 41, "fake_qu": [27, 34], "fakely_qu": 41, "fallback": 45, "fals": [4, 5, 8, 11, 12, 14, 15, 17, 40, 46], "familiar": 48, "fetch": 45, "few": [49, 50], "field": [18, 19, 42, 45, 47], 
"figur": [40, 49], "file": [23, 26, 27, 35, 41], "filepath": 23, "filter": [0, 1, 6], "final": [4, 5, 12, 13, 20, 28, 43, 48, 49, 50], "find": [21, 24, 27, 34], "fine": [15, 17, 25, 26, 27, 32, 33, 34], "first": [1, 21, 24, 27, 29, 31, 34, 41, 49], "first_layer_multipli": 1, "fix": 45, "fixed_scal": [18, 19, 45, 47], "fixed_zero_point": [18, 19, 45, 47], "flag": [1, 11, 40, 45], "flatten": [20, 28], "flip": 1, "float": [1, 4, 5, 11, 12, 14, 15, 16, 17, 21, 27, 29, 31, 34, 36, 37, 38, 41, 45, 46, 48, 49], "float32": [25, 32, 41], "float_model": [11, 36, 37, 38, 41, 48], "fold": [21, 24, 27, 29, 31, 34], "folder": [35, 48], "follow": [3, 4, 11, 12, 46, 48, 49], "footprint": [25, 32], "form": 45, "format": [3, 13], "fraction": 4, "framework": [3, 11, 46], "frameworkquantizationcap": [22, 29, 30, 31], "free": [6, 20, 25, 28, 32, 50], "freez": 46, "freeze_quant_param": 46, "friendli": [25, 32, 50], "from": [3, 4, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 43, 45, 46, 47, 48, 49, 50], "from_config": 46, "function": [3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 28, 32, 35, 43, 45, 46, 48], "fuse_op_quantization_config": 45, "fusing_pattern": 45, "futur": [18, 19, 20, 28, 45, 47], "g": [3, 11, 21, 24, 27, 29, 31, 34], "gather": [45, 49], "gaussian": [1, 14, 16], "gener": [2, 12, 13, 14, 16, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 45, 49, 50], "generated_imag": [20, 28], "get": [2, 3, 4, 5, 13, 21, 24, 26, 27, 29, 31, 33, 34, 45, 49], "get_config": 46, "get_input": 41, "get_keras_data_generation_config": [13, 14, 20], "get_keras_gptq_config": [11, 13, 15, 21], "get_ort_session_opt": 41, "get_output": 41, "get_pytorch_data_generation_config": [13, 16, 28], "get_pytorch_gptq_config": [11, 13, 17], "get_target_platform_cap": [13, 18, 45], "get_target_platform_capabilities_sdsp": [13, 19, 45], "git": 50, "github": [41, 50], "given": [2, 21, 22, 24, 27, 29, 30, 31, 34], "gordon": 50, "gptq": [4, 11, 15, 17, 21, 29], 
"gptq_conf": [15, 17, 29], "gptq_config": [21, 29, 31], "gptq_quantizer_params_overrid": 4, "gptq_representative_data_gen": [21, 29], "grad": 1, "gradient": [1, 4, 11, 13, 31, 50], "gradientptq": [4, 13], "gradientptqconfig": [13, 21, 29], "gradual": 4, "gradual_activation_quant": [15, 17], "gradual_activation_quantization_config": 4, "gradualactivationquant": [15, 17], "gradualactivationquantizationconfig": [15, 17], "granular": [1, 14, 16], "graph": [22, 30, 43, 49], "greater": 42, "greatereq": 42, "greedi": [5, 6], "group": [3, 6, 25, 32, 45], "h": 50, "ha": [7, 41, 42, 43], "habi": 50, "handl": [11, 21, 24, 27, 29, 31, 34], "handler": 35, "hardwar": [13, 25, 32, 45, 46, 50], "have": [3, 41, 42, 48, 49], "henc": 45, "here": [12, 25, 32, 41, 45, 48, 50], "hessian": [4, 5, 6, 9, 11, 15, 17, 25, 32, 50], "hessian_batch_s": [4, 5, 15, 17], "hessian_weights_config": 4, "hessians_num_sampl": 4, "higher": [25, 32], "highlight": 48, "hight": 28, "histogram": [21, 24, 27, 29, 31, 34, 49], "hmse": 9, "hold": [3, 39, 42, 45], "holder": 46, "how": [3, 6, 21, 22, 24, 27, 29, 31, 34, 41, 46, 50], "howev": 41, "hptq": [45, 50], "http": [46, 50], "hw": 22, "i": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50], "ident": [1, 5], "identifi": [25, 32, 45, 48], "ignor": [18, 19, 45, 47], "ilp": [21, 24, 27, 34], "imag": [1, 4, 5, 11, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34, 48, 49], "image_clip": [1, 14, 16], "image_granular": [1, 14, 16], "image_normalization_typ": [1, 14, 16], "image_pipeline_typ": [1, 14, 16], "imagegranular": [14, 16], "imagenet": 1, "imagenet1k_v1": 32, "imagenormalizationtyp": [14, 16], "imagepipelinetyp": [14, 16], "imagewis": 1, "impact": [25, 32], "implement": [12, 46], "implment": 46, "import": [3, 6, 7, 8, 11, 13, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 43, 46, 48, 49], "importance_metr": 6, "importance_scor": 7, "improv": [5, 25, 32, 48], 
"imx500": [11, 41, 45], "imx500_tp_model": 18, "in_model": [21, 22, 24, 26, 27, 30, 33, 34], "in_modul": [31, 48], "includ": [4, 7, 11, 21, 24, 27, 29, 31, 34, 45, 46], "increas": [4, 5], "index": [3, 13], "indic": [3, 7, 25, 32, 45, 48], "individu": 48, "induc": 9, "inf": [8, 10, 11], "infer": [13, 26, 33, 45, 46], "inferablequant": [26, 33], "inferencesess": 41, "info": [6, 35], "inform": [3, 4, 13, 15, 17, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 45, 46, 47], "infrastructur": 46, "init": [13, 43, 50], "initi": [1, 2, 4, 6, 11, 12, 14, 16, 27, 34, 46, 48], "initial_lr": [1, 14, 16], "initial_q_fract": 4, "inner": 2, "input": [1, 5, 11, 14, 16, 21, 24, 27, 29, 31, 34, 40, 45, 48], "input_sc": 8, "input_shap": 20, "insert": 49, "insert_preserving_quant": 45, "instal": 41, "instanc": [4, 11, 13, 15, 17, 43, 45, 49], "instanti": [4, 8, 44], "instruct": 45, "insuffici": [12, 48], "int": [0, 1, 4, 5, 6, 12, 14, 15, 16, 17, 20, 28, 35, 41, 45, 46, 48], "int8": 41, "integ": [5, 41, 45], "interest": 5, "interfac": [4, 11, 17], "introduc": 46, "inverse_min_max_diff": 1, "involv": [20, 25, 28, 32], "is_detect_under_threshold_quantize_error": 12, "is_keras_layer_export": 41, "is_layer_exportable_fn": 41, "is_pytorch_layer_export": 41, "is_simd_pad": 45, "issu": [5, 41, 48], "item": 48, "iter": [1, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34], "its": [2, 3, 11, 13, 23, 25, 32, 42, 45, 49], "jen": 50, "judg": [12, 13, 38, 48], "judgment": 48, "just": 50, "keep": [33, 50], "kei": [2, 11, 12, 25, 32, 42], "kept": [7, 27, 34], "ker": 27, "kera": [3, 11, 13, 43, 46, 50], "keras_appl": [1, 14], "keras_data_generation_experiment": [13, 20], "keras_default_tpc": 22, "keras_file_path": 41, "keras_gradient_post_training_quant": [13, 15, 21], "keras_load_quantized_model": 23, "keras_post_training_quant": [13, 24, 41, 43, 49], "keras_pruning_experiment": [13, 25], "keras_quantization_aware_training_finalize_experiment": [13, 26], "keras_quantization_aware_training_init_experiment": [13, 
26, 27], "keras_resource_utilization_data": [13, 22], "kernel": [3, 21, 24, 26, 27, 43, 46], "kernel_channels_map": 3, "kernel_op": 3, "kernel_ops_attributes_map": 3, "keyword": 45, "kl": [9, 49], "know": [3, 13], "knowledg": [4, 50], "known_dict": 2, "kwarg": 43, "l": [25, 50], "l2": 1, "l2_squar": [1, 14, 16], "l_p_valu": 8, "label": [6, 25, 32, 45, 50], "lambda": 41, "larg": [12, 48], "larger": 5, "last": [3, 4, 5, 48], "last_lay": 5, "last_layer_typ": [1, 16], "latenc": 41, "latest": 50, "launch": 49, "layaer": [13, 38], "layer": [1, 3, 5, 7, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 40, 41, 43, 45, 46, 48, 49], "layer_min_max_map": 3, "layer_weighting_typ": [1, 14, 16], "layerfilterparam": 42, "learn": [1, 14, 15, 16, 17, 46], "learnabl": 46, "least": 6, "left": 11, "let": 41, "level": 35, "lfh": [6, 25, 32], "librari": [3, 8], "like": [8, 45], "limit": [6, 21, 24, 26, 27, 29, 31, 34], "line": 48, "linear": [4, 11, 28], "linear_collaps": [8, 11], "linearli": 4, "link": 48, "list": [0, 1, 3, 5, 11, 14, 15, 16, 20, 28, 40, 41, 43, 50], "liter": 45, "ll": [20, 28], "load": [13, 26, 27, 41, 46], "load_model": [26, 27], "loadopt": 23, "log": [4, 12, 13, 15, 17, 35, 48, 49], "log_funct": [4, 15, 17], "log_norm": 4, "log_tensorboard_xqu": 48, "logdir": 49, "logger": [13, 40, 49], "longer": 41, "look": [24, 27, 34, 45, 50], "lookup": 45, "loss": [1, 4, 12, 14, 15, 16, 17, 21, 25, 29, 31, 32, 48], "low": 11, "lp": 9, "lsq": 46, "lut_pot_quant": 45, "lut_sym_quant": 45, "lut_values_bitwidth": 45, "mae": [9, 49], "mai": [20, 21, 24, 27, 28, 29, 31, 34, 42, 49], "main": [11, 45, 48, 49], "make": 9, "manag": [0, 11], "mandatori": 41, "mani": 49, "manipul": [0, 1], "manner": 45, "manual": [0, 13, 39, 48], "manual_activation_bit_width_selection_list": 0, "manual_weights_bit_width_selection_list": 0, "manualweightsbitwidthselect": 0, "map": [3, 45], "mask": 7, "match": [18, 19, 42, 43], "mathemat": 49, "max": [1, 3, 5, 8, 9, 21, 22, 24, 27, 29, 30, 
31, 34, 49], "maxbit": 5, "maxim": [21, 24, 27, 34], "mct": [3, 8, 11, 13, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50], "mct_current_schema": 45, "mct_quantiz": 41, "mct_wrapper": 11, "mctwrapper": 11, "mean": [1, 4, 9, 49], "measur": [6, 10, 12, 48, 49], "meet": [25, 32], "memori": [10, 25, 32, 49], "messag": 48, "metadata": [7, 45], "method": [4, 5, 6, 9, 11, 13, 25, 32, 35, 41, 43, 44, 45, 46], "metric": [4, 5, 6, 12, 36, 37, 38, 48], "metric_epsilon": 5, "metric_norm": 5, "metric_normalization_threshold": 5, "min": [1, 3, 5, 8, 9, 21, 24, 27, 29, 31, 34, 49], "min_threshold": [8, 46], "minbit": 5, "minim": [5, 9, 21, 25, 29, 31, 32], "minimum": 46, "minor": 45, "minut": 50, "mix": [5, 10, 11, 12, 13, 21, 22, 24, 26, 27, 29, 30, 31, 34, 39, 45, 48, 50], "mixed_precis": 11, "mixed_precision_config": [21, 22, 24, 26, 27, 39], "mixedprecisionquantizationconfig": [11, 13, 21, 22, 24, 26, 27, 39], "mkstemp": 41, "mobilenet": [21, 22], "mobilenet_v2": [24, 26, 27, 29, 30, 31, 33, 34, 41], "mobilenetv2": [24, 26, 27, 41, 49], "model": [3, 4, 5, 7, 8, 10, 11, 12, 13, 18, 19, 20, 21, 24, 25, 28, 29, 31, 32, 36, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49], "model_fil": [26, 27], "model_format_onnx_mctq": 41, "model_mp": 5, "model_output": 41, "modifi": [13, 43], "modul": [13, 28, 29, 30, 31, 32, 37, 38], "more": [9, 18, 19, 24, 25, 27, 32, 34, 41, 45, 47, 48, 49], "most": 48, "mse": [8, 9, 11, 12, 48, 49], "multipl": [3, 5, 35, 45], "multiple_tensors_mse_loss": 4, "multipli": [1, 12, 14, 16, 48], "must": [25, 32, 45], "n_epoch": [4, 11, 15, 17, 21], "n_imag": [20, 28], "n_iter": [1, 14, 16, 20, 28], "nadam": 15, "name": [12, 43, 45, 48, 49], "nchw": 3, "ndarrai": 7, "necessari": [4, 11, 41, 46, 
48], "need": [3, 11, 13, 21, 24, 27, 29, 31, 34, 41, 42, 46, 48], "neg": [1, 5, 48], "negative_min_max_diff": [1, 16], "network": [3, 6, 11, 33, 39, 40, 43, 49, 50], "network_editor": [13, 40], "netzer": 50, "neural": [6, 11, 50], "neuron": 7, "new": [43, 45], "next": [20, 28, 41, 42], "nhwc": 3, "nn": [28, 37, 38], "no_norm": 1, "no_quantization_op": 3, "noclip": [8, 9], "node": [0, 27, 34, 41, 43, 46, 49], "node_nam": 43, "node_name_scop": 43, "node_typ": 43, "nodenamefilt": 43, "nodenamescopefilt": 43, "nodetypefilt": 43, "nois": 9, "non": [5, 15, 17, 45], "none": [1, 2, 4, 5, 8, 11, 12, 15, 17, 21, 23, 24, 27, 29, 31, 34, 35, 39, 41, 43, 44, 45, 46], "norm": [9, 49], "norm_scor": [4, 5], "normal": [1, 4, 5, 14, 16], "note": [21, 24, 26, 27], "notebook": 50, "noteq": 42, "notic": [20, 25, 28, 32, 41], "now": [6, 18, 19, 34, 41, 45, 46, 47, 49], "np": [7, 11, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "num_calibration_batch": [21, 24, 27, 29, 31, 34], "num_interest_points_factor": 5, "num_of_imag": [5, 11, 21, 24], "num_score_approxim": [6, 25, 32], "number": [1, 4, 5, 6, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 45, 46, 48], "numel": 32, "numer": 5, "numpi": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "o": 50, "object": [0, 3, 4, 5, 6, 10, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 29, 30, 31, 34, 41, 43, 45, 46, 48], "observ": [21, 29, 31, 45, 49], "one": [5, 42, 49], "onli": [3, 4, 5, 6, 12, 21, 24, 26, 27, 41, 45], "onlin": [27, 34], "onnx": 11, "onnx_file_path": 41, "onnx_opset_vers": 41, "onnxruntim": 41, "op": [42, 45], "open": [41, 49, 50], "oper": [3, 10, 40, 42, 45], "operator_group": 45, "operator_set": 45, "operators_set": 45, "operatorsetnam": 45, "opquantizationconfig": [18, 19, 47], "optim": [1, 3, 4, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 29, 30, 31, 34, 39, 45, 46, 47, 50], "optimizer_bia": 4, "optimizer_quantization_paramet": 4, "optimizer_rest": [4, 15, 17], "optimizerv2": 15, 
"option": [11, 13, 21, 23, 24, 25, 27, 29, 31, 32, 34, 41, 45], "order": [15, 17, 21, 24, 27, 34, 40, 41, 42, 44], "org": 46, "orient": [13, 46], "origin": [25, 35, 36, 37, 38, 49], "ort": 41, "other": [1, 11, 15, 17, 48], "otherwis": 45, "our": [21, 24, 26, 27, 34, 50], "out": [3, 6], "out1": 50, "out2": 50, "out3": 50, "out_channel_axis_map": 3, "outlier": [12, 48], "output": [1, 3, 12, 14, 16, 20, 21, 24, 27, 28, 29, 31, 33, 34, 45, 48, 49, 50], "output_image_s": [20, 28], "output_loss_multipli": [1, 14, 16], "output_loss_typ": [1, 14, 16], "output_nam": 41, "outputlosstyp": [14, 16], "over": 5, "overrid": [4, 44], "overwrit": 5, "p": 32, "packag": [41, 46, 50], "pad": 45, "page": 13, "pair": 49, "param": [17, 40, 43, 46], "param_item": 11, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "pars": 45, "part": 41, "pass": [2, 3, 5, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 43], "patch": 45, "path": [11, 13, 23, 35, 41, 48, 49], "pattern": 45, "pdf": 46, "per": [1, 3, 4, 21, 24, 27, 34, 45, 46, 49], "per_sampl": 4, "percentag": 5, "peretz": 50, "perform": [6, 10, 11, 20, 25, 28, 32], "phase": 49, "pinpoint": 40, "pip": [41, 50], "pipelin": [1, 11, 14, 16], "pixel": [1, 14, 16], "place": 45, "plan": 41, "platform": [11, 18, 19, 21, 24, 25, 26, 27, 30, 32, 45], "pleas": [24, 27, 34, 41, 44, 48, 50], "plot": [40, 49], "point": [4, 5, 15, 17, 21, 29, 31, 36, 37, 38, 45, 49], "posit": 45, "possibl": [9, 21, 24, 27, 34, 45, 49], "post": [4, 11, 13, 25, 27, 32, 34, 50], "power": [21, 24, 27, 29, 31, 34, 45], "power_of_two": 45, "poweroftwo": 46, "pre": 5, "preced": [21, 24, 27, 29, 31, 34], "precis": [5, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34, 39, 45, 48, 50], "predefin": [5, 6], "predict": 41, "prepar": [11, 13, 27, 34], "preprint": 50, "present": [2, 48, 49], "preserv": 45, "pretrain": [33, 34], "prevent": 5, 
"prior": 5, "prioriti": 11, "problemat": 40, "procedur": 48, "process": [4, 5, 8, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, 32, 39, 40, 43, 44, 45, 47, 49], "product": 49, "project": [41, 50], "properti": 7, "propos": [46, 48], "provid": [2, 11, 20, 25, 28, 32, 41, 45, 46, 48, 49], "prune": [10, 50], "pruned_model": [25, 32], "pruning_config": [25, 32], "pruning_info": [25, 32], "pruning_mask": 7, "pruning_num_score_approxim": 6, "pruningconfig": [6, 13, 25, 32], "pruninginfo": [7, 13, 25, 32], "ptq": [11, 24, 31, 41, 48], "purpos": [20, 28, 40], "py": 50, "pydantic_cor": 45, "pypi": 50, "python": [35, 50], "pytorch": [11, 13, 45, 46, 50], "pytorch_data_generation_experiment": [13, 28], "pytorch_default_tpc": 30, "pytorch_gradient_post_training_quant": [13, 17, 29], "pytorch_post_training_quant": [13, 31, 41, 48], "pytorch_pruning_experiment": [13, 32], "pytorch_quantization_aware_training_finalize_experiment": [13, 33], "pytorch_quantization_aware_training_init_experiment": [13, 33, 34], "pytorch_resource_utilization_data": [13, 30], "q": 41, "q_fraction_scheduler_polici": 4, "qat": [26, 27, 33, 34, 44], "qat_config": [13, 27, 34], "qatconfig": [27, 34], "qc": 8, "qc_option": 45, "qmodel": 11, "qnnpack": 45, "quant": 41, "quantifi": [7, 49], "quantiz": [0, 3, 4, 5, 8, 9, 11, 12, 13, 15, 17, 20, 22, 28, 30, 36, 37, 38, 39, 40, 43, 44, 45, 46, 49, 50], "quantization_config": [39, 46], "quantization_configur": 45, "quantization_format": 41, "quantization_info": [21, 24, 26, 27, 29, 31, 33, 34], "quantization_preserv": [18, 19, 45, 47], "quantizationconfig": [13, 39], "quantizationerrormethod": [8, 11, 13], "quantizationmethod": [3, 46], "quantize_and_export": 11, "quantize_reported_dir": [12, 48], "quantized_exportable_model": 41, "quantized_info": 48, "quantized_model": [11, 21, 24, 26, 27, 33, 34, 36, 37, 38, 48], "quantized_modul": [29, 31], "quantizewrapp": [13, 27, 33, 34], "question": 41, "r": 50, "radam": 16, "rais": 45, "random": [21, 22, 24, 25, 26, 27, 29, 
30, 31, 32, 33, 34, 41], "random_data_gen": 48, "rang": [3, 12, 21, 24, 27, 29, 31, 34, 48], "rate": [1, 14, 15, 16, 17], "ratio": [11, 12, 48], "readi": 33, "readm": 41, "receiv": 11, "recent": 48, "recommend": 48, "recov": [25, 32], "red": 48, "reduc": [5, 25, 32], "reduce_on_plateau": [1, 14], "reduce_on_plateau_with_reset": 16, "reduceonplateau": 1, "refer": [41, 48], "refine_mp_solut": 5, "regard": 42, "regular": [1, 4, 15, 17], "regularization_factor": [4, 15, 17], "regularized_min_max_diff": [1, 14], "relat": [3, 7, 13, 45], "releas": 50, "relev": 41, "relu": 3, "relu_bound_to_power_of_2": 8, "remov": [12, 25, 32, 33, 48], "replac": [26, 48], "report": [12, 13, 48], "report_dir": [12, 48], "repositori": 41, "repr_datagen": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34], "repr_dataset": [36, 37, 38, 41], "repres": [4, 5, 10, 11, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 36, 37, 38, 41, 43, 45, 48, 49], "representative_data_gen": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 48], "representative_dataset": 11, "request": 2, "requir": [21, 24, 27, 29, 31, 34, 46, 49], "research": 50, "reshap": [3, 20], "residu": 11, "residual_collaps": [8, 11], "resnet50": [25, 32, 41], "resnet50_weight": 32, "resourc": [6, 10, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 49], "resourceutil": [13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34], "respect": 48, "respectivli": 3, "rest": 4, "result": 48, "retrain": [25, 32], "retriev": [18, 19, 45], "return": [2, 4, 5, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41], "round": 4, "rounding_typ": 4, "ru": [21, 24, 26, 27], "ru_data": [22, 30], "rule": [40, 43], "run": [4, 15, 17, 41, 49], "runner": 40, "same": [1, 41, 45], "sampl": [4, 15, 17, 49], "save": [3, 11, 12, 27, 35, 41, 46, 48], "save_model_path": [11, 41], "saved_model": 23, "savedmodel": 23, "scalar": 49, "scale": [4, 5, 45], "scale_log_norm": 4, "schedul": [1, 4, 14, 16, 40], "scheduler_typ": [1, 14, 16], 
"schedulertyp": [14, 16], "schema": 45, "schema_vers": 45, "score": [4, 5, 6, 7, 9, 11, 15, 17, 25, 32], "sdsp": [11, 13, 45], "sdsp_v3_14": 19, "sdsp_version": [11, 19], "search": [5, 10, 13, 21, 24, 27, 29, 31, 34], "second": 49, "see": [4, 17, 48, 50], "seen": 49, "select": [0, 3, 6, 8, 9, 11, 13, 39, 41, 44, 45, 46], "self": 45, "semiconductor": 50, "sensit": [5, 6, 25, 32], "sequenti": [20, 28], "serial": 13, "serialization_format": 41, "sess": 41, "session": 41, "set": [3, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 41, 43, 45, 46, 48, 49], "set_log_fold": [35, 48, 49], "setup": [11, 50], "sever": [21, 24, 27, 29, 31, 34, 49], "shift": 48, "shift_negative_activation_correct": 8, "shift_negative_params_search": 8, "shift_negative_ratio": 8, "shift_negative_threshold_recalcul": 8, "shortli": 45, "should": [3, 6, 15, 21, 22, 24, 25, 26, 27, 29, 31, 32, 34, 41, 45, 49], "show": 49, "shown": 48, "sigma": 5, "signal": 9, "signed": 45, "signific": [7, 48], "significantli": 48, "simd": [25, 32, 45], "simd_siz": 45, "similar": [9, 12, 36, 37, 38, 40, 48, 50], "similarli": 45, "simpl": [20, 28], "simplic": [20, 28], "simul": 40, "simulate_schedul": 40, "simultan": 45, "singl": 45, "six": 48, "size": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 26, 27, 28, 34, 41, 46], "skip": [12, 40, 41, 48], "slowli": 41, "small": 48, "smaller": 42, "smallereq": 42, "smooth": [1, 46], "smoothing_and_augment": [1, 14, 16], "so": [11, 41], "softmax": 3, "softmax_shift": 8, "softquant": 4, "solut": 50, "solver": [21, 24, 27, 34], "some": [18, 19, 20, 28, 41, 45, 47, 49], "soni": 50, "sonysemiconductorsolut": 50, "sourc": 50, "specif": [0, 3, 11, 13, 25, 32, 43, 48, 49], "specifi": [6, 11, 12, 14, 16, 18, 20, 23, 25, 28, 32, 41, 45, 48], "sphinx": 13, "sqnr": [12, 48], "squar": [1, 9], "stabl": 50, "stage": 49, "standard": [25, 32, 46], "start": [20, 28, 41, 46, 50], "start_step": 4, "state": 50, "state_dict": 32, "statist": [3, 21, 24, 27, 29, 31, 34, 49], 
"ste": [4, 44, 46], "step": [1, 4, 46, 48], "store": [7, 46], "str": [3, 11, 12, 18, 19, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 45, 48], "straight": [4, 46], "strategi": [6, 25, 32], "string": 43, "structur": [13, 50], "student": 4, "success": 11, "suffer": 41, "suggest": 48, "sum": [10, 22, 25, 30, 32], "support": [4, 11, 41], "supported_input_activation_n_bit": 45, "symmetr": [21, 24, 27, 29, 31, 34, 45, 46], "t": [35, 50], "tab": 49, "tabl": 45, "tag": 49, "take": [5, 24, 27, 34, 50], "target": [4, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 30, 32, 33, 34, 45], "target_platform_cap": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 42, 46], "target_q_fract": 4, "target_resource_util": [21, 24, 25, 27, 29, 31, 32, 34], "targetplatformcap": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34], "teacher": 4, "tempfil": 41, "tensor": [5, 11, 12, 15, 17, 20, 22, 28, 30, 45, 46, 49, 50], "tensorboard": [40, 50], "tensorflow": [3, 11, 13, 15, 20, 21, 22, 24, 25, 26, 27, 41, 43, 45, 50], "tf": [3, 11, 15, 20, 23, 26, 27], "tflite": [41, 45], "than": [5, 42, 48], "thei": 3, "them": [45, 49], "thi": [5, 7, 8, 9, 11, 13, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 41, 45, 46, 48, 50], "those": 48, "three": [3, 48], "threshold": [5, 8, 9, 11, 12, 21, 24, 27, 29, 31, 34, 45, 46, 48], "threshold_bitwidth_mixed_precis": 48, "threshold_bitwidth_mixed_precision_with_model_output_loss_object": 12, "threshold_degrade_layer_ratio": [12, 48], "threshold_quantize_error": [12, 48], "threshold_ratio_unbalanced_concaten": [12, 48], "threshold_zscore_outlier_remov": [12, 48], "through": [4, 20, 25, 28, 46], "throughout": 4, "thu": [25, 32, 49], "time": [3, 6, 46], "togeth": [25, 32], "tool": [11, 13, 46, 50], "toolkit": [11, 13, 20, 28, 29, 48], "torch": [17, 28, 37, 38, 41, 50], "torchscript": 41, "torchvis": [1, 16, 29, 30, 31, 32, 33, 34, 41], "total": [10, 22, 30], "total_memori": 10, "tpc": [11, 13, 25, 32, 45], "tpc_minor_vers": 45, "tpc_patch_vers": 45, 
"tpc_platform_typ": 45, "tpc_v1_0": 18, "tpc_version": 18, "trace": 41, "train": [4, 11, 13, 44, 46, 50], "train_bia": 4, "trainabl": [23, 26, 46], "trainable_infrastructur": 44, "trainablequant": 26, "transform": [1, 21, 24, 27, 29, 31, 34], "transpos": 3, "treat": 45, "troubleshoot": 13, "true": [1, 5, 8, 11, 12, 15, 16, 17, 23, 33, 34, 46], "try": 5, "tun": 34, "tune": [15, 17, 25, 26, 27, 32, 33], "tupl": [1, 3, 11, 14, 16, 20, 21, 24, 25, 28, 29, 31, 32, 43, 45], "tutori": 48, "two": [5, 12, 21, 24, 27, 29, 31, 34, 41, 45, 48, 49], "type": [0, 1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 35, 36, 37, 38, 41, 43, 45, 48], "ui": 49, "unbalanc": [12, 48], "unchang": 40, "under": 49, "unifi": 11, "uniform": [45, 46], "union": [1, 14, 16, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 34, 45], "uniqu": 45, "up": [6, 20, 28, 35, 45, 49], "updat": [4, 11], "upon": 46, "us": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50], "use_hessian_based_scor": [5, 11], "use_hessian_based_weight": [15, 17], "use_hessian_sample_attent": [15, 17], "use_mixed_precis": 11, "user": [11, 13, 21, 24, 26, 27, 29, 31, 33, 34, 48], "userinform": [21, 24, 29, 31], "util": [6, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 46], "v": 50, "valid": [36, 37, 38, 45, 46, 48], "validation_dataset": [36, 37, 38, 48], "validationerror": 45, "valu": [1, 2, 3, 4, 5, 6, 9, 11, 12, 21, 24, 25, 26, 27, 32, 41, 42, 43, 45, 46, 48], "valuabl": 9, "variabl": [11, 15, 17], "variou": [11, 20, 28, 49], "vector": [4, 49], "verbos": 35, "version": [11, 13, 20, 28, 45], "via": [41, 50], "view": 49, "visit": [44, 50], "visual": [48, 50], "wa": [2, 41, 48], "wai": [49, 50], "walk": [20, 28], "want": 3, "warn": [11, 48], "we": [3, 20, 21, 24, 25, 27, 28, 32, 34, 41, 43, 45, 46, 49], "weight": [0, 1, 3, 4, 5, 8, 10, 11, 14, 15, 16, 17, 21, 22, 25, 
27, 29, 30, 31, 32, 33, 34, 41, 43, 44, 45, 46, 49], "weight_quantizer_params_overrid": 44, "weight_training_method": 44, "weights_bias_correct": [8, 11], "weights_channels_axi": 46, "weights_compression_ratio": 11, "weights_error_method": 8, "weights_memori": [6, 10, 21, 24, 25, 27, 32, 34], "weights_n_bit": [43, 45, 46], "weights_per_channel_threshold": [45, 46], "weights_quantization_candid": 46, "weights_quantization_method": [43, 45, 46], "weights_quantization_param": 46, "weights_quantization_params_fn": 43, "weights_second_moment_correct": 8, "were": 49, "when": [1, 2, 3, 4, 5, 6, 9, 10, 12, 13, 15, 17, 21, 24, 26, 27, 40, 41, 42, 44, 45, 46, 48, 49], "where": [7, 12, 41, 43, 48, 49], "whether": [4, 5, 7, 11, 14, 15, 16, 17, 23, 40, 41, 45, 46], "which": [4, 6, 40, 41, 42, 43, 45, 46], "while": [8, 21, 24, 26, 27, 34, 45], "who": 48, "width": [0, 5, 12, 13, 21, 24, 27, 28, 34, 39, 45, 48, 50], "within": [40, 45, 48, 50], "without": 13, "work": 50, "would": 49, "wrap": [2, 3, 23, 27, 34, 42, 45, 46], "wrapper": [27, 33, 34, 46], "writer": 49, "x": 48, "xquant": [11, 50], "xquant_config": [12, 36, 37, 38, 48], "xquant_report_keras_experiment": [13, 36], "xquant_report_pytorch_experiment": [13, 37, 48], "xquant_report_troubleshoot_pytorch_experiment": [12, 13, 38, 48], "xquantconfig": [12, 13, 36, 37, 38], "y": 48, "yield": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "you": [8, 11, 41, 45, 49, 50], "your": [41, 48], "z": 11, "z_score": [12, 48], "z_threshold": [8, 11], "zero": [5, 45]}, "titles": ["BitWidthConfig", "Data Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "wrapper", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", 
"Get GradientPTQConfig for Pytorch Models", "Get TargetPlatformCapabilities for tpc version", "Get TargetPlatformCapabilities for sdsp converter version", "Keras Data Generation", "Keras Gradient Based Post Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", "Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "XQuant Report Troubleshoot Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform_capabilities Module", "trainable_infrastructure Module", "<no title>", "XQuant Extension Tool", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "titleterms": {"about": 48, "action": 43, "api": [13, 50], "attribut": 42, "attributequantizationconfig": 45, "awar": [26, 27, 33, 34], "base": [21, 29], "basekerastrainablequant": 46, "basepytorchtrainablequant": 46, "batchnormalignemntlosstyp": 1, "bit": 49, "bitwidthconfig": 0, "bnlayerweightingtyp": 1, "channelaxi": 3, "channelsfilteringstrategi": 6, "class": [2, 3, 4], "comparison": 49, "compress": 50, "configur": [1, 6, 12, 49], "constraint": 50, "convert": 19, "core": 13, "coreconfig": 39, "cosin": 49, "data": [1, 20, 28], "data_gener": 13, "datagenerationconfig": [14, 16], "datainittyp": 1, "debug_config": 40, "debugconfig": 40, "defaultdict": 2, "doc": 13, "document": 50, "editrul": 43, "enabl": 35, "error": 48, "exampl": 48, "export": [13, 41], "extens": 48, 
"featur": 50, "filter": [42, 43], "final": [26, 33], "flow": 48, "format": [41, 48], "frameworkinfo": 3, "fuse": 45, "gener": [1, 20, 28, 48], "get": [14, 15, 16, 17, 18, 19, 22, 30], "gptq": 13, "gptqhessianscoresconfig": 4, "gradient": [21, 29], "gradientptqconfig": [4, 15, 17], "gradualactivationquantizationconfig": 4, "graph": 48, "guid": 50, "how": 48, "imagegranular": 1, "imagenormalizationtyp": 1, "imagepipelinetyp": 1, "importancemetr": 6, "indic": 13, "infer": 41, "inform": [7, 22, 30], "init": [27, 34], "instal": 50, "judgeabl": 48, "kera": [14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 36, 41], "keras_export_model": 41, "keras_load_quantized_model": 13, "kerasexportserializationformat": 41, "layer": 42, "load": 23, "logger": 35, "manualbitwidthselect": 0, "mctq": 41, "mix": 49, "mixedprecisionquantizationconfig": 5, "model": [14, 15, 16, 17, 22, 23, 26, 27, 30, 33, 34, 41, 50], "modul": [40, 41, 43, 44, 45, 46], "mpdistanceweight": 5, "mpmetricnorm": 5, "name": 41, "network_editor": 43, "onnx": 41, "operatorsetgroup": 45, "operatorsset": 45, "opquantizationconfig": 45, "opset": 41, "output": 41, "outputlosstyp": 1, "overal": 48, "overview": 50, "paramet": 48, "post": [21, 24, 29, 31], "precis": 49, "process": 48, "prune": [6, 7, 13, 25, 32], "ptq": 13, "pytorch": [16, 17, 28, 29, 30, 31, 32, 33, 34, 37, 38, 41], "pytorch_export_model": 41, "pytorchexportserializationformat": 41, "qat": 13, "qat_config": 44, "qatconfig": 44, "qfractionlinearannealingconfig": 4, "quantiz": [21, 23, 24, 26, 27, 29, 31, 33, 34, 41, 48], "quantizationconfig": 8, "quantizationconfigopt": 45, "quantizationerrormethod": 9, "quantizationformat": 41, "quantizationmethod": 45, "quickstart": 50, "refer": 50, "report": [36, 37, 38], "resourc": [22, 30], "resourceutil": 10, "roundingtyp": 4, "run": 48, "schedulertyp": 1, "sdsp": 19, "serial": 41, "set_log_fold": 13, "similar": 49, "structur": [25, 32], "support": 50, "tabl": 13, "target_platform_cap": [13, 45], "targetplatformcap": [18, 19, 
45], "technic": 50, "tensorboard": 49, "tool": 48, "toolkit": 50, "tpc": 18, "train": [21, 24, 26, 27, 29, 31, 33, 34], "trainable_infrastructur": [13, 46], "trainablequantizeractivationconfig": 46, "trainablequantizerweightsconfig": 46, "trainingmethod": [44, 46], "troubleshoot": [38, 48], "tutori": 41, "understand": 48, "us": 41, "user": 50, "util": [22, 30], "version": [18, 19, 41], "visual": 49, "width": 49, "within": 49, "wrapper": [11, 13], "xquant": [12, 13, 36, 37, 38, 48], "xquantconfig": 48}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"API Docs": [[13, null]], "API Documentation": [[50, "api-documentation"]], "About XQuant Extension Tool": [[48, "about-xquant-extension-tool"]], "Actions": [[43, "actions"]], "Attribute Filters": [[42, "attribute-filters"]], "AttributeQuantizationConfig": [[45, "attributequantizationconfig"]], "BNLayerWeightingType": [[1, "bnlayerweightingtype"]], "BaseKerasTrainableQuantizer": [[46, "basekerastrainablequantizer"]], "BasePytorchTrainableQuantizer": [[46, "basepytorchtrainablequantizer"]], "BatchNormAlignemntLossType": [[1, "batchnormalignemntlosstype"]], "BitWidthConfig": [[0, null]], "ChannelAxis": [[3, "channelaxis"]], "ChannelsFilteringStrategy": [[6, "channelsfilteringstrategy"]], "CoreConfig": [[39, null]], "Cosine Similarity Comparison": [[49, "cosine-similarity-comparison"]], "Data Generation Configuration": [[1, null]], "DataInitType": [[1, "datainittype"]], "DebugConfig": [[40, "debugconfig"]], "DefaultDict Class": [[2, null]], "EditRule": [[43, "editrule"]], "Enable a Logger": [[35, null]], "Filters": [[43, "filters"]], "FrameworkInfo Class": [[3, null]], "Fusing": [[45, "fusing"]], "GPTQHessianScoresConfig Class": [[4, "gptqhessianscoresconfig-class"]], "Get DataGenerationConfig for Keras Models": [[14, null]], "Get DataGenerationConfig for Pytorch Models": [[16, null]], "Get GradientPTQConfig for Keras Models": [[15, null]], "Get GradientPTQConfig for Pytorch Models": [[17, null]], "Get Resource Utilization information for Keras Models": [[22, null]], "Get Resource Utilization information for PyTorch Models": [[30, null]], "Get TargetPlatformCapabilities for sdsp converter version": [[19, null]], "Get TargetPlatformCapabilities for tpc version": [[18, null]], "GradientPTQConfig Class": [[4, null]], "GradualActivationQuantizationConfig": [[4, "gradualactivationquantizationconfig"]], "How to Run": [[48, "how-to-run"]], "ImageGranularity": [[1, "imagegranularity"]], "ImageNormalizationType": [[1, "imagenormalizationtype"]], 
"ImagePipelineType": [[1, "imagepipelinetype"]], "ImportanceMetric": [[6, "importancemetric"]], "Indices and tables": [[13, "indices-and-tables"]], "Install": [[50, "install"]], "Keras Data Generation": [[20, null]], "Keras Gradient Based Post Training Quantization": [[21, null]], "Keras Post Training Quantization": [[24, null]], "Keras Quantization Aware Training Model Finalize": [[26, null]], "Keras Quantization Aware Training Model Init": [[27, null]], "Keras Structured Pruning": [[25, null]], "Keras Tutorial": [[41, "keras-tutorial"]], "KerasExportSerializationFormat": [[41, "kerasexportserializationformat"]], "Keys in the processing state dictionary": [[40, "id1"]], "Layer Attributes Filters": [[42, null]], "Load Quantized Keras Model": [[23, null]], "MCTQ": [[41, "mctq"]], "MCTQ Quantization Format": [[41, "mctq-quantization-format"]], "ManualBitWidthSelection": [[0, "manualbitwidthselection"]], "Mixed-precision Configuration Bit-width": [[49, "mixed-precision-configuration-bit-width"]], "MixedPrecisionQuantizationConfig": [[5, null]], "Model Compression Toolkit User Guide": [[50, null]], "MpDistanceWeighting": [[5, "mpdistanceweighting"]], "MpMetricNormalization": [[5, "mpmetricnormalization"]], "ONNX": [[41, "onnx"]], "ONNX model output names": [[41, "onnx-model-output-names"]], "ONNX opset version": [[41, "onnx-opset-version"]], "OpQuantizationConfig": [[45, "opquantizationconfig"]], "OperatorSetGroup": [[45, "operatorsetgroup"]], "OperatorsSet": [[45, "operatorsset"]], "OutputLossType": [[1, "outputlosstype"]], "Overall Process Flow": [[48, "overall-process-flow"]], "Overview": [[50, "overview"]], "Pruning Configuration": [[6, null]], "Pruning Information": [[7, null]], "PyTorch Quantization Aware Training Model Finalize": [[33, null]], "PyTorch Quantization Aware Training Model Init": [[34, null]], "Pytorch Data Generation": [[28, null]], "Pytorch Gradient Based Post Training Quantization": [[29, null]], "Pytorch Post Training Quantization": [[31, 
null]], "Pytorch Structured Pruning": [[32, null]], "Pytorch Tutorial": [[41, "pytorch-tutorial"]], "PytorchExportSerializationFormat": [[41, "pytorchexportserializationformat"]], "QATConfig": [[44, "qatconfig"]], "QFractionLinearAnnealingConfig": [[4, "qfractionlinearannealingconfig"]], "QuantizationConfig": [[8, null]], "QuantizationConfigOptions": [[45, "quantizationconfigoptions"]], "QuantizationErrorMethod": [[9, null]], "QuantizationFormat": [[41, "quantizationformat"]], "QuantizationMethod": [[45, "quantizationmethod"]], "Quickstart": [[50, "quickstart"]], "References": [[50, "references"]], "ResourceUtilization": [[10, null]], "RoundingType": [[4, "roundingtype"]], "SchedulerType": [[1, "schedulertype"]], "Supported Features": [[50, "supported-features"]], "TargetPlatformCapabilities": [[45, "targetplatformcapabilities"]], "Technical Constraints": [[50, "technical-constraints"]], "TrainableQuantizerActivationConfig": [[46, "trainablequantizeractivationconfig"]], "TrainableQuantizerWeightsConfig": [[46, "trainablequantizerweightsconfig"]], "TrainingMethod": [[44, "trainingmethod"], [46, "trainingmethod"]], "Understanding the General Troubleshoots": [[48, "understanding-the-general-troubleshoots"]], "Understanding the Judgeable Troubleshoots": [[48, "understanding-the-judgeable-troubleshoots"]], "Understanding the Quantization Error Graph": [[48, "understanding-the-quantization-error-graph"]], "Use exported model for inference": [[41, "use-exported-model-for-inference"]], "Visualization within TensorBoard": [[49, null]], "XQuant Configuration": [[12, null]], "XQuant Extension Tool": [[48, null]], "XQuant Report Keras": [[36, null]], "XQuant Report Pytorch": [[37, null]], "XQuant Report Troubleshoot Pytorch": [[38, null]], "XQuantConfig Format and Examples": [[48, "xquantconfig-format-and-examples"]], "XQuantConfig parameter": [[48, "id3"]], "core": [[13, "core"]], "data_generation": [[13, "data-generation"]], "debug_config Module": [[40, null]], "exporter": 
[[13, "exporter"]], "exporter Module": [[41, null]], "gptq": [[13, "gptq"]], "keras serialization format": [[41, "keras-serialization-format"]], "keras_export_model": [[41, "keras-export-model"]], "keras_load_quantized_model": [[13, "keras-load-quantized-model"]], "network_editor Module": [[43, null]], "pruning": [[13, "pruning"]], "ptq": [[13, "ptq"]], "pytorch_export_model": [[41, "pytorch-export-model"]], "qat": [[13, "qat"]], "qat_config Module": [[44, null]], "set_log_folder": [[13, "set-log-folder"]], "target_platform_capabilities": [[13, "target-platform-capabilities"]], "target_platform_capabilities Module": [[45, null]], "trainable_infrastructure": [[13, "trainable-infrastructure"]], "trainable_infrastructure Module": [[46, null]], "wrapper": [[11, null], [13, "wrapper"]], "xquant": [[13, "xquant"]]}, "docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", "api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/Wrapper", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", "api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/get_target_platform_capabilities_sdsp", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", 
"api/api_docs/methods/keras_pruning_experimental", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", "api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental", "api/api_docs/modules/core_config", "api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform_capabilities", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/XQuant_Extension_Tool", "guidelines/visualization", "index"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", 
"api/api_docs/classes/QuantizationConfig.rst", "api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/Wrapper.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/get_target_platform_capabilities_sdsp.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", "api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", "api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", 
"api/api_docs/modules/network_editor.rst", "api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform_capabilities.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/XQuant_Extension_Tool.rst", "guidelines/visualization.rst", "index.rst"], "indexentries": {"add_metadata (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.add_metadata", false]], "attributefilter (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.AttributeFilter", false]], "attributequantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig", false]], "base_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.base_config", false]], "basekerastrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer", false]], "basepytorchtrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer", false]], "batchnormalignemntlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BatchNormAlignemntLossType", false]], "bit_width (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, 
"model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.bit_width", false]], "bitwidthconfig (class in model_compression_toolkit.core)": [[0, "model_compression_toolkit.core.BitWidthConfig", false]], "bnlayerweightingtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BNLayerWeightingType", false]], "changecandidatesactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr", false]], "changecandidatesactivationquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod", false]], "changecandidatesweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr", false]], "changecandidatesweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod", false]], "changefinalactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr", false]], "changefinalweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr", false]], "changefinalweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod", false]], "changequantizationparamfunction (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction", 
false]], "channelaxis (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.ChannelAxis", false]], "channels_filtering_strategy (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.channels_filtering_strategy", false]], "channelsfilteringstrategy (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ChannelsFilteringStrategy", false]], "coreconfig (class in model_compression_toolkit.core)": [[39, "model_compression_toolkit.core.CoreConfig", false]], "datagenerationconfig (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataGenerationConfig", false]], "datainittype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataInitType", false]], "debugconfig (class in model_compression_toolkit.core)": [[40, "model_compression_toolkit.core.DebugConfig", false]], "default_qco (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.default_qco", false]], "defaultdict (class in model_compression_toolkit)": [[2, "model_compression_toolkit.DefaultDict", false]], "editrule (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.EditRule", false]], "enable_weights_quantization (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.enable_weights_quantization", false]], "eq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Eq", false]], "filter 
(model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.filter", false]], "frameworkinfo (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.FrameworkInfo", false]], "fuse_op_quantization_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.fuse_op_quantization_config", false]], "fusing (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing", false]], "fusing_patterns (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.fusing_patterns", false]], "get() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.get", false]], "get_keras_data_generation_config() (in module model_compression_toolkit.data_generation)": [[14, "model_compression_toolkit.data_generation.get_keras_data_generation_config", false]], "get_keras_gptq_config() (in module model_compression_toolkit.gptq)": [[15, "model_compression_toolkit.gptq.get_keras_gptq_config", false]], "get_pytorch_data_generation_config() (in module model_compression_toolkit.data_generation)": [[16, "model_compression_toolkit.data_generation.get_pytorch_data_generation_config", false]], "get_pytorch_gptq_config() (in module model_compression_toolkit.gptq)": [[17, "model_compression_toolkit.gptq.get_pytorch_gptq_config", false]], "get_target_platform_capabilities() (in module model_compression_toolkit)": [[18, 
"model_compression_toolkit.get_target_platform_capabilities", false]], "get_target_platform_capabilities_sdsp() (in module model_compression_toolkit)": [[19, "model_compression_toolkit.get_target_platform_capabilities_sdsp", false]], "gptqhessianscoresconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GPTQHessianScoresConfig", false]], "gradientptqconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradientPTQConfig", false]], "gradualactivationquantizationconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradualActivationQuantizationConfig", false]], "greater (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Greater", false]], "greatereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.GreaterEq", false]], "imagegranularity (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageGranularity", false]], "imagenormalizationtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageNormalizationType", false]], "imagepipelinetype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImagePipelineType", false]], "importance_metric (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.importance_metric", false]], "importance_scores (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.importance_scores", false]], "importancemetric (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ImportanceMetric", false]], "insert_preserving_quantizers 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.insert_preserving_quantizers", false]], "is_simd_padding (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.is_simd_padding", false]], "keras_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[20, "model_compression_toolkit.data_generation.keras_data_generation_experimental", false]], "keras_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.keras_export_model", false]], "keras_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[21, "model_compression_toolkit.gptq.keras_gradient_post_training_quantization", false]], "keras_load_quantized_model() (in module model_compression_toolkit)": [[23, "model_compression_toolkit.keras_load_quantized_model", false]], "keras_post_training_quantization() (in module model_compression_toolkit.ptq)": [[24, "model_compression_toolkit.ptq.keras_post_training_quantization", false]], "keras_pruning_experimental() (in module model_compression_toolkit.pruning)": [[25, "model_compression_toolkit.pruning.keras_pruning_experimental", false]], "keras_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[26, "model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental", false]], "keras_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[27, "model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental", false]], "keras_resource_utilization_data() (in module 
model_compression_toolkit.core)": [[22, "model_compression_toolkit.core.keras_resource_utilization_data", false]], "kerasexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.KerasExportSerializationFormat", false]], "keys() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.keys", false]], "lut_values_bitwidth (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.lut_values_bitwidth", false]], "manual_activation_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_activation_bit_width_selection_list", false]], "manual_weights_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_weights_bit_width_selection_list", false]], "manualbitwidthselection (class in model_compression_toolkit.core.common.quantization.bit_width_config)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection", false]], "mctwrapper (class in model_compression_toolkit.wrapper.mct_wrapper)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper", false]], "mixedprecisionquantizationconfig (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MixedPrecisionQuantizationConfig", false]], "mpdistanceweighting (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpDistanceWeighting", false]], "mpmetricnormalization (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpMetricNormalization", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": 
[[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.name", false]], "nodenamefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameFilter", false]], "nodenamescopefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameScopeFilter", false]], "nodetypefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeTypeFilter", false]], "noteq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.NotEq", false]], "num_score_approximations (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.num_score_approximations", false]], "operator_groups (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.operator_groups", false]], "operator_set 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.operator_set", false]], "operators_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.operators_set", false]], "operatorsetgroup (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup", false]], "operatorsset (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet", false]], "opquantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OpQuantizationConfig", false]], "outputlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.OutputLossType", false]], "pruning_masks (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.pruning_masks", false]], "pruningconfig (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.PruningConfig", false]], "pruninginfo (class in model_compression_toolkit.pruning)": [[7, "model_compression_toolkit.pruning.PruningInfo", false]], "pytorch_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[28, "model_compression_toolkit.data_generation.pytorch_data_generation_experimental", false]], "pytorch_export_model (class in 
model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.pytorch_export_model", false]], "pytorch_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[29, "model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization", false]], "pytorch_post_training_quantization() (in module model_compression_toolkit.ptq)": [[31, "model_compression_toolkit.ptq.pytorch_post_training_quantization", false]], "pytorch_pruning_experimental() (in module model_compression_toolkit.pruning)": [[32, "model_compression_toolkit.pruning.pytorch_pruning_experimental", false]], "pytorch_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[33, "model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental", false]], "pytorch_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[34, "model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental", false]], "pytorch_resource_utilization_data() (in module model_compression_toolkit.core)": [[30, "model_compression_toolkit.core.pytorch_resource_utilization_data", false]], "pytorchexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.PytorchExportSerializationFormat", false]], "qatconfig (class in model_compression_toolkit.qat)": [[44, "model_compression_toolkit.qat.QATConfig", false]], "qc_options (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.qc_options", false]], "qfractionlinearannealingconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.QFractionLinearAnnealingConfig", false]], "quantization_configurations 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.quantization_configurations", false]], "quantizationconfig (class in model_compression_toolkit.core)": [[8, "model_compression_toolkit.core.QuantizationConfig", false]], "quantizationconfigoptions (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions", false]], "quantizationerrormethod (class in model_compression_toolkit.core)": [[9, "model_compression_toolkit.core.QuantizationErrorMethod", false]], "quantizationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.QuantizationFormat", false]], "quantizationmethod (class in model_compression_toolkit.target_platform_capabilities)": [[45, "model_compression_toolkit.target_platform_capabilities.QuantizationMethod", false]], "quantize_and_export() (model_compression_toolkit.wrapper.mct_wrapper.mctwrapper method)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export", false]], "resourceutilization (class in model_compression_toolkit.core)": [[10, "model_compression_toolkit.core.ResourceUtilization", false]], "roundingtype (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.RoundingType", false]], "schedulertype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.SchedulerType", false]], "schema_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.SCHEMA_VERSION", false]], "set_log_folder() (in 
module model_compression_toolkit)": [[35, "model_compression_toolkit.set_log_folder", false]], "smaller (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Smaller", false]], "smallereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.SmallerEq", false]], "targetplatformcapabilities (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities", false]], "tpc_minor_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_minor_version", false]], "tpc_patch_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_patch_version", false]], "tpc_platform_type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_platform_type", false]], "trainablequantizeractivationconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig", false]], "trainablequantizerweightsconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig", false]], "trainingmethod (class in model_compression_toolkit.trainable_infrastructure)": 
[[46, "model_compression_toolkit.trainable_infrastructure.TrainingMethod", false]], "type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.type", false]], "weights_n_bits (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_n_bits", false]], "weights_per_channel_threshold (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_per_channel_threshold", false]], "weights_quantization_method (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_quantization_method", false]], "xquant_report_keras_experimental() (in module model_compression_toolkit.xquant.keras.facade_xquant_report)": [[36, "model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental", false]], "xquant_report_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[37, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_pytorch_experimental", false]], "xquant_report_troubleshoot_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[38, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_troubleshoot_pytorch_experimental", false]], "xquantconfig (class in model_compression_toolkit.xquant.common.xquant_config)": 
[[12, "model_compression_toolkit.xquant.common.xquant_config.XQuantConfig", false]]}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [19, 3, 1, "", "get_target_platform_capabilities_sdsp"], [23, 3, 1, "", "keras_load_quantized_model"], [35, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", "keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [39, 0, 1, "", "CoreConfig"], [40, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [5, 0, 1, "", "MpDistanceWeighting"], [5, 0, 1, "", "MpMetricNormalization"], [8, 0, 1, "", "QuantizationConfig"], [9, 0, 1, "", "QuantizationErrorMethod"], [10, 0, 1, "", "ResourceUtilization"], [22, 3, 1, "", "keras_resource_utilization_data"], [30, 3, 1, "", "pytorch_resource_utilization_data"]], "model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"], [0, 2, 1, "", "manual_weights_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[43, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesActivationQuantizationMethod"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeQuantizationParamFunction"], [43, 0, 1, "", "EditRule"], [43, 0, 1, "", "NodeNameFilter"], [43, 0, 1, 
"", "NodeNameScopeFilter"], [43, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", "get_pytorch_data_generation_config"], [20, 3, 1, "", "keras_data_generation_experimental"], [28, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[41, 0, 1, "", "KerasExportSerializationFormat"], [41, 0, 1, "", "PytorchExportSerializationFormat"], [41, 0, 1, "", "QuantizationFormat"], [41, 0, 1, "", "keras_export_model"], [41, 0, 1, "", "pytorch_export_model"]], "model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, "", "GradualActivationQuantizationConfig"], [4, 0, 1, "", "QFractionLinearAnnealingConfig"], [4, 0, 1, "", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [21, 3, 1, "", "keras_gradient_post_training_quantization"], [29, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[6, 0, 1, "", "ChannelsFilteringStrategy"], [6, 0, 1, "", "ImportanceMetric"], [6, 0, 1, "", "PruningConfig"], [7, 0, 1, "", "PruningInfo"], [25, 3, 1, "", "keras_pruning_experimental"], [32, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[6, 2, 1, "", "channels_filtering_strategy"], [6, 2, 1, "", "importance_metric"], [6, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[7, 4, 1, "", "importance_scores"], [7, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[24, 3, 1, "", 
"keras_post_training_quantization"], [31, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[44, 0, 1, "", "QATConfig"], [26, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [27, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [34, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform_capabilities": [[42, 0, 1, "", "AttributeFilter"], [42, 0, 1, "", "Eq"], [42, 0, 1, "", "Greater"], [42, 0, 1, "", "GreaterEq"], [42, 0, 1, "", "NotEq"], [45, 0, 1, "", "QuantizationMethod"], [42, 0, 1, "", "Smaller"], [42, 0, 1, "", "SmallerEq"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema": [[45, 0, 1, "", "AttributeQuantizationConfig"], [45, 0, 1, "", "Fusing"], [45, 0, 1, "", "OpQuantizationConfig"], [45, 0, 1, "", "OperatorSetGroup"], [45, 0, 1, "", "OperatorsSet"], [45, 0, 1, "", "QuantizationConfigOptions"], [45, 0, 1, "", "TargetPlatformCapabilities"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig": [[45, 2, 1, "", "enable_weights_quantization"], [45, 2, 1, "", "lut_values_bitwidth"], [45, 2, 1, "", "weights_n_bits"], [45, 2, 1, "", "weights_per_channel_threshold"], [45, 2, 1, "", "weights_quantization_method"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing": [[45, 2, 1, "", "fuse_op_quantization_config"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_groups"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup": [[45, 2, 1, "", "name"], [45, 2, 1, "", "operators_set"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet": [[45, 2, 1, "", "name"], [45, 2, 1, "", "qc_options"], [45, 2, 1, "", "type"]], 
"model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions": [[45, 2, 1, "", "base_config"], [45, 2, 1, "", "quantization_configurations"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities": [[45, 2, 1, "", "SCHEMA_VERSION"], [45, 2, 1, "", "add_metadata"], [45, 2, 1, "", "default_qco"], [45, 2, 1, "", "fusing_patterns"], [45, 2, 1, "", "insert_preserving_quantizers"], [45, 2, 1, "", "is_simd_padding"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_set"], [45, 2, 1, "", "tpc_minor_version"], [45, 2, 1, "", "tpc_patch_version"], [45, 2, 1, "", "tpc_platform_type"]], "model_compression_toolkit.trainable_infrastructure": [[46, 0, 1, "", "BaseKerasTrainableQuantizer"], [46, 0, 1, "", "BasePytorchTrainableQuantizer"], [46, 0, 1, "", "TrainableQuantizerActivationConfig"], [46, 0, 1, "", "TrainableQuantizerWeightsConfig"], [46, 0, 1, "", "TrainingMethod"]], "model_compression_toolkit.wrapper.mct_wrapper": [[11, 0, 1, "", "MCTWrapper"]], "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper": [[11, 1, 1, "", "quantize_and_export"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[36, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[37, 3, 1, "", "xquant_report_pytorch_experimental"], [38, 3, 1, "", "xquant_report_troubleshoot_pytorch_experimental"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "terms": {"": [3, 6, 8, 10, 21, 24, 25, 26, 27, 29, 31, 32, 34, 35, 41, 42, 43, 45, 46, 48, 50], "0": [1, 3, 
4, 5, 7, 8, 11, 12, 14, 16, 21, 24, 25, 26, 27, 32, 40, 41, 46, 48], "05": 8, "06": 5, "08153": 46, "1": [1, 3, 4, 5, 7, 8, 11, 12, 17, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 40, 41, 48, 50], "10": [20, 21, 24, 27, 28, 29, 31, 34], "100": 40, "10000000000": 5, "14": 11, "15": 41, "16": [12, 41, 48], "1902": 46, "1e": [5, 15, 17], "1st": 15, "2": [3, 8, 12, 15, 17, 20, 28, 40, 45, 46, 48, 50], "20": 49, "2021": 50, "2023": 50, "224": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "2f": 40, "2nd": 15, "3": [3, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 46], "32": [4, 5, 11], "3e": [15, 17], "3rd": 15, "4": [15, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 48], "4th": 15, "5": [11, 12, 15, 17, 25, 32, 48], "50": [25, 32], "52587890625e": 8, "6": [28, 40], "75": [11, 21, 24, 26, 27], "8": [20, 21, 24, 26, 27, 28, 41, 45, 46], "9": 43, "A": [0, 3, 4, 5, 7, 8, 13, 15, 17, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 43, 44, 45, 50], "And": 48, "As": [5, 48, 49], "By": [4, 5, 11, 25, 29, 31, 32, 41, 49], "For": [3, 8, 12, 18, 19, 20, 21, 24, 26, 27, 28, 34, 41, 45, 46, 47, 48, 49, 50], "If": [2, 3, 4, 5, 12, 15, 17, 21, 24, 26, 27, 29, 31, 39, 40, 41, 42, 45, 48], "In": [5, 20, 21, 24, 27, 28, 29, 31, 34, 41, 42, 44, 48], "It": [2, 11, 12, 45, 46, 48], "No": 1, "One": 49, "The": [0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 36, 37, 38, 40, 41, 43, 45, 46, 48, 49], "Then": [3, 21, 24, 27, 29, 31, 34, 43, 49], "There": [41, 48, 49], "These": [48, 49], "To": [41, 48, 49], "With": 48, "_": [21, 24, 27, 29, 31, 34, 41], "__call__": 40, "__import__": 40, "__init__": 40, "_input_data": 41, "_model_input_nam": 41, "_model_output_nam": 41, "_with_model_output_loss_object": 48, "about": [3, 4, 7, 13, 15, 17, 21, 24, 26, 27, 40, 41, 45, 46], "abov": [12, 48], "absolut": 9, "abstract": [13, 46], "accept": [15, 40, 45], "access": 7, "accord": [13, 21, 22, 24, 
25, 27, 29, 30, 31, 32, 34, 41, 42], "accordingli": 45, "accuraci": [12, 48], "achiev": 25, "act": 7, "act_hessian_default_batch_s": [15, 17], "action": 40, "activ": [0, 3, 4, 5, 8, 10, 11, 21, 22, 24, 27, 29, 30, 31, 34, 41, 43, 44, 45, 46, 48, 49], "activation_bias_correct": 8, "activation_bias_correction_threshold": 8, "activation_channel_equ": 8, "activation_error_method": [8, 11], "activation_memori": 10, "activation_min_max_map": 3, "activation_n_bit": [45, 46], "activation_op": 3, "activation_quantization_candid": 46, "activation_quantization_method": [43, 45, 46], "activation_quantization_param": 46, "activation_quantization_params_fn": 43, "activation_quantizer_map": 3, "activation_quantizer_params_overrid": 44, "activation_training_method": 44, "ad": 45, "adam": [14, 15, 17], "add": [1, 3, 12, 14, 16, 23, 46], "add_metadata": 45, "addit": [23, 41, 48], "address": 45, "advanc": 3, "affect": [21, 24, 26, 27], "after": [13, 21, 23, 24, 27, 34, 48, 50], "aim": [25, 32], "algorithm": 5, "align": [1, 14, 16], "all": [1, 3, 4, 5, 8, 43, 46, 49], "allimag": [1, 16], "allow": [6, 12, 20, 28, 40, 41, 45], "along": 49, "also": [25, 32, 45], "an": [1, 2, 3, 4, 7, 11, 13, 21, 24, 27, 34, 36, 37, 38, 40, 41, 42, 43, 45, 46, 48, 50], "analysi": [25, 32], "analyz": [25, 32, 38], "analyze_similar": 40, "ani": [1, 2, 3, 5, 11, 36, 37, 38, 41, 42, 46], "anneal": 4, "api": [3, 4, 24, 27, 34, 44, 48], "append": 40, "appli": [0, 1, 5, 8, 13, 41, 42, 43, 45, 48], "applic": [21, 22, 24, 25, 26, 27, 41], "approach": 6, "appropri": 48, "approxim": [6, 25, 32], "ar": [3, 5, 12, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 41, 45, 46, 47, 48, 49], "architectur": [25, 32], "argument": [4, 40, 41, 45], "arrai": [7, 11], "art": 50, "arxiv": [46, 50], "assess": [25, 32], "associ": [25, 32], "assum": [25, 32], "astyp": 41, "attent": [4, 15, 17, 46], "attirbut": 3, "attr": 42, "attr_nam": 43, "attr_valu": 43, "attr_weights_configs_map": 45, "attribut": [43, 45, 46], "attributefilt": 42, 
"auto": 13, "automat": 48, "auxiliari": [15, 17], "avail": 41, "averag": [1, 5, 14, 15, 16, 17, 48], "avg": 5, "awar": [13, 44, 46, 50], "axi": [3, 46, 48], "backend": 45, "bar": 40, "base": [1, 4, 5, 8, 9, 11, 13, 15, 17, 18, 19, 20, 25, 28, 31, 32, 46, 48, 50], "base_config": 45, "basenod": 7, "basenodematch": 0, "basic": 46, "batch": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 27, 28, 29, 31, 34], "batchnorm": [1, 14, 16, 20, 21, 24, 27, 29, 31, 34], "batchnorm2d": 28, "batchnormalignemntlosstyp": [14, 16], "batchwis": [1, 14], "been": [7, 40], "begin": 4, "behavior": [40, 48], "being": [21, 24, 27, 29, 31, 34, 40, 45, 46], "below": [12, 48], "between": [4, 5, 12, 21, 29, 31, 45, 48, 49], "bia": [4, 11, 15, 17, 21, 24, 26, 27], "bidwidth": 5, "bit": [0, 5, 10, 13, 21, 24, 26, 27, 34, 39, 41, 43, 45, 46, 50], "bit_width": 0, "bit_width_config": [0, 39], "bitwidth": [5, 12, 21, 24, 26, 27, 48], "bitwidthconfig": [13, 39], "block": [46, 49], "bn_alignment_loss_typ": [1, 14, 16], "bn_layer_typ": [1, 14, 16], "bnlayerweightingtyp": [14, 16], "bool": [1, 4, 5, 11, 12, 14, 15, 16, 17, 40, 45, 46], "boolean": 23, "bop": 10, "both": [11, 21, 24, 29, 31, 33, 46, 49], "build": [22, 30, 46, 50], "built": [27, 34, 46], "bypass": 40, "byte": [10, 21, 24, 25, 27, 32, 34, 49], "c": [12, 48], "calcul": [5, 6, 13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 48], "calibr": [11, 21, 22, 24, 27, 29, 30, 31, 34], "call": [22, 30, 35, 45, 49], "callabl": [3, 5, 11, 12, 15, 17, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 36, 37, 38, 40, 41, 42], "callback": 40, "can": [3, 4, 8, 11, 13, 15, 17, 20, 22, 25, 28, 30, 32, 40, 41, 43, 45, 46, 48, 49, 50], "candid": [5, 21, 24, 26, 27, 43], "cannot": 45, "capabl": [11, 18, 19, 25, 30, 32], "case": 5, "caus": [12, 13, 38, 48], "chang": [20, 28, 41, 43, 48, 49], "changecandidatesactivationquantconfigattr": 43, "changecandidatesactivationquantizationmethod": 43, "changecandidatesweightsquantconfigattr": 43, "changecandidatesweightsquantizationmethod": 43, 
"changefinalactivationquantconfigattr": 43, "changefinalweightsquantconfigattr": 43, "changefinalweightsquantizationmethod": 43, "changequantizationmethod": 43, "changequantizationparamfunct": 43, "channel": [3, 6, 7, 13, 25, 32, 45, 46, 49], "channels_filtering_strategi": 6, "check": [5, 41, 42, 43], "choos": [1, 4, 41], "chosen": 49, "circl": 48, "class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 39, 40, 41, 42, 43, 44, 45, 46], "clibrat": 31, "click": 49, "clip": [1, 14, 16], "clone": 50, "coeffici": [3, 21, 24, 26, 27, 29, 31, 45, 46], "cohen": 50, "collaps": 11, "collect": [3, 21, 24, 27, 29, 31, 34, 36, 37, 38, 49], "com": 50, "combin": 45, "common": [0, 12], "compar": [5, 21, 29, 31, 48, 49], "comparison": 50, "compat": 41, "compil": 23, "complet": [4, 11, 40], "completedcompon": 40, "compon": [40, 45, 46, 48], "component_nam": 40, "compress": [11, 13, 20, 25, 28, 29, 32, 48], "comput": [3, 4, 5, 9, 12, 13, 15, 17, 22, 30, 36, 40, 49], "compute_distance_fn": 5, "concat_threshold_upd": 8, "concaten": [12, 45, 48], "concatn": [12, 48], "config": [4, 20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 39, 43, 46], "configur": [0, 4, 5, 8, 10, 11, 13, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50], "configuration_overwrit": 5, "confirm": 48, "connect": 11, "consid": [6, 14, 16, 25, 32, 45], "consol": 48, "constant": [6, 43, 46], "constraint": [21, 24, 25, 29, 31, 32], "contain": [7, 13, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 46, 48], "conv2d": [3, 20, 21, 24, 26, 27, 28, 43, 45], "conveni": 35, "convent": 48, "convert": [11, 13, 26, 33, 45], "core": [0, 3, 5, 8, 9, 10, 11, 21, 22, 24, 25, 26, 27, 29, 30, 32, 33, 34, 39, 40, 43], "core_config": [21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 40], "coreconfig": [13, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 40], "correct": 11, "correspond": [7, 48], "cosin": [48, 50], "count_param": [21, 24, 25, 26, 27], "countermeasur": 48, 
"cpuexecutionprovid": 41, "creat": [3, 4, 8, 11, 13, 14, 15, 16, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 42, 43, 45, 48], "creation": 41, "crop": 1, "cudaexecutionprovid": 41, "cui": 40, "current": [4, 40, 41], "currentcompon": 40, "custom": [5, 12, 20, 23, 27, 28, 41], "custom_metric_fn": 5, "custom_object": [23, 26, 27], "custom_similarity_metr": 12, "custom_tpc_opset_to_lay": 8, "cut": 40, "dash": 48, "data": [13, 14, 16, 22, 25, 30, 32, 36, 37, 38, 41, 45, 49, 50], "data_gen_batch_s": [1, 14, 16, 20, 28], "data_gener": [1, 14, 16, 20, 28], "data_generation_config": [20, 28], "data_init_typ": [1, 14, 16], "dataclass": [39, 40], "datagenerationconfig": [1, 13, 20, 28], "datainittyp": [14, 16], "dataset": [4, 11, 15, 17, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 41, 48, 49], "debug": [39, 40], "debug_config": 39, "debugconfig": 39, "deeper": 49, "def": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 40, 41], "default": [1, 2, 4, 5, 6, 11, 14, 15, 16, 17, 21, 24, 25, 29, 31, 32, 39, 41, 44, 45, 49], "default_data_gen_b": [14, 16], "default_factori": 2, "default_keras_extra_pixel": 14, "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "default_keras_tpc": [21, 24, 25, 27], "default_n_it": [14, 16], "default_onnx_opset_vers": 41, "default_pytorch_bn_layer_typ": 16, "default_pytorch_extra_pixel": 16, "default_pytorch_initial_lr": 16, "default_pytorch_last_layer_typ": 16, "default_pytorch_output_loss_multipli": 16, "default_pytorch_tpc": [29, 31, 32, 34], "default_qco": 45, "default_valu": 2, "default_weight_attr_config": 45, "defaultdict": [3, 13], "defin": [0, 4, 5, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 40, 45, 46, 48], "degrad": [12, 13, 38, 48], "demonstr": [41, 45], "dens": [3, 20], "dense_nparam": [25, 32], "depend": [1, 21, 24, 27, 29, 31, 34], "describ": 48, "descript": [11, 40], "desir": [13, 21, 22, 24, 26, 27, 29, 30, 31, 34], "detail": [41, 45, 48], "detect": [12, 13, 38, 48], 
"determin": [6, 25, 32, 45], "develop": 50, "deviat": 48, "devic": [13, 18], "device_typ": 18, "diagram": 45, "diamant": 50, "dict": [3, 7, 12, 36, 37, 38, 41, 45, 46, 48], "dictionari": [2, 3, 4, 12, 26, 27, 36, 37, 38, 41, 43, 44, 46], "differ": [1, 8, 13, 21, 24, 26, 27, 41, 45, 48, 49], "dikstein": 50, "dir": [12, 48, 49], "directori": [12, 13, 35, 48], "disabl": [15, 17, 40], "displai": [40, 48, 49], "distanc": [5, 11], "distance_weighting_method": [5, 11], "distil": [4, 50], "distribut": 9, "diverg": [9, 49], "divers": 1, "divid": 3, "divis": 49, "dnn": 46, "do": [1, 48, 49], "document": [13, 24, 27, 34, 48], "doe": 48, "doesn": 50, "don": 35, "done": 49, "dot": 49, "dqa": 46, "dror": 50, "dtype": 41, "dummi": 17, "durat": [25, 32], "dure": [4, 13, 14, 15, 16, 17, 18, 19, 36, 37, 38, 41, 43, 45, 46, 47, 49], "e": [3, 5, 11, 21, 24, 27, 29, 31, 34, 50], "each": [5, 6, 7, 12, 21, 24, 25, 27, 29, 31, 32, 34, 43, 45, 46, 48, 49], "easi": 48, "easili": [13, 50], "edit": [39, 40, 43], "editrul": 40, "either": 45, "element": [7, 45], "empti": 2, "emul": 46, "enabl": [1, 5, 8, 11, 13, 15, 17, 40, 46, 50], "enable_activation_quant": [45, 46], "enable_weights_quant": [45, 46], "encapsul": [0, 8], "end_step": 4, "engin": 50, "enhanc": 50, "ensur": 5, "entir": 13, "enum": [1, 3, 4, 6, 9, 46], "epoch": [4, 11, 15, 17], "epsilon": 5, "eptq": 50, "eq": 42, "equal": 42, "er_list": 43, "error": [9, 11, 12, 40], "estim": [4, 46], "etc": [3, 10, 13, 21, 24, 27, 29, 31, 34, 49], "euclidean": 49, "evalu": [5, 36, 37, 38], "even": 48, "exact": 17, "exampl": [3, 8, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 43, 45, 46, 50], "exceed": 48, "execut": 48, "exist": [2, 43, 48], "exp": 5, "exp_distance_weighting_sigma": 5, "expect": [4, 49], "experiment": [13, 20, 28, 50], "explain": [12, 13, 36, 37, 38, 46], "explicitli": 45, "expon": 5, "exponenti": 5, "export": 11, "extend": [25, 32], "extens": [11, 41, 50], "extra": [1, 14, 16], "extra_pixel": [1, 14, 
16], "extrem": 48, "f": 40, "facade_xquant_report": [36, 37, 38], "factor": [4, 5, 9, 15, 17], "factori": [0, 4, 39, 40], "fake": 41, "fake_qu": [27, 34], "fakely_qu": 41, "fallback": 45, "fals": [4, 5, 8, 11, 12, 14, 15, 17, 40, 46], "familiar": 48, "featur": 40, "fetch": 45, "few": [49, 50], "field": [18, 19, 42, 45, 47], "figur": [40, 49], "file": [23, 26, 27, 35, 40, 41], "filepath": 23, "filter": [0, 1, 6], "final": [4, 5, 12, 13, 20, 28, 43, 48, 49, 50], "find": [21, 24, 27, 34], "fine": [15, 17, 25, 26, 27, 32, 33, 34], "first": [1, 21, 24, 27, 29, 31, 34, 41, 49], "first_layer_multipli": 1, "fix": 45, "fixed_scal": [18, 19, 45, 47], "fixed_zero_point": [18, 19, 45, 47], "flag": [1, 11, 40, 45], "flatten": [20, 28], "flip": 1, "float": [1, 4, 5, 11, 12, 14, 15, 16, 17, 21, 27, 29, 31, 34, 36, 37, 38, 41, 45, 46, 48, 49], "float32": [25, 32, 41], "float_model": [11, 36, 37, 38, 41, 48], "flush": 40, "fold": [21, 24, 27, 29, 31, 34], "folder": [35, 48], "follow": [3, 4, 11, 12, 40, 46, 48, 49], "footprint": [25, 32], "form": 45, "format": [3, 13], "fraction": 4, "framework": [3, 11, 46], "frameworkquantizationcap": [22, 29, 30, 31], "free": [6, 20, 25, 28, 32, 50], "freez": 46, "freeze_quant_param": 46, "friendli": [25, 32, 50], "from": [3, 4, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 43, 45, 46, 47, 48, 49, 50], "from_config": 46, "function": [3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 28, 32, 35, 40, 43, 45, 46, 48], "fuse_op_quantization_config": 45, "fusing_pattern": 45, "futur": [18, 19, 20, 28, 45, 47], "g": [3, 11, 21, 24, 27, 29, 31, 34], "gather": [45, 49], "gaussian": [1, 14, 16], "gener": [2, 12, 13, 14, 16, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 45, 49, 50], "generated_imag": [20, 28], "get": [2, 3, 4, 5, 13, 21, 24, 26, 27, 29, 31, 33, 34, 45, 49], "get_config": 46, "get_input": 41, "get_keras_data_generation_config": [13, 14, 20], "get_keras_gptq_config": [11, 13, 15, 21], 
"get_ort_session_opt": 41, "get_output": 41, "get_pytorch_data_generation_config": [13, 16, 28], "get_pytorch_gptq_config": [11, 13, 17], "get_target_platform_cap": [13, 18, 45], "get_target_platform_capabilities_sdsp": [13, 19, 45], "git": 50, "github": [41, 50], "given": [2, 21, 22, 24, 27, 29, 30, 31, 34], "gordon": 50, "gptq": [4, 11, 15, 17, 21, 29], "gptq_conf": [15, 17, 29], "gptq_config": [21, 29, 31], "gptq_quantizer_params_overrid": 4, "gptq_representative_data_gen": [21, 29], "grad": 1, "gradient": [1, 4, 11, 13, 31, 50], "gradientptq": [4, 13], "gradientptqconfig": [13, 21, 29], "gradual": 4, "gradual_activation_quant": [15, 17], "gradual_activation_quantization_config": 4, "gradualactivationquant": [15, 17], "gradualactivationquantizationconfig": [15, 17], "granular": [1, 14, 16], "graph": [22, 30, 43, 49], "greater": 42, "greatereq": 42, "greedi": [5, 6], "group": [3, 6, 25, 32, 45], "h": 50, "ha": [7, 40, 41, 42, 43], "habi": 50, "handl": [11, 21, 24, 27, 29, 31, 34], "handler": 35, "hardwar": [13, 25, 32, 45, 46, 50], "have": [3, 41, 42, 48, 49], "henc": 45, "here": [12, 25, 32, 41, 45, 48, 50], "hessian": [4, 5, 6, 9, 11, 15, 17, 25, 32, 50], "hessian_batch_s": [4, 5, 15, 17], "hessian_weights_config": 4, "hessians_num_sampl": 4, "higher": [25, 32], "highlight": 48, "hight": 28, "histogram": [21, 24, 27, 29, 31, 34, 49], "histori": 40, "hmse": 9, "hold": [3, 39, 42, 45], "holder": 46, "how": [3, 6, 21, 22, 24, 27, 29, 31, 34, 40, 41, 46, 50], "howev": 41, "hptq": [45, 50], "http": [46, 50], "hw": 22, "i": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50], "ident": [1, 5], "identifi": [25, 32, 45, 48], "ignor": [18, 19, 45, 47], "ilp": [21, 24, 27, 34], "imag": [1, 4, 5, 11, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34, 48, 49], "image_clip": [1, 14, 16], "image_granular": [1, 14, 16], "image_normalization_typ": [1, 14, 16], "image_pipeline_typ": [1, 14, 16], 
"imagegranular": [14, 16], "imagenet": 1, "imagenet1k_v1": 32, "imagenormalizationtyp": [14, 16], "imagepipelinetyp": [14, 16], "imagewis": 1, "impact": [25, 32], "implement": [12, 46], "implment": 46, "import": [3, 6, 7, 8, 11, 13, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 43, 46, 48, 49], "importance_metr": 6, "importance_scor": 7, "improv": [5, 25, 32, 48], "imx500": [11, 41, 45], "imx500_tp_model": 18, "in_model": [21, 22, 24, 26, 27, 30, 33, 34], "in_modul": [31, 48], "includ": [4, 7, 11, 21, 24, 27, 29, 31, 34, 45, 46], "increas": [4, 5], "index": [3, 13], "indic": [3, 7, 25, 32, 45, 48], "individu": 48, "induc": 9, "inf": [8, 10, 11], "infer": [13, 26, 33, 45, 46], "inferablequant": [26, 33], "inferencesess": 41, "info": [6, 35, 40], "inform": [3, 4, 13, 15, 17, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 45, 46, 47], "infrastructur": 46, "init": [13, 43, 50], "initi": [1, 2, 4, 6, 11, 12, 14, 16, 27, 34, 46, 48], "initial_lr": [1, 14, 16], "initial_q_fract": 4, "inner": 2, "input": [1, 5, 11, 14, 16, 21, 24, 27, 29, 31, 34, 40, 45, 48], "input_sc": 8, "input_shap": 20, "insert": 49, "insert_preserving_quant": 45, "instal": 41, "instanc": [4, 11, 13, 15, 17, 43, 45, 49], "instanti": [4, 8, 44], "instruct": 45, "insuffici": [12, 48], "int": [0, 1, 4, 5, 6, 12, 14, 15, 16, 17, 20, 28, 35, 40, 41, 45, 46, 48], "int8": 41, "integ": [5, 41, 45], "interest": 5, "interfac": [4, 11, 17], "introduc": 46, "inverse_min_max_diff": 1, "involv": [20, 25, 28, 32], "is_detect_under_threshold_quantize_error": 12, "is_keras_layer_export": 41, "is_layer_exportable_fn": 41, "is_pytorch_layer_export": 41, "is_simd_pad": 45, "issu": [5, 41, 48], "item": 48, "iter": [1, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34], "its": [2, 3, 11, 13, 23, 25, 32, 42, 45, 49], "jen": 50, "judg": [12, 13, 38, 48], "judgment": 48, "just": 50, "keep": [33, 40, 50], "kei": [2, 11, 12, 25, 32, 42], "kept": [7, 27, 34], "ker": 27, "kera": [3, 11, 13, 43, 46, 50], "keras_appl": 
[1, 14], "keras_data_generation_experiment": [13, 20], "keras_default_tpc": 22, "keras_file_path": 41, "keras_gradient_post_training_quant": [13, 15, 21], "keras_load_quantized_model": 23, "keras_post_training_quant": [13, 24, 41, 43, 49], "keras_pruning_experiment": [13, 25], "keras_quantization_aware_training_finalize_experiment": [13, 26], "keras_quantization_aware_training_init_experiment": [13, 26, 27], "keras_resource_utilization_data": [13, 22], "kernel": [3, 21, 24, 26, 27, 43, 46], "kernel_channels_map": 3, "kernel_op": 3, "kernel_ops_attributes_map": 3, "keyword": 45, "kl": [9, 49], "know": [3, 13], "knowledg": [4, 50], "known_dict": 2, "kwarg": 43, "l": [25, 50], "l2": 1, "l2_squar": [1, 14, 16], "l_p_valu": 8, "label": [6, 25, 32, 45, 50], "lambda": 41, "larg": [12, 48], "larger": 5, "last": [3, 4, 5, 48], "last_lay": 5, "last_layer_typ": [1, 16], "latenc": 41, "latest": 50, "launch": 49, "layaer": [13, 38], "layer": [1, 3, 5, 7, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 40, 41, 43, 45, 46, 48, 49], "layer_min_max_map": 3, "layer_weighting_typ": [1, 14, 16], "layerfilterparam": 42, "learn": [1, 14, 15, 16, 17, 46], "learnabl": 46, "least": 6, "left": 11, "let": 41, "level": 35, "lfh": [6, 25, 32], "librari": [3, 8], "like": [8, 45], "limit": [6, 21, 24, 26, 27, 29, 31, 34], "line": 48, "linear": [4, 11, 28], "linear_collaps": [8, 11], "linearli": 4, "link": 48, "list": [0, 1, 3, 5, 11, 14, 15, 16, 20, 28, 40, 41, 43, 50], "liter": 45, "ll": [20, 28], "load": [13, 26, 27, 41, 46], "load_model": [26, 27], "loadopt": 23, "log": [4, 12, 13, 15, 17, 35, 48, 49], "log_funct": [4, 15, 17], "log_norm": 4, "log_tensorboard_xqu": 48, "logdir": 49, "logger": [13, 40, 49], "longer": 41, "look": [24, 27, 34, 45, 50], "lookup": 45, "loss": [1, 4, 12, 14, 15, 16, 17, 21, 25, 29, 31, 32, 48], "low": 11, "lp": 9, "lsq": 46, "lut_pot_quant": 45, "lut_sym_quant": 45, "lut_values_bitwidth": 45, "mae": [9, 49], "mai": [20, 21, 24, 27, 28, 29, 31, 
34, 42, 49], "main": [11, 45, 48, 49], "make": [9, 40], "manag": [0, 11], "mandatori": 41, "mani": 49, "manipul": [0, 1], "manner": 45, "manual": [0, 13, 39, 48], "manual_activation_bit_width_selection_list": 0, "manual_weights_bit_width_selection_list": 0, "manualweightsbitwidthselect": 0, "map": [3, 45], "mask": 7, "match": [18, 19, 42, 43], "mathemat": 49, "max": [1, 3, 5, 8, 9, 21, 22, 24, 27, 29, 30, 31, 34, 49], "maxbit": 5, "maxim": [21, 24, 27, 34], "mct": [3, 8, 11, 13, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50], "mct_current_schema": 45, "mct_quantiz": 41, "mct_wrapper": 11, "mctwrapper": 11, "mean": [1, 4, 9, 49], "measur": [6, 10, 12, 48, 49], "meet": [25, 32], "memori": [10, 25, 32, 49], "messag": 48, "metadata": [7, 45], "method": [4, 5, 6, 9, 11, 13, 25, 32, 35, 41, 43, 44, 45, 46], "metric": [4, 5, 6, 12, 36, 37, 38, 48], "metric_epsilon": 5, "metric_norm": 5, "metric_normalization_threshold": 5, "min": [1, 3, 5, 8, 9, 21, 24, 27, 29, 31, 34, 49], "min_threshold": [8, 46], "minbit": 5, "minim": [5, 9, 21, 25, 29, 31, 32], "minimum": 46, "minor": 45, "minut": 50, "mix": [5, 10, 11, 12, 13, 21, 22, 24, 26, 27, 29, 30, 31, 34, 39, 45, 48, 50], "mixed_precis": 11, "mixed_precision_config": [21, 22, 24, 26, 27, 39], "mixedprecisionquantizationconfig": [11, 13, 21, 22, 24, 26, 27, 39], "mkstemp": 41, "mobilenet": [21, 22], "mobilenet_v2": [24, 26, 27, 29, 30, 31, 33, 34, 41], "mobilenetv2": [24, 26, 27, 41, 49], "model": [3, 4, 5, 7, 8, 10, 11, 12, 13, 18, 19, 20, 21, 24, 25, 28, 29, 31, 32, 36, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49], "model_fil": [26, 27], "model_format_onnx_mctq": 41, "model_mp": 5, "model_output": 41, "modifi": [13, 43], "modul": [13, 28, 29, 30, 31, 32, 
37, 38], "more": [9, 18, 19, 24, 25, 27, 32, 34, 41, 45, 47, 48, 49], "most": 48, "mse": [8, 9, 11, 12, 48, 49], "much": 40, "multipl": [3, 5, 35, 45], "multiple_tensors_mse_loss": 4, "multipli": [1, 12, 14, 16, 48], "must": [25, 32, 45], "n_epoch": [4, 11, 15, 17, 21], "n_imag": [20, 28], "n_iter": [1, 14, 16, 20, 28], "nadam": 15, "name": [12, 40, 43, 45, 48, 49], "nchw": 3, "ndarrai": 7, "necessari": [4, 11, 41, 46, 48], "need": [3, 11, 13, 21, 24, 27, 29, 31, 34, 41, 42, 46, 48], "neg": [1, 5, 48], "negative_min_max_diff": [1, 16], "network": [3, 6, 11, 33, 39, 40, 43, 49, 50], "network_editor": [13, 40], "netzer": 50, "neural": [6, 11, 50], "neuron": 7, "new": [43, 45], "next": [20, 28, 41, 42], "nhwc": 3, "nn": [28, 37, 38], "no_norm": 1, "no_quantization_op": 3, "noclip": [8, 9], "node": [0, 27, 34, 41, 43, 46, 49], "node_nam": 43, "node_name_scop": 43, "node_typ": 43, "nodenamefilt": 43, "nodenamescopefilt": 43, "nodetypefilt": 43, "nois": 9, "non": [5, 15, 17, 45], "none": [1, 2, 4, 5, 8, 11, 12, 15, 17, 21, 23, 24, 27, 29, 31, 34, 35, 39, 40, 41, 43, 44, 45, 46], "norm": [9, 49], "norm_scor": [4, 5], "normal": [1, 4, 5, 14, 16], "note": [21, 24, 26, 27], "notebook": 50, "noteq": 42, "notic": [20, 25, 28, 32, 41], "now": [6, 18, 19, 34, 41, 45, 46, 47, 49], "np": [7, 11, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "num_calibration_batch": [21, 24, 27, 29, 31, 34], "num_interest_points_factor": 5, "num_of_imag": [5, 11, 21, 24], "num_score_approxim": [6, 25, 32], "number": [1, 4, 5, 6, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 40, 45, 46, 48], "numel": 32, "numer": 5, "numpi": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "o": 50, "object": [0, 3, 4, 5, 6, 10, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 29, 30, 31, 34, 41, 43, 45, 46, 48], "observ": [21, 29, 31, 45, 49], "one": [5, 42, 49], "onli": [3, 4, 5, 6, 12, 21, 24, 26, 27, 41, 45], "onlin": [27, 34], "onnx": 11, "onnx_file_path": 41, "onnx_opset_vers": 
41, "onnxruntim": 41, "op": [42, 45], "open": [41, 49, 50], "oper": [3, 10, 40, 42, 45], "operator_group": 45, "operator_set": 45, "operators_set": 45, "operatorsetnam": 45, "opquantizationconfig": [18, 19, 47], "optim": [1, 3, 4, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 29, 30, 31, 34, 39, 45, 46, 47, 50], "optimizer_bia": 4, "optimizer_quantization_paramet": 4, "optimizer_rest": [4, 15, 17], "optimizerv2": 15, "option": [11, 13, 21, 23, 24, 25, 27, 29, 31, 32, 34, 41, 45], "order": [15, 17, 21, 24, 27, 34, 40, 41, 42, 44], "org": 46, "orient": [13, 46], "origin": [25, 35, 36, 37, 38, 49], "ort": 41, "other": [1, 11, 15, 17, 48], "otherwis": 45, "our": [21, 24, 26, 27, 34, 50], "out": [3, 6], "out1": 50, "out2": 50, "out3": 50, "out_channel_axis_map": 3, "outlier": [12, 48], "output": [1, 3, 12, 14, 16, 20, 21, 24, 27, 28, 29, 31, 33, 34, 40, 45, 48, 49, 50], "output_image_s": [20, 28], "output_loss_multipli": [1, 14, 16], "output_loss_typ": [1, 14, 16], "output_nam": 41, "outputlosstyp": [14, 16], "over": 5, "overrid": [4, 44], "overwrit": 5, "p": 32, "packag": [41, 46, 50], "pad": 45, "page": 13, "pair": 49, "param": [17, 40, 43, 46], "param_item": 11, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "pars": 45, "part": 41, "pass": [2, 3, 5, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 43], "patch": 45, "path": [11, 13, 23, 35, 41, 48, 49], "pattern": 45, "pdf": 46, "per": [1, 3, 4, 21, 24, 27, 34, 45, 46, 49], "per_sampl": 4, "percentag": [5, 40], "peretz": 50, "perform": [6, 10, 11, 20, 25, 28, 32], "phase": 49, "pinpoint": 40, "pip": [41, 50], "pipelin": [1, 11, 14, 16], "pixel": [1, 14, 16], "place": 45, "plan": 41, "platform": [11, 18, 19, 21, 24, 25, 26, 27, 30, 32, 45], "pleas": [24, 27, 34, 41, 44, 48, 50], "plot": [40, 49], "point": [4, 5, 15, 17, 21, 29, 31, 36, 37, 38, 45, 49], "posit": 45, "possibl": [9, 
21, 24, 27, 34, 45, 49], "post": [4, 11, 13, 25, 27, 32, 34, 50], "power": [21, 24, 27, 29, 31, 34, 45], "power_of_two": 45, "poweroftwo": 46, "pre": 5, "preced": [21, 24, 27, 29, 31, 34], "precis": [5, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34, 39, 45, 48, 50], "predefin": [5, 6], "predict": 41, "prepar": [11, 13, 27, 34], "preprint": 50, "present": [2, 48, 49], "preserv": 45, "pretrain": [33, 34], "prevent": 5, "print": 40, "prior": 5, "prioriti": 11, "problemat": 40, "procedur": 48, "process": [4, 5, 8, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, 32, 39, 43, 44, 45, 47, 49], "product": 49, "progress": 40, "progress_info_callback": 40, "progress_perc": 40, "progressinfocallback": 40, "project": [41, 50], "properti": 7, "propos": [46, 48], "provid": [2, 11, 20, 25, 28, 32, 40, 41, 45, 46, 48, 49], "prune": [10, 50], "pruned_model": [25, 32], "pruning_config": [25, 32], "pruning_info": [25, 32], "pruning_mask": 7, "pruning_num_score_approxim": 6, "pruningconfig": [6, 13, 25, 32], "pruninginfo": [7, 13, 25, 32], "ptq": [11, 24, 31, 41, 48], "purpos": [20, 28, 40], "py": 50, "pydantic_cor": 45, "pypi": 50, "python": [35, 50], "pytorch": [11, 13, 45, 46, 50], "pytorch_data_generation_experiment": [13, 28], "pytorch_default_tpc": 30, "pytorch_gradient_post_training_quant": [13, 17, 29], "pytorch_post_training_quant": [13, 31, 41, 48], "pytorch_pruning_experiment": [13, 32], "pytorch_quantization_aware_training_finalize_experiment": [13, 33], "pytorch_quantization_aware_training_init_experiment": [13, 33, 34], "pytorch_resource_utilization_data": [13, 30], "q": 41, "q_fraction_scheduler_polici": 4, "qat": [26, 27, 33, 34, 44], "qat_config": [13, 27, 34], "qatconfig": [27, 34], "qc": 8, "qc_option": 45, "qmodel": 11, "qnnpack": 45, "quant": 41, "quantifi": [7, 49], "quantiz": [0, 3, 4, 5, 8, 9, 11, 12, 13, 15, 17, 20, 22, 28, 30, 36, 37, 38, 39, 40, 43, 44, 45, 46, 49, 50], "quantization_config": [39, 46], "quantization_configur": 45, "quantization_format": 
41, "quantization_info": [21, 24, 26, 27, 29, 31, 33, 34], "quantization_preserv": [18, 19, 45, 47], "quantizationconfig": [13, 39], "quantizationerrormethod": [8, 11, 13], "quantizationmethod": [3, 46], "quantize_and_export": 11, "quantize_reported_dir": [12, 48], "quantized_exportable_model": 41, "quantized_info": 48, "quantized_model": [11, 21, 24, 26, 27, 33, 34, 36, 37, 38, 48], "quantized_modul": [29, 31], "quantizewrapp": [13, 27, 33, 34], "question": 41, "r": 50, "radam": 16, "rais": 45, "random": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "random_data_gen": 48, "rang": [3, 12, 21, 24, 27, 29, 31, 34, 48], "rate": [1, 14, 15, 16, 17], "ratio": [11, 12, 48], "readi": 33, "readm": 41, "receiv": [11, 40], "recent": 48, "recommend": 48, "recov": [25, 32], "red": 48, "reduc": [5, 25, 32], "reduce_on_plateau": [1, 14], "reduce_on_plateau_with_reset": 16, "reduceonplateau": 1, "refer": [41, 48], "refine_mp_solut": 5, "regard": 42, "regular": [1, 4, 15, 17], "regularization_factor": [4, 15, 17], "regularized_min_max_diff": [1, 14], "relat": [3, 7, 13, 45], "releas": 50, "relev": 41, "relu": 3, "relu_bound_to_power_of_2": 8, "remain": 40, "remov": [12, 25, 32, 33, 48], "replac": [26, 48], "report": [12, 13, 48], "report_dir": [12, 48], "repositori": 41, "repr_datagen": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34], "repr_dataset": [36, 37, 38, 41], "repres": [4, 5, 10, 11, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 36, 37, 38, 40, 41, 43, 45, 48, 49], "representative_data_gen": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 48], "representative_dataset": 11, "request": 2, "requir": [21, 24, 27, 29, 31, 34, 46, 49], "research": 50, "reshap": [3, 20], "residu": 11, "residual_collaps": [8, 11], "resnet50": [25, 32, 41], "resnet50_weight": 32, "resourc": [6, 10, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 49], "resourceutil": [13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34], "respect": 48, "respectivli": 3, "rest": 4, "result": 48, "retrain": [25, 32], 
"retriev": [18, 19, 40, 45], "return": [2, 4, 5, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41], "round": 4, "rounding_typ": 4, "ru": [21, 24, 26, 27], "ru_data": [22, 30], "rule": [40, 43], "run": [4, 15, 17, 40, 41, 49], "runner": 40, "same": [1, 41, 45], "sampl": [4, 15, 17, 49], "save": [3, 11, 12, 27, 35, 41, 46, 48], "save_model_path": [11, 41], "saved_model": 23, "savedmodel": 23, "scalar": 49, "scale": [4, 5, 45], "scale_log_norm": 4, "schedul": [1, 4, 14, 16, 40], "scheduler_typ": [1, 14, 16], "schedulertyp": [14, 16], "schema": 45, "schema_vers": 45, "score": [4, 5, 6, 7, 9, 11, 15, 17, 25, 32], "sdsp": [11, 13, 45], "sdsp_v3_14": 19, "sdsp_version": [11, 19], "search": [5, 10, 13, 21, 24, 27, 29, 31, 34], "second": 49, "section": 40, "see": [4, 17, 48, 50], "seen": 49, "select": [0, 3, 6, 8, 9, 11, 13, 39, 41, 44, 45, 46], "self": [40, 45], "semiconductor": 50, "sensit": [5, 6, 25, 32], "sequenti": [20, 28], "serial": 13, "serialization_format": 41, "sess": 41, "session": 41, "set": [3, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 40, 41, 43, 45, 46, 48, 49], "set_log_fold": [35, 48, 49], "setup": [11, 50], "sever": [21, 24, 27, 29, 31, 34, 49], "shift": 48, "shift_negative_activation_correct": 8, "shift_negative_params_search": 8, "shift_negative_ratio": 8, "shift_negative_threshold_recalcul": 8, "shortli": 45, "should": [3, 6, 15, 21, 22, 24, 25, 26, 27, 29, 31, 32, 34, 41, 45, 49], "show": 49, "shown": 48, "sigma": 5, "signal": 9, "signed": 45, "signific": [7, 48], "significantli": 48, "simd": [25, 32, 45], "simd_siz": 45, "similar": [9, 12, 36, 37, 38, 40, 48, 50], "similarli": 45, "simpl": [20, 28], "simplic": [20, 28], "simul": 40, "simulate_schedul": 40, "simultan": 45, "singl": 45, "six": 48, "size": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 26, 27, 28, 34, 41, 46], "skip": [12, 40, 41, 48], "slowli": 41, "small": 48, "smaller": 42, "smallereq": 42, 
"smooth": [1, 46], "smoothing_and_augment": [1, 14, 16], "so": [11, 41], "softmax": 3, "softmax_shift": 8, "softquant": 4, "solut": 50, "solver": [21, 24, 27, 34], "some": [18, 19, 20, 28, 41, 45, 47, 49], "soni": 50, "sonysemiconductorsolut": 50, "sourc": 50, "specif": [0, 3, 11, 13, 25, 32, 43, 48, 49], "specifi": [6, 11, 12, 14, 16, 18, 20, 23, 25, 28, 32, 41, 45, 48], "sphinx": 13, "sqnr": [12, 48], "squar": [1, 9], "stabl": 50, "stage": 49, "standard": [25, 32, 40, 46], "start": [20, 28, 41, 46, 50], "start_step": 4, "state": 50, "state_dict": 32, "statist": [3, 21, 24, 27, 29, 31, 34, 49], "stderr": 40, "ste": [4, 44, 46], "step": [1, 4, 40, 46, 48], "store": [7, 46], "str": [3, 11, 12, 18, 19, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 45, 48], "straight": [4, 46], "strategi": [6, 25, 32], "string": 43, "structur": [13, 50], "student": 4, "success": 11, "suffer": 41, "suggest": 48, "sum": [10, 22, 25, 30, 32], "support": [4, 11, 41], "supported_input_activation_n_bit": 45, "sure": 40, "sy": 40, "symmetr": [21, 24, 27, 29, 31, 34, 45, 46], "t": [35, 50], "tab": 49, "tabl": 45, "tag": 49, "take": [5, 24, 27, 34, 50], "target": [4, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 30, 32, 33, 34, 45], "target_platform_cap": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 42, 46], "target_q_fract": 4, "target_resource_util": [21, 24, 25, 27, 29, 31, 32, 34], "targetplatformcap": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34], "teacher": 4, "tempfil": 41, "tensor": [5, 11, 12, 15, 17, 20, 22, 28, 30, 45, 46, 49, 50], "tensorboard": [40, 50], "tensorflow": [3, 11, 13, 15, 20, 21, 22, 24, 25, 26, 27, 41, 43, 45, 50], "tf": [3, 11, 15, 20, 23, 26, 27], "tflite": [41, 45], "than": [5, 42, 48], "thei": 3, "them": [45, 49], "thi": [5, 7, 8, 9, 11, 13, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 40, 41, 45, 46, 48, 50], "those": 48, "three": [3, 48], "threshold": [5, 8, 9, 11, 12, 21, 24, 27, 29, 31, 34, 45, 46, 48], "threshold_bitwidth_mixed_precis": 48, 
"threshold_bitwidth_mixed_precision_with_model_output_loss_object": 12, "threshold_degrade_layer_ratio": [12, 48], "threshold_quantize_error": [12, 48], "threshold_ratio_unbalanced_concaten": [12, 48], "threshold_zscore_outlier_remov": [12, 48], "through": [4, 20, 25, 28, 46], "throughout": 4, "thu": [25, 32, 49], "time": [3, 6, 46], "togeth": [25, 32], "tool": [11, 13, 46, 50], "toolkit": [11, 13, 20, 28, 29, 48], "torch": [17, 28, 37, 38, 41, 50], "torchscript": 41, "torchvis": [1, 16, 29, 30, 31, 32, 33, 34, 41], "total": [10, 22, 30, 40], "total_memori": 10, "totalcompon": 40, "tpc": [11, 13, 25, 32, 45], "tpc_minor_vers": 45, "tpc_patch_vers": 45, "tpc_platform_typ": 45, "tpc_v1_0": 18, "tpc_version": 18, "trace": 41, "track": 40, "train": [4, 11, 13, 44, 46, 50], "train_bia": 4, "trainabl": [23, 26, 46], "trainable_infrastructur": 44, "trainablequant": 26, "transform": [1, 21, 24, 27, 29, 31, 34], "transpos": 3, "treat": 45, "troubleshoot": 13, "true": [1, 5, 8, 11, 12, 15, 16, 17, 23, 33, 34, 40, 46], "try": 5, "tun": 34, "tune": [15, 17, 25, 26, 27, 32, 33], "tupl": [1, 3, 11, 14, 16, 20, 21, 24, 25, 28, 29, 31, 32, 43, 45], "tutori": 48, "two": [5, 12, 21, 24, 27, 29, 31, 34, 41, 45, 48, 49], "type": [0, 1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 35, 36, 37, 38, 40, 41, 43, 45, 48], "ui": 49, "unbalanc": [12, 48], "unchang": 40, "under": 49, "unifi": 11, "uniform": [45, 46], "union": [1, 14, 16, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 34, 45], "uniqu": 45, "up": [6, 20, 28, 35, 45, 49], "updat": [4, 11], "upon": 46, "us": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50], "use_hessian_based_scor": [5, 11], "use_hessian_based_weight": [15, 17], "use_hessian_sample_attent": [15, 17], "use_mixed_precis": 11, "user": [11, 13, 21, 24, 26, 27, 29, 31, 33, 34, 40, 48], 
"userinform": [21, 24, 29, 31], "util": [6, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 46], "v": 50, "valid": [36, 37, 38, 45, 46, 48], "validation_dataset": [36, 37, 38, 48], "validationerror": 45, "valu": [1, 2, 3, 4, 5, 6, 9, 11, 12, 21, 24, 25, 26, 27, 32, 40, 41, 42, 43, 45, 46, 48], "valuabl": 9, "variabl": [11, 15, 17], "variou": [11, 20, 28, 49], "vector": [4, 49], "verbos": 35, "version": [11, 13, 20, 28, 45], "via": [41, 50], "view": 49, "visit": [44, 50], "visual": [40, 48, 50], "wa": [2, 41, 48], "wai": [49, 50], "walk": [20, 28], "want": 3, "warn": [11, 48], "we": [3, 20, 21, 24, 25, 27, 28, 32, 34, 41, 43, 45, 46, 49], "weight": [0, 1, 3, 4, 5, 8, 10, 11, 14, 15, 16, 17, 21, 22, 25, 27, 29, 30, 31, 32, 33, 34, 41, 43, 44, 45, 46, 49], "weight_quantizer_params_overrid": 44, "weight_training_method": 44, "weights_bias_correct": [8, 11], "weights_channels_axi": 46, "weights_compression_ratio": 11, "weights_error_method": 8, "weights_memori": [6, 10, 21, 24, 25, 27, 32, 34], "weights_n_bit": [43, 45, 46], "weights_per_channel_threshold": [45, 46], "weights_quantization_candid": 46, "weights_quantization_method": [43, 45, 46], "weights_quantization_param": 46, "weights_quantization_params_fn": 43, "weights_second_moment_correct": 8, "were": 49, "when": [1, 2, 3, 4, 5, 6, 9, 10, 12, 13, 15, 17, 21, 24, 26, 27, 40, 41, 42, 44, 45, 46, 48, 49], "where": [7, 12, 41, 43, 48, 49], "whether": [4, 5, 7, 11, 14, 15, 16, 17, 23, 40, 41, 45, 46], "which": [4, 6, 40, 41, 42, 43, 45, 46], "while": [8, 21, 24, 26, 27, 34, 40, 45], "who": 48, "width": [0, 5, 12, 13, 21, 24, 27, 28, 34, 39, 45, 48, 50], "within": [40, 45, 48, 50], "without": 13, "work": 50, "would": 49, "wrap": [2, 3, 23, 27, 34, 42, 45, 46], "wrapper": [27, 33, 34, 46], "writer": 49, "x": 48, "xquant": [11, 50], "xquant_config": [12, 36, 37, 38, 48], "xquant_report_keras_experiment": [13, 36], "xquant_report_pytorch_experiment": [13, 37, 48], "xquant_report_troubleshoot_pytorch_experiment": [12, 13, 38, 
48], "xquantconfig": [12, 13, 36, 37, 38], "y": 48, "yield": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "you": [8, 11, 40, 41, 45, 49, 50], "your": [41, 48], "z": 11, "z_score": [12, 48], "z_threshold": [8, 11], "zero": [5, 45]}, "titles": ["BitWidthConfig", "Data Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "wrapper", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", "Get GradientPTQConfig for Pytorch Models", "Get TargetPlatformCapabilities for tpc version", "Get TargetPlatformCapabilities for sdsp converter version", "Keras Data Generation", "Keras Gradient Based Post Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", "Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "XQuant Report Troubleshoot Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform_capabilities Module", "trainable_infrastructure Module", "<no title>", "XQuant Extension Tool", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "titleterms": {"about": 48, "action": 43, "api": [13, 50], "attribut": 
42, "attributequantizationconfig": 45, "awar": [26, 27, 33, 34], "base": [21, 29], "basekerastrainablequant": 46, "basepytorchtrainablequant": 46, "batchnormalignemntlosstyp": 1, "bit": 49, "bitwidthconfig": 0, "bnlayerweightingtyp": 1, "channelaxi": 3, "channelsfilteringstrategi": 6, "class": [2, 3, 4], "comparison": 49, "compress": 50, "configur": [1, 6, 12, 49], "constraint": 50, "convert": 19, "core": 13, "coreconfig": 39, "cosin": 49, "data": [1, 20, 28], "data_gener": 13, "datagenerationconfig": [14, 16], "datainittyp": 1, "debug_config": 40, "debugconfig": 40, "defaultdict": 2, "dictionari": 40, "doc": 13, "document": 50, "editrul": 43, "enabl": 35, "error": 48, "exampl": 48, "export": [13, 41], "extens": 48, "featur": 50, "filter": [42, 43], "final": [26, 33], "flow": 48, "format": [41, 48], "frameworkinfo": 3, "fuse": 45, "gener": [1, 20, 28, 48], "get": [14, 15, 16, 17, 18, 19, 22, 30], "gptq": 13, "gptqhessianscoresconfig": 4, "gradient": [21, 29], "gradientptqconfig": [4, 15, 17], "gradualactivationquantizationconfig": 4, "graph": 48, "guid": 50, "how": 48, "imagegranular": 1, "imagenormalizationtyp": 1, "imagepipelinetyp": 1, "importancemetr": 6, "indic": 13, "infer": 41, "inform": [7, 22, 30], "init": [27, 34], "instal": 50, "judgeabl": 48, "kei": 40, "kera": [14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 36, 41], "keras_export_model": 41, "keras_load_quantized_model": 13, "kerasexportserializationformat": 41, "layer": 42, "load": 23, "logger": 35, "manualbitwidthselect": 0, "mctq": 41, "mix": 49, "mixedprecisionquantizationconfig": 5, "model": [14, 15, 16, 17, 22, 23, 26, 27, 30, 33, 34, 41, 50], "modul": [40, 41, 43, 44, 45, 46], "mpdistanceweight": 5, "mpmetricnorm": 5, "name": 41, "network_editor": 43, "onnx": 41, "operatorsetgroup": 45, "operatorsset": 45, "opquantizationconfig": 45, "opset": 41, "output": 41, "outputlosstyp": 1, "overal": 48, "overview": 50, "paramet": 48, "post": [21, 24, 29, 31], "precis": 49, "process": [40, 48], "prune": [6, 7, 
13, 25, 32], "ptq": 13, "pytorch": [16, 17, 28, 29, 30, 31, 32, 33, 34, 37, 38, 41], "pytorch_export_model": 41, "pytorchexportserializationformat": 41, "qat": 13, "qat_config": 44, "qatconfig": 44, "qfractionlinearannealingconfig": 4, "quantiz": [21, 23, 24, 26, 27, 29, 31, 33, 34, 41, 48], "quantizationconfig": 8, "quantizationconfigopt": 45, "quantizationerrormethod": 9, "quantizationformat": 41, "quantizationmethod": 45, "quickstart": 50, "refer": 50, "report": [36, 37, 38], "resourc": [22, 30], "resourceutil": 10, "roundingtyp": 4, "run": 48, "schedulertyp": 1, "sdsp": 19, "serial": 41, "set_log_fold": 13, "similar": 49, "state": 40, "structur": [25, 32], "support": 50, "tabl": 13, "target_platform_cap": [13, 45], "targetplatformcap": [18, 19, 45], "technic": 50, "tensorboard": 49, "tool": 48, "toolkit": 50, "tpc": 18, "train": [21, 24, 26, 27, 29, 31, 33, 34], "trainable_infrastructur": [13, 46], "trainablequantizeractivationconfig": 46, "trainablequantizerweightsconfig": 46, "trainingmethod": [44, 46], "troubleshoot": [38, 48], "tutori": 41, "understand": 48, "us": 41, "user": 50, "util": [22, 30], "version": [18, 19, 41], "visual": 49, "width": 49, "within": 49, "wrapper": [11, 13], "xquant": [12, 13, 36, 37, 38, 48], "xquantconfig": 48}})
\ No newline at end of file
diff --git a/docs/static/pygments.css b/docs/static/pygments.css
index 5f2b0a250..0d49244ed 100644
--- a/docs/static/pygments.css
+++ b/docs/static/pygments.css
@@ -6,26 +6,26 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
.highlight .hll { background-color: #ffffcc }
.highlight { background: #eeffcc; }
.highlight .c { color: #408090; font-style: italic } /* Comment */
-.highlight .err { border: 1px solid #F00 } /* Error */
+.highlight .err { border: 1px solid #FF0000 } /* Error */
.highlight .k { color: #007020; font-weight: bold } /* Keyword */
-.highlight .o { color: #666 } /* Operator */
+.highlight .o { color: #666666 } /* Operator */
.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #007020 } /* Comment.Preproc */
.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
-.highlight .cs { color: #408090; background-color: #FFF0F0 } /* Comment.Special */
+.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
-.highlight .gr { color: #F00 } /* Generic.Error */
+.highlight .gr { color: #FF0000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
-.highlight .go { color: #333 } /* Generic.Output */
-.highlight .gp { color: #C65D09; font-weight: bold } /* Generic.Prompt */
+.highlight .go { color: #333333 } /* Generic.Output */
+.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
-.highlight .gt { color: #04D } /* Generic.Traceback */
+.highlight .gt { color: #0044DD } /* Generic.Traceback */
.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
@@ -33,43 +33,43 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #902000 } /* Keyword.Type */
.highlight .m { color: #208050 } /* Literal.Number */
-.highlight .s { color: #4070A0 } /* Literal.String */
-.highlight .na { color: #4070A0 } /* Name.Attribute */
+.highlight .s { color: #4070a0 } /* Literal.String */
+.highlight .na { color: #4070a0 } /* Name.Attribute */
.highlight .nb { color: #007020 } /* Name.Builtin */
-.highlight .nc { color: #0E84B5; font-weight: bold } /* Name.Class */
-.highlight .no { color: #60ADD5 } /* Name.Constant */
-.highlight .nd { color: #555; font-weight: bold } /* Name.Decorator */
-.highlight .ni { color: #D55537; font-weight: bold } /* Name.Entity */
+.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
+.highlight .no { color: #60add5 } /* Name.Constant */
+.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
+.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
.highlight .ne { color: #007020 } /* Name.Exception */
-.highlight .nf { color: #06287E } /* Name.Function */
+.highlight .nf { color: #06287e } /* Name.Function */
.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
-.highlight .nn { color: #0E84B5; font-weight: bold } /* Name.Namespace */
+.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
-.highlight .nv { color: #BB60D5 } /* Name.Variable */
+.highlight .nv { color: #bb60d5 } /* Name.Variable */
.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
-.highlight .w { color: #BBB } /* Text.Whitespace */
+.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #208050 } /* Literal.Number.Bin */
.highlight .mf { color: #208050 } /* Literal.Number.Float */
.highlight .mh { color: #208050 } /* Literal.Number.Hex */
.highlight .mi { color: #208050 } /* Literal.Number.Integer */
.highlight .mo { color: #208050 } /* Literal.Number.Oct */
-.highlight .sa { color: #4070A0 } /* Literal.String.Affix */
-.highlight .sb { color: #4070A0 } /* Literal.String.Backtick */
-.highlight .sc { color: #4070A0 } /* Literal.String.Char */
-.highlight .dl { color: #4070A0 } /* Literal.String.Delimiter */
-.highlight .sd { color: #4070A0; font-style: italic } /* Literal.String.Doc */
-.highlight .s2 { color: #4070A0 } /* Literal.String.Double */
-.highlight .se { color: #4070A0; font-weight: bold } /* Literal.String.Escape */
-.highlight .sh { color: #4070A0 } /* Literal.String.Heredoc */
-.highlight .si { color: #70A0D0; font-style: italic } /* Literal.String.Interpol */
-.highlight .sx { color: #C65D09 } /* Literal.String.Other */
+.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
+.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
+.highlight .sc { color: #4070a0 } /* Literal.String.Char */
+.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
+.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
+.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
+.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
+.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
+.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
+.highlight .sx { color: #c65d09 } /* Literal.String.Other */
.highlight .sr { color: #235388 } /* Literal.String.Regex */
-.highlight .s1 { color: #4070A0 } /* Literal.String.Single */
+.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
.highlight .ss { color: #517918 } /* Literal.String.Symbol */
.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #06287E } /* Name.Function.Magic */
-.highlight .vc { color: #BB60D5 } /* Name.Variable.Class */
-.highlight .vg { color: #BB60D5 } /* Name.Variable.Global */
-.highlight .vi { color: #BB60D5 } /* Name.Variable.Instance */
-.highlight .vm { color: #BB60D5 } /* Name.Variable.Magic */
+.highlight .fm { color: #06287e } /* Name.Function.Magic */
+.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
+.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
+.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
+.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
index 308b0c90e..c7adc4881 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
@@ -28,6 +28,8 @@
from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.sensitivity_evaluation import SensitivityEvaluation
from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \
greedy_solution_refinement_procedure
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
class BitWidthSearchMethod(Enum):
@@ -41,7 +43,8 @@ def search_bit_width(graph: Graph,
mp_config: MixedPrecisionQuantizationConfig,
representative_data_gen: Callable,
search_method: BitWidthSearchMethod = BitWidthSearchMethod.INTEGER_PROGRAMMING,
- hessian_info_service: HessianInfoService = None) -> List[int]:
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None) -> List[int]:
"""
Search for an MP configuration for a given graph. Given a search_method method (by default, it's linear
programming), we use the sensitivity_evaluator object that provides a function to compute an
@@ -59,6 +62,7 @@ def search_bit_width(graph: Graph,
representative_data_gen: Dataset to use for retrieving images for the models inputs.
search_method: BitWidthSearchMethod to define which searching method to use.
hessian_info_service: HessianInfoService to fetch Hessian-approximation information.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
A MP configuration for the graph (list of integers, where the index in the list, is the node's
@@ -81,7 +85,8 @@ def search_bit_width(graph: Graph,
# even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info,
fw_impl=fw_impl, disable_activation_for_metric=disable_activation_for_metric,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING:
raise NotImplementedError()
@@ -97,7 +102,8 @@ def search_bit_width(graph: Graph,
fw_impl=fw_impl,
sensitivity_evaluator=se,
target_resource_utilization=target_resource_utilization,
- mp_config=mp_config)
+ mp_config=mp_config,
+ progress_info_controller=progress_info_controller)
nodes_bit_cfg = search_manager.search()
graph.skip_validation_check = False
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index d0191946d..aa35e93a7 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -44,6 +44,8 @@
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
MixedPrecisionQuantizationConfig, MpMetricNormalization
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
class MixedPrecisionSearchManager:
@@ -57,7 +59,8 @@ def __init__(self,
fw_impl: FrameworkImplementation,
sensitivity_evaluator: SensitivityEvaluation,
target_resource_utilization: ResourceUtilization,
- mp_config: MixedPrecisionQuantizationConfig):
+ mp_config: MixedPrecisionQuantizationConfig,
+ progress_info_controller: ProgressInfoController = None):
"""
Args:
@@ -67,11 +70,14 @@ def __init__(self,
sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
a bit-width configuration for the MP model.
target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
self.fw_info = fw_info
self.fw_impl = fw_impl
+ self.progress_info_controller = progress_info_controller
+
self.original_graph = graph
# graph for mp search
self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization)
@@ -183,6 +189,9 @@ def ensure_maxbit_minimal_metric(node_candidates_metrics, max_ind):
metrics[max_ind] = max_val
return metrics
+ if self.progress_info_controller is not None:
+ self.progress_info_controller.set_description('Research Mixed Precision')
+
layer_to_metrics_mapping = {}
debug_mapping = {}
for node_idx, node in tqdm(enumerate(self.mp_topo_configurable_nodes)):
diff --git a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
index f3f36d913..e926148e3 100644
--- a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
+++ b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
@@ -22,6 +22,8 @@
from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence
from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
@runtime_checkable
@@ -64,7 +66,8 @@ def __init__(self,
representative_data_gen: Callable,
fw_info: FrameworkInfo,
fw_impl: Any,
- hessian_info_service: HessianInfoService = None):
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None):
"""
Args:
graph: Graph to search for its MP configuration.
@@ -74,6 +77,7 @@ def __init__(self,
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
representative_data_gen: Dataset used for getting batches for inference.
hessian_info_service: HessianInfoService to fetch Hessian approximation information.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
self.graph = graph
self.mp_config = mp_config
@@ -121,7 +125,7 @@ def __init__(self,
# Hessian-based scores for weighted average distance metric computation
self.interest_points_hessians = None
if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
- self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service)
+ self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service, progress_info_controller)
def compute(self, mp_model) -> float:
"""
@@ -168,16 +172,20 @@ def _init_baseline_tensors_list(self):
return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.ref_model, images))
for images in self.images_batches]
- def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService) -> np.ndarray:
+ def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService, progress_info_controller: ProgressInfoController) -> np.ndarray:
"""
Compute Hessian-based scores for each interest point.
Args:
hessian_info_service: Hessian service.
+            progress_info_controller: Progress information controller.
Returns:
A vector of scores, one for each interest point, to be used for the distance metric weighted average computation.
"""
+ if progress_info_controller is not None:
+ progress_info_controller.set_description('Compute Hessian for Mixed Precision')
+
# Create a request for Hessian approximation scores with specific configurations
# (here we use per-tensor approximation of the Hessian's trace w.r.t the node's activations)
fw_dataloader = self.fw_impl.convert_data_gen_to_dataloader(self.representative_data_gen,
diff --git a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py
index 399dc583b..55029e73a 100644
--- a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py
+++ b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py
@@ -27,6 +27,8 @@
from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
from model_compression_toolkit.core.common.hessian import HessianInfoService
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
class SensitivityEvaluation:
@@ -41,7 +43,8 @@ def __init__(self,
fw_info: FrameworkInfo,
fw_impl: Any,
disable_activation_for_metric: bool = False,
- hessian_info_service: HessianInfoService = None
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None
):
"""
Args:
@@ -53,7 +56,7 @@ def __init__(self,
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
hessian_info_service: HessianInfoService to fetch Hessian approximation information.
-
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
self.mp_config = mp_config
self.representative_data_gen = representative_data_gen
@@ -65,7 +68,8 @@ def __init__(self,
else:
self.metric_calculator = DistanceMetricCalculator(graph, mp_config, representative_data_gen,
fw_info=fw_info, fw_impl=fw_impl,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
# Build a mixed-precision model which can be configured to use different bitwidth in different layers.
# Also, returns a mapping between a configurable graph's node and its matching layer(s) in the built MP model.
diff --git a/model_compression_toolkit/core/common/progress_config/__init__.py b/model_compression_toolkit/core/common/progress_config/__init__.py
new file mode 100644
index 000000000..0f3b9c87e
--- /dev/null
+++ b/model_compression_toolkit/core/common/progress_config/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
diff --git a/model_compression_toolkit/core/common/progress_config/constants.py b/model_compression_toolkit/core/common/progress_config/constants.py
new file mode 100755
index 000000000..403707c51
--- /dev/null
+++ b/model_compression_toolkit/core/common/progress_config/constants.py
@@ -0,0 +1,24 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+COMPLETED_COMPONENTS = 'completedComponents'
+TOTAL_COMPONENTS = 'totalComponents'
+CURRENT_COMPONENT = 'currentComponent'
+
+PROGRESS_INFO_CALLBACK = 'progress_info_callback'
+TOTAL_STEP = 'total_step'
+
+PROGRESS_BAR_POSITION = 2
+DEFAULT_TOTAL_STEP = 4
diff --git a/model_compression_toolkit/core/common/progress_config/progress_info_controller.py b/model_compression_toolkit/core/common/progress_config/progress_info_controller.py
new file mode 100644
index 000000000..e279492cd
--- /dev/null
+++ b/model_compression_toolkit/core/common/progress_config/progress_info_controller.py
@@ -0,0 +1,159 @@
+
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from typing import Optional, Callable, TYPE_CHECKING
+from dataclasses import dataclass, field
+from tqdm import tqdm
+
+from model_compression_toolkit.core.common.progress_config.constants import (
+ COMPLETED_COMPONENTS, TOTAL_COMPONENTS, CURRENT_COMPONENT,
+ PROGRESS_BAR_POSITION, PROGRESS_INFO_CALLBACK, TOTAL_STEP, DEFAULT_TOTAL_STEP
+)
+
+if TYPE_CHECKING: # pragma: no cover
+ from model_compression_toolkit.core import CoreConfig
+ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
+
+
+@dataclass
+class ProgressInfoController:
+ """
+ A unified progress bar controller class.
+ Support single progress bar.
+
+ Attributes:
+ total_step: Total number of processing steps.
+ description: Description for the progress bar.
+ current_step: Current step number (starts from 0, incremented by set_description()).
+ callback: User-defined callback function.
+ """
+ total_step: int = field(default=0)
+ current_step: int = field(default=0)
+    description: str = field(default="Model Compression Toolkit Progress Information")
+ progress_info_callback: Optional[Callable] = field(default=None)
+
+ def __new__(cls, *args, **kwargs):
+ """
+ Create or skip instantiation based on the enable flag.
+ Returns None when progress display should be disabled.
+ """
+ progress_info_callback = kwargs.get(PROGRESS_INFO_CALLBACK)
+ total_step = kwargs.get(TOTAL_STEP)
+
+        if progress_info_callback is None or total_step is None or total_step <= 0:
+ return None
+
+ if not callable(progress_info_callback):
+ raise TypeError(f"{PROGRESS_INFO_CALLBACK} must be a callable (function or callable instance).")
+
+ return super().__new__(cls)
+
+ def __post_init__(self):
+ """Create progress bar after initialization."""
+ # Initial single bar mode
+ self.pbar = tqdm(
+ total=self.total_step,
+ desc=self.description,
+ position=PROGRESS_BAR_POSITION,
+ leave=False,
+ unit='step',
+ dynamic_ncols=True,
+ bar_format='{l_bar}{bar:}|'
+ )
+
+ def set_description(self, description: str):
+ """
+ Update progress bar description.
+ Automatically increments step number each time set_description is called,
+ displaying in "Step X/Y: ..." format.
+
+ Args:
+ description: New description text ("Step X/Y: " is automatically added).
+ """
+ self.description = description
+ self.current_step += 1
+ formatted_description = f"Step {self.current_step}/{self.total_step}: {description}"
+
+ try:
+ assert self.current_step <= self.total_step, \
+ f"current_step: {self.current_step}, exceeded total_step: {self.total_step}."
+ except AssertionError:
+ self.close()
+ raise
+
+ self.pbar.set_description(formatted_description, refresh=False)
+ self.pbar.update()
+
+ progress_info = {
+ COMPLETED_COMPONENTS: description,
+ TOTAL_COMPONENTS: self.total_step,
+ CURRENT_COMPONENT: self.current_step
+ }
+ self.progress_info_callback(progress_info)
+
+ def close(self):
+ """Close progress bar."""
+ if self.pbar is not None:
+ self.pbar.close()
+ self.pbar = None
+
+
+def research_progress_total(core_config: 'CoreConfig',
+ target_resource_utilization: 'ResourceUtilization' = None,
+ gptq_config: 'GradientPTQConfig' = None) -> int:
+ """
+ Check whether specific processing will be executed based on input arguments
+ and calculate the total number of processing steps.
+
+ Processing step breakdown:
+ 1. Preprocessing (required)
+ 2. Statistics calculation (required)
+ 3. Weight parameter calculation (required)
+ 4. Hessian calculation (when GPTQ or specific settings enabled)
+ 5. MP calculation (when Mixed Precision enabled)
+ 6. Post-processing ~ conversion to exportable model (required)
+
+ Args:
+ core_config: CoreConfig object.
+ target_resource_utilization: ResourceUtilization object (used for Mixed Precision determination).
+ gptq_config: GPTQ configuration object.
+
+ Returns:
+ Total number of processing steps.
+ """
+ # Base required steps: preprocessing, statistics, weight params, post-processing
+ total_steps = DEFAULT_TOTAL_STEP
+
+ # Add MP calculation step (when Mixed Precision enabled)
+ if target_resource_utilization is not None and \
+ target_resource_utilization.is_any_restricted():
+ total_steps += 1
+
+ # Add Hessian step (when Mixed Precision with Hessian enabled)
+ if core_config.mixed_precision_config is not None and \
+ core_config.mixed_precision_config.use_hessian_based_scores:
+ total_steps += 1
+
+ # Add GPTQ training step (when GPTQ is enabled)
+ if gptq_config is not None:
+ total_steps += 1
+
+ # Add Hessian step (when GPTQ with Hessian enabled)
+    if gptq_config is not None and gptq_config.hessian_weights_config is not None:
+ total_steps += 1
+
+ return total_steps
diff --git a/model_compression_toolkit/core/common/quantization/debug_config.py b/model_compression_toolkit/core/common/quantization/debug_config.py
index 2f0ccde7a..1abefa11d 100644
--- a/model_compression_toolkit/core/common/quantization/debug_config.py
+++ b/model_compression_toolkit/core/common/quantization/debug_config.py
@@ -14,7 +14,7 @@
# ==============================================================================
from dataclasses import dataclass, field
-from typing import List
+from typing import List, Callable
from model_compression_toolkit.core.common.network_editors.edit_network import EditRule
@@ -30,9 +30,83 @@ class DebugConfig:
network_editor (List[EditRule]): A list of rules and actions to edit the network for quantization.
simulate_scheduler (bool): Simulate scheduler behavior to compute operators' order and cuts.
bypass (bool): A flag to enable MCT bypass, which skips MCT runner and returns the input model unchanged.
+ progress_info_callback (Callable): A user-defined callback function for retrieving progress information.
+
+ About progress_info_callback
+
+ The `progress_info_callback` parameter in `DebugConfig` enables the following features and allows users to retrieve progress information when a callback function is configured:
+
+ - The callback function can receive MCT progress information.
+ - A progress bar is displayed in the CUI, allowing users to visualize how much processing has been completed while MCT is running.
+
+ If no callback function is set, these features are disabled and the behavior and output remain unchanged.
+ Examples of how to create a callback function to enable these features are provided in the Examples section.
+
+ Examples:
+
+ Create a callable callback function.
+ When defining the callback, make sure it accepts a dictionary representing the current processing state as an argument.
+
+ Example 1: Use a class to keep track of the processing history.
+
+ >>> class ProgressInfoCallback:
+ ... def __init__(self):
+ ... self.history = []
+ ...
+ ... def __call__(self, info):
+ ... current = info["currentComponent"]
+ ... total = info["totalComponents"]
+ ... component_name = info["completedComponents"]
+ ...
+ ... self.history.append({
+ ... "component_name": component_name,
+ ... "current": current,
+ ... "total": total
+ ... })
+ ...
+ >>> progress_info_callback = ProgressInfoCallback()
+
+
+ Example 2: Use a function to output the progress percentage and processing name to standard error (stderr).
+
+ >>> def progress_info_callback(info):
+ ... current = info["currentComponent"]
+ ... total = info["totalComponents"]
+ ... component_name = info["completedComponents"]
+ ...
+ ... progress_percent = (current / total * 100.0)
+ ...
+ ... print(f"[{current}/{total}] {progress_percent:6.2f}% {component_name}",
+ ... file=__import__('sys').stderr, flush=True)
+
+ From the processing state dictionary, you can retrieve information using the following keys:
+
+ .. list-table:: Keys in the processing state dictionary
+ :header-rows: 1
+
+ * - Parameter Key
+ - Value Type
+ - Description
+ * - "currentComponent"
+ - int
+ - Current processing step
+ * - "totalComponents"
+ - int
+ - Total number of processing steps
+ * - "completedComponents"
+ - str
+ - Name of the component currently being processed
+
+ Import MCT and configure DebugConfig with the callback function you created.
+ Configure CoreConfig with this DebugConfig and use it.
+
+ >>> import model_compression_toolkit as mct
+ >>> debug_config = mct.core.DebugConfig(progress_info_callback=progress_info_callback)
+ >>> core_config = mct.core.CoreConfig(debug_config=debug_config)
"""
analyze_similarity: bool = False
network_editor: List[EditRule] = field(default_factory=list)
simulate_scheduler: bool = False
bypass: bool = False
+ progress_info_callback: Callable = None
diff --git a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py
index ca8cdbfd7..136db0937 100644
--- a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py
+++ b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py
@@ -28,6 +28,8 @@
from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \
get_weights_qparams
from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[BaseNode]:
@@ -60,7 +62,8 @@ def calculate_quantization_params(graph: Graph,
repr_data_gen_fn: Callable[[], Generator],
nodes: List[BaseNode] = None,
hessian_info_service: HessianInfoService = None,
- num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES):
+ num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+ progress_info_controller: ProgressInfoController = None):
"""
For a graph, go over its nodes, compute quantization params (for both weights and activations according
to the given framework info), and create and attach a NodeQuantizationConfig to each node (containing the
@@ -75,6 +78,7 @@ def calculate_quantization_params(graph: Graph,
nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
Logger.info(f"\nRunning quantization parameters search. "
@@ -97,6 +101,9 @@ def calculate_quantization_params(graph: Graph,
target_nodes=nodes_for_hmse)
hessian_info_service.fetch_hessian(request)
+ if progress_info_controller is not None:
+ progress_info_controller.set_description('Calculate Quantization Parameters')
+
for n in tqdm(nodes_list, "Calculating quantization parameters"): # iterate only nodes that we should compute their thresholds
for candidate_qc in n.candidates_quantization_cfg:
for attr in n.get_node_weights_attributes():
diff --git a/model_compression_toolkit/core/quantization_prep_runner.py b/model_compression_toolkit/core/quantization_prep_runner.py
index 3b03ad2bb..6aab979ee 100644
--- a/model_compression_toolkit/core/quantization_prep_runner.py
+++ b/model_compression_toolkit/core/quantization_prep_runner.py
@@ -32,6 +32,8 @@
from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
def quantization_preparation_runner(graph: Graph,
@@ -40,7 +42,8 @@ def quantization_preparation_runner(graph: Graph,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
tb_w: TensorboardWriter = None,
- hessian_info_service: HessianInfoService = None, ) -> Graph:
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None) -> Graph:
"""
Prepares a trained model for post-training quantization.
First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).
@@ -58,6 +61,7 @@ def quantization_preparation_runner(graph: Graph,
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
tb_w: TensorboardWriter object for logging
hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
Graph object that represents the model, contains thresholds, and ready for quantization.
@@ -66,6 +70,9 @@ def quantization_preparation_runner(graph: Graph,
######################################
# Statistic collection
######################################
+ if progress_info_controller is not None:
+ progress_info_controller.set_description('Statistics Collection')
+
mi = ModelCollector(graph,
fw_impl,
fw_info,
@@ -92,7 +99,8 @@ def quantization_preparation_runner(graph: Graph,
######################################
calculate_quantization_params(graph, fw_impl=fw_impl, repr_data_gen_fn=representative_data_gen,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
if tb_w is not None:
tb_w.add_graph(graph, 'thresholds_selection')
diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py
index 8226f59e6..53a380184 100644
--- a/model_compression_toolkit/core/runner.py
+++ b/model_compression_toolkit/core/runner.py
@@ -36,6 +36,8 @@
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \
finalize_bitwidth_in_tb
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
from model_compression_toolkit.logger import Logger
@@ -51,7 +53,8 @@ def core_runner(in_model: Any,
fqc: FrameworkQuantizationCapabilities,
target_resource_utilization: ResourceUtilization = None,
running_gptq: bool = False,
- tb_w: TensorboardWriter = None):
+ tb_w: TensorboardWriter = None,
+ progress_info_controller: ProgressInfoController = None):
"""
Quantize a trained model using post-training quantization.
First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding
@@ -72,11 +75,14 @@ def core_runner(in_model: Any,
the attached framework operator's information.
target_resource_utilization: ResourceUtilization to constraint the search of the mixed-precision configuration for the model.
tb_w: TensorboardWriter object for logging
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
An internal graph representation of the input model.
"""
+ if progress_info_controller is not None:
+ progress_info_controller.set_description('MCT Graph Preprocessing')
# Warn is representative dataset has batch-size == 1
batch_data = next(iter(representative_data_gen()))
@@ -115,7 +121,8 @@ def core_runner(in_model: Any,
fw_info=fw_info,
fw_impl=fw_impl,
tb_w=tb_w,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
######################################
# Finalize bit widths
@@ -130,7 +137,8 @@ def core_runner(in_model: Any,
target_resource_utilization,
core_config.mixed_precision_config,
representative_data_gen,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
else:
Logger.warning(
f'Mixed Precision has overwrite bit-width configuration{core_config.mixed_precision_config.configuration_overwrite}')
diff --git a/model_compression_toolkit/gptq/common/gptq_training.py b/model_compression_toolkit/gptq/common/gptq_training.py
index 8c6a4168e..38fb4bcc9 100644
--- a/model_compression_toolkit/gptq/common/gptq_training.py
+++ b/model_compression_toolkit/gptq/common/gptq_training.py
@@ -31,6 +31,8 @@
get_gradual_activation_quantizer_wrapper_factory
from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
@@ -46,7 +48,8 @@ def __init__(self,
fw_impl: GPTQFrameworkImplemantation,
fw_info: FrameworkInfo,
representative_data_gen_fn: Callable[[], Generator],
- hessian_info_service: HessianInfoService = None):
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None):
"""
Build two models from a graph: A teacher network (float model) and a student network (quantized model).
Use the dataset generator to pass images through the teacher and student networks to get intermediate
@@ -61,6 +64,7 @@ def __init__(self,
fw_info: Framework information
representative_data_gen_fn: factory for representative data generator.
hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
self.graph_float = copy.deepcopy(graph_float)
self.graph_quant = copy.deepcopy(graph_quant)
@@ -68,6 +72,7 @@ def __init__(self,
self.fw_impl = fw_impl
self.fw_info = fw_info
self.representative_data_gen_fn = representative_data_gen_fn
+ self.progress_info_controller = progress_info_controller
def _get_total_grad_steps():
return get_total_grad_steps(representative_data_gen_fn) * gptq_config.n_epochs
@@ -131,6 +136,10 @@ def _get_total_grad_steps():
[len(optimizer_params_tuple[1]) for optimizer_params_tuple in self.optimizer_with_param]) > 0
self.use_sample_layer_attention = hessian_cfg and hessian_cfg.per_sample
+ if hessian_cfg:
+ if self.progress_info_controller is not None:
+ self.progress_info_controller.set_description('Compute Hessian for GPTQ')
+
if self.use_sample_layer_attention:
# normalization is currently not supported, make sure the config reflects it.
if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm:
@@ -289,7 +298,8 @@ def gptq_training(graph_float: Graph,
representative_data_gen: Callable,
fw_impl: GPTQFrameworkImplemantation,
fw_info: FrameworkInfo,
- hessian_info_service: HessianInfoService = None) -> Graph:
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None) -> Graph:
"""
GPTQ training process using knowledge distillation with a teacher network (float model) and a student network (quantized model).
Args:
@@ -300,6 +310,7 @@ def gptq_training(graph_float: Graph,
fw_impl: Framework implementation
fw_info: Framework information
hessian_info_service: HessianInfoService to fetch information based on the Hessian approximation.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
Quantized graph for export
@@ -314,9 +325,12 @@ def gptq_training(graph_float: Graph,
fw_impl,
fw_info,
representative_data_gen,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
# Training process
+ if progress_info_controller is not None:
+ progress_info_controller.set_description('Train with GPTQ')
gptq_trainer.train()
# Update graph
diff --git a/model_compression_toolkit/gptq/keras/gptq_training.py b/model_compression_toolkit/gptq/keras/gptq_training.py
index 1e135ff21..b77a99cff 100644
--- a/model_compression_toolkit/gptq/keras/gptq_training.py
+++ b/model_compression_toolkit/gptq/keras/gptq_training.py
@@ -54,6 +54,9 @@
import copy
from model_compression_toolkit.core.keras.constants import BIAS, USE_BIAS
from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
+
class KerasGPTQTrainer(GPTQTrainer):
"""
@@ -67,7 +70,8 @@ def __init__(self,
fw_impl: FrameworkImplementation,
fw_info: FrameworkInfo,
representative_data_gen: Callable,
- hessian_info_service: HessianInfoService = None):
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None):
"""
Build two models from a graph: A teacher network (float model) and a student network (quantized model).
Use the dataset generator to pass images through the teacher and student networks to get intermediate
@@ -82,6 +86,7 @@ def __init__(self,
fw_info: Framework information.
representative_data_gen: Dataset to use for inputs of the models.
hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
@@ -96,7 +101,8 @@ def __init__(self,
fw_impl,
fw_info,
representative_data_gen_fn=representative_data_gen,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> tf.data.Dataset:
diff --git a/model_compression_toolkit/gptq/keras/quantization_facade.py b/model_compression_toolkit/gptq/keras/quantization_facade.py
index 0726e516c..54bb61d26 100644
--- a/model_compression_toolkit/gptq/keras/quantization_facade.py
+++ b/model_compression_toolkit/gptq/keras/quantization_facade.py
@@ -33,6 +33,8 @@
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
from model_compression_toolkit.core import CoreConfig
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController, research_progress_total
from model_compression_toolkit.core.runner import core_runner
from model_compression_toolkit.gptq.runner import gptq_runner
from model_compression_toolkit.core.analyzer import analyzer_model_quantization
@@ -253,6 +255,12 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da
target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)
+ progress_info_controller = ProgressInfoController(
+ total_step=research_progress_total(core_config, target_resource_utilization, gptq_config),
+ description="MCT Keras GPTQ Progress",
+ progress_info_callback=core_config.debug_config.progress_info_callback
+ )
+
tg, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=in_model,
representative_data_gen=representative_data_gen,
core_config=core_config,
@@ -261,7 +269,8 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da
fqc=framework_platform_capabilities,
target_resource_utilization=target_resource_utilization,
tb_w=tb_w,
- running_gptq=True)
+ running_gptq=True,
+ progress_info_controller=progress_info_controller)
float_graph = copy.deepcopy(tg)
@@ -273,10 +282,14 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da
DEFAULT_KERAS_INFO,
fw_impl,
tb_w,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
del hessian_info_service
+ if progress_info_controller is not None:
+ progress_info_controller.set_description("MCT Graph Finalization")
+
if core_config.debug_config.analyze_similarity:
analyzer_model_quantization(representative_data_gen,
tb_w,
@@ -290,6 +303,10 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da
exportable_model = add_metadata(exportable_model,
create_model_metadata(fqc=framework_platform_capabilities,
scheduling_info=scheduling_info))
+
+ if progress_info_controller is not None:
+ progress_info_controller.close()
+
return exportable_model, user_info
else:
diff --git a/model_compression_toolkit/gptq/pytorch/gptq_training.py b/model_compression_toolkit/gptq/pytorch/gptq_training.py
index 6b7b0378b..7d9c9b8dc 100644
--- a/model_compression_toolkit/gptq/pytorch/gptq_training.py
+++ b/model_compression_toolkit/gptq/pytorch/gptq_training.py
@@ -42,6 +42,8 @@
from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization as PytorchSoftQuantizerRegularization
from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
class PytorchGPTQTrainer(GPTQTrainer):
@@ -56,7 +58,8 @@ def __init__(self,
fw_impl: FrameworkImplementation,
fw_info: FrameworkInfo,
representative_data_gen: Callable,
- hessian_info_service: HessianInfoService = None):
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None):
"""
Build two models from a graph: A teacher network (float model) and a student network (quantized model).
Use the dataset generator to pass images through the teacher and student networks to get intermediate
@@ -71,6 +74,7 @@ def __init__(self,
fw_info: Framework information
representative_data_gen: Dataset to use for inputs of the models.
hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
"""
self.fw_soft_quantizer_regularization = PytorchSoftQuantizerRegularization
self.fw_linear_annealing_scheduler = PytorchLinearAnnealingScheduler
@@ -83,7 +87,8 @@ def __init__(self,
fw_impl,
fw_info,
representative_data_gen_fn=representative_data_gen,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
index 22fcc61a0..d19894fa1 100644
--- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py
+++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
@@ -22,6 +22,8 @@
MixedPrecisionQuantizationConfig
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
ResourceUtilization
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController, research_progress_total
from model_compression_toolkit.core.common.user_info import UserInformation
from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
from model_compression_toolkit.core.runner import core_runner
@@ -226,6 +228,12 @@ def pytorch_gradient_post_training_quantization(model: Module,
framework_quantization_capabilities = attach2pytorch.attach(target_platform_capabilities,
core_config.quantization_config.custom_tpc_opset_to_layer)
+ progress_info_controller = ProgressInfoController(
+ total_step=research_progress_total(core_config, target_resource_utilization, gptq_config),
+ description="MCT PyTorch GPTQ Progress",
+ progress_info_callback=core_config.debug_config.progress_info_callback
+ )
+
# ---------------------- #
# Core Runner
# ---------------------- #
@@ -237,7 +245,8 @@ def pytorch_gradient_post_training_quantization(model: Module,
fqc=framework_quantization_capabilities,
target_resource_utilization=target_resource_utilization,
tb_w=tb_w,
- running_gptq=True)
+ running_gptq=True,
+ progress_info_controller=progress_info_controller)
float_graph = copy.deepcopy(graph)
@@ -252,7 +261,11 @@ def pytorch_gradient_post_training_quantization(model: Module,
DEFAULT_PYTORCH_INFO,
fw_impl,
tb_w,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
+
+ if progress_info_controller is not None:
+ progress_info_controller.set_description("MCT Graph Finalization")
if core_config.debug_config.analyze_similarity:
analyzer_model_quantization(representative_data_gen,
@@ -267,6 +280,10 @@ def pytorch_gradient_post_training_quantization(model: Module,
exportable_model = add_metadata(exportable_model,
create_model_metadata(fqc=framework_quantization_capabilities,
scheduling_info=scheduling_info))
+
+ if progress_info_controller is not None:
+ progress_info_controller.close()
+
return exportable_model, user_info
diff --git a/model_compression_toolkit/gptq/runner.py b/model_compression_toolkit/gptq/runner.py
index 9d1fbc65f..b9238c717 100644
--- a/model_compression_toolkit/gptq/runner.py
+++ b/model_compression_toolkit/gptq/runner.py
@@ -30,6 +30,8 @@
from model_compression_toolkit.core.common.statistics_correction.apply_bias_correction_to_graph import \
apply_bias_correction_to_graph
from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
def _apply_gptq(gptq_config: GradientPTQConfig,
@@ -39,7 +41,8 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
tg_bias: Graph,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
- hessian_info_service: HessianInfoService = None) -> Graph:
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None) -> Graph:
"""
Apply GPTQ to improve accuracy of quantized model.
Build two models from a graph: A teacher network (float model) and a student network (quantized model).
@@ -55,6 +58,7 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.).
fw_impl: Framework implementation per framework
hessian_info_service: HessianInfoService to fetch information based on the hessian approximation for the float model.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
"""
@@ -65,7 +69,8 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
representative_data_gen,
fw_impl,
fw_info,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
if tb_w is not None:
tb_w.add_graph(tg_bias, 'after_gptq')
@@ -80,7 +85,8 @@ def gptq_runner(tg: Graph,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
tb_w: TensorboardWriter,
- hessian_info_service: HessianInfoService = None) -> Graph:
+ hessian_info_service: HessianInfoService = None,
+ progress_info_controller: ProgressInfoController = None) -> Graph:
"""
Quantize a graph that has final weights candidates quantization configurations.
Before we quantize the graph weights, we apply GPTQ to get an improved graph.
@@ -95,6 +101,7 @@ def gptq_runner(tg: Graph,
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
tb_w: A TensorBoardWriter object initialized with the logger dir path if it was set, or None otherwise.
hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model.
+ progress_info_controller: ProgressInfoController to display and manage overall progress information.
Returns:
A graph after model weights GPTQ fine-tuning.
@@ -119,6 +126,7 @@ def gptq_runner(tg: Graph,
tg_bias,
fw_info,
fw_impl,
- hessian_info_service=hessian_info_service)
+ hessian_info_service=hessian_info_service,
+ progress_info_controller=progress_info_controller)
return tg_gptq
diff --git a/model_compression_toolkit/ptq/keras/quantization_facade.py b/model_compression_toolkit/ptq/keras/quantization_facade.py
index 8ddcba218..2732b602f 100644
--- a/model_compression_toolkit/ptq/keras/quantization_facade.py
+++ b/model_compression_toolkit/ptq/keras/quantization_facade.py
@@ -28,6 +28,8 @@
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
MixedPrecisionQuantizationConfig
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController, research_progress_total
from model_compression_toolkit.core.runner import core_runner
from model_compression_toolkit.ptq.runner import ptq_runner
from model_compression_toolkit.metadata import create_model_metadata
@@ -147,6 +149,12 @@ def keras_post_training_quantization(in_model: Model,
target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)
+ progress_info_controller = ProgressInfoController(
+ total_step=research_progress_total(core_config, target_resource_utilization),
+ description="MCT Keras PTQ Progress",
+ progress_info_callback=core_config.debug_config.progress_info_callback
+ )
+
# Ignore returned hessian service as PTQ does not use it
tg, bit_widths_config, _, scheduling_info = core_runner(in_model=in_model,
representative_data_gen=representative_data_gen,
@@ -155,7 +163,8 @@ def keras_post_training_quantization(in_model: Model,
fw_impl=fw_impl,
fqc=framework_platform_capabilities,
target_resource_utilization=target_resource_utilization,
- tb_w=tb_w)
+ tb_w=tb_w,
+ progress_info_controller=progress_info_controller)
# At this point, tg is a graph that went through substitutions (such as BN folding) and is
# ready for quantization (namely, it holds quantization params, etc.) but the weights are
@@ -171,6 +180,9 @@ def keras_post_training_quantization(in_model: Model,
fw_impl,
tb_w)
+ if progress_info_controller is not None:
+ progress_info_controller.set_description("MCT Graph Finalization")
+
if core_config.debug_config.analyze_similarity:
quantized_graph = quantize_graph_weights(graph_with_stats_correction)
analyzer_model_quantization(representative_data_gen,
@@ -185,6 +197,10 @@ def keras_post_training_quantization(in_model: Model,
exportable_model = add_metadata(exportable_model,
create_model_metadata(fqc=framework_platform_capabilities,
scheduling_info=scheduling_info))
+
+ if progress_info_controller is not None:
+ progress_info_controller.close()
+
return exportable_model, user_info
diff --git a/model_compression_toolkit/ptq/pytorch/quantization_facade.py b/model_compression_toolkit/ptq/pytorch/quantization_facade.py
index 26ab2f796..890e895bd 100644
--- a/model_compression_toolkit/ptq/pytorch/quantization_facade.py
+++ b/model_compression_toolkit/ptq/pytorch/quantization_facade.py
@@ -26,6 +26,8 @@
from model_compression_toolkit.core import CoreConfig
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
MixedPrecisionQuantizationConfig
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController, research_progress_total
from model_compression_toolkit.core.runner import core_runner
from model_compression_toolkit.ptq.runner import ptq_runner
from model_compression_toolkit.core.analyzer import analyzer_model_quantization
@@ -119,6 +121,12 @@ def pytorch_post_training_quantization(in_module: Module,
framework_platform_capabilities = attach2pytorch.attach(target_platform_capabilities,
core_config.quantization_config.custom_tpc_opset_to_layer)
+ progress_info_controller = ProgressInfoController(
+ total_step=research_progress_total(core_config, target_resource_utilization),
+ description="MCT PyTorch PTQ Progress",
+ progress_info_callback=core_config.debug_config.progress_info_callback
+ )
+
# Ignore hessian info service as it is not used here yet.
tg, bit_widths_config, _, scheduling_info = core_runner(in_model=in_module,
representative_data_gen=representative_data_gen,
@@ -127,7 +135,8 @@ def pytorch_post_training_quantization(in_module: Module,
fw_impl=fw_impl,
fqc=framework_platform_capabilities,
target_resource_utilization=target_resource_utilization,
- tb_w=tb_w)
+ tb_w=tb_w,
+ progress_info_controller=progress_info_controller)
# At this point, tg is a graph that went through substitutions (such as BN folding) and is
# ready for quantization (namely, it holds quantization params, etc.) but the weights are
@@ -143,6 +152,9 @@ def pytorch_post_training_quantization(in_module: Module,
fw_impl,
tb_w)
+ if progress_info_controller is not None:
+ progress_info_controller.set_description("MCT Graph Finalization")
+
if core_config.debug_config.analyze_similarity:
quantized_graph = quantize_graph_weights(graph_with_stats_correction)
analyzer_model_quantization(representative_data_gen,
@@ -157,6 +169,10 @@ def pytorch_post_training_quantization(in_module: Module,
exportable_model = add_metadata(exportable_model,
create_model_metadata(fqc=framework_platform_capabilities,
scheduling_info=scheduling_info))
+
+ if progress_info_controller is not None:
+ progress_info_controller.close()
+
return exportable_model, user_info
diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py
index fbe7456b5..d2b4c3c1e 100644
--- a/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py
+++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py
@@ -48,8 +48,9 @@ def init(s, *args, **kwargs):
kwargs = dict(custom_metric_fn=Mock()) if custom else {}
mp_config = MixedPrecisionQuantizationConfig(**kwargs)
hessian_mock = Mock() # we only check the object is passed to calculator as is
+ progress_info_mock = Mock() # we only check the object is passed to calculator as is
se = SensitivityEvaluation(graph_mock, mp_config, repr_datagen, fw_info=fw_info_mock, fw_impl=fw_impl_mock,
- hessian_info_service=hessian_mock)
+ hessian_info_service=hessian_mock, progress_info_controller=progress_info_mock)
# compare exact types in case there is inheritance between calculators
assert type(se.metric_calculator) is calc_type
@@ -57,7 +58,8 @@ def init(s, *args, **kwargs):
init_spy.assert_called_once_with(graph_mock, mp_config.custom_metric_fn)
else:
init_spy.assert_called_once_with(graph_mock, mp_config, repr_datagen, fw_info=fw_info_mock,
- fw_impl=fw_impl_mock, hessian_info_service=hessian_mock)
+ fw_impl=fw_impl_mock, hessian_info_service=hessian_mock,
+ progress_info_controller=progress_info_mock)
build_mp_model_mock.assert_called_with(graph_mock, [1, 2, 3], False)
assert se.mp_model == build_mp_model_mock.return_value[0]
diff --git a/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py b/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py
new file mode 100644
index 000000000..2e51dac16
--- /dev/null
+++ b/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py
@@ -0,0 +1,169 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import pytest
+
+from typing import Callable
+from tqdm import tqdm
+
+from model_compression_toolkit.core.common.quantization.debug_config import DebugConfig
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ ProgressInfoController
+from model_compression_toolkit.core.common.progress_config.constants import \
+ COMPLETED_COMPONENTS, TOTAL_COMPONENTS, CURRENT_COMPONENT
+
+
+def check_callback_function(info):
+ pass
+
+
+class CheckCallBackFunction:
+ def __init__(self):
+ self.history = []
+ self.count = 0
+
+ def __call__(self, info):
+ self.history.append({
+ COMPLETED_COMPONENTS: info[COMPLETED_COMPONENTS],
+ TOTAL_COMPONENTS: info[TOTAL_COMPONENTS],
+ CURRENT_COMPONENT: info[CURRENT_COMPONENT],
+ })
+ self.count += 1
+
+
+class TestProgressInfoController:
+
+ ### Initialization Test
+ @pytest.mark.parametrize(
+ "total_step, callback_function, expected",
+ [
+ pytest.param(-1, None, None, id="unset_callback_and_no_steps"),
+ pytest.param(1, None, None, id="unset_callback_and_with_steps"),
+ pytest.param(0, CheckCallBackFunction(), None, id="set_callback_and_no_steps"),
+ pytest.param(2, CheckCallBackFunction(), ProgressInfoController, id="set_callback_and_steps"),
+ pytest.param(2, check_callback_function, ProgressInfoController, id="set_callback_function_and_steps"),
+ ],
+ )
+    def test_progress_info_controller_initialize(self, total_step, callback_function, expected):
+ controller = ProgressInfoController(
+ total_step=total_step,
+ progress_info_callback=callback_function,
+ description='Unit Test'
+ )
+
+ if expected is None:
+ ### Expected value verification (None)
+ assert controller is expected
+ else:
+ ### Expected value verification (ProgressInfoController)
+ assert isinstance(controller, expected)
+ assert isinstance(controller.pbar, tqdm)
+
+ ### Verify the initialization of class member variables
+ assert controller.total_step == total_step
+ assert controller.current_step == 0
+ assert controller.description == 'Unit Test'
+ assert callable(controller.progress_info_callback)
+
+ ### Initialization Invalid Test
+ @pytest.mark.parametrize(
+ "callback_function",
+ [
+ pytest.param(30, id="set_type_is_int"),
+ pytest.param('callback', id="set_type_is_str"),
+ pytest.param([check_callback_function], id="set_type_is_list"),
+ ],
+ )
+    def test_progress_info_controller_initialize_invalid(self, callback_function):
+ with pytest.raises(TypeError) as err_msg:
+ controller = ProgressInfoController(
+ total_step=1,
+ progress_info_callback=callback_function,
+ description='Initialization Invalid Test'
+ )
+
+ ### Verify assertion error message
+ assert str(err_msg.value) == \
+ f"progress_info_callback must be a callable (function or callable instance)."
+
+ ### Normal Test
+ def test_progress_info_controller_update_description(self):
+ controller = ProgressInfoController(
+ total_step=2,
+ progress_info_callback=CheckCallBackFunction(),
+ )
+
+ controller.set_description("Preprocessing")
+ controller.set_description("Finalization")
+
+ callback = controller.progress_info_callback
+
+ ### Verify callback was called 2 times
+ assert callback.count == 2
+
+ ### Verify first call
+ assert callback.history[0][COMPLETED_COMPONENTS] == "Preprocessing"
+ assert callback.history[0][TOTAL_COMPONENTS] == 2
+ assert callback.history[0][CURRENT_COMPONENT] == 1
+
+ ### Verify second call
+ assert callback.history[1][COMPLETED_COMPONENTS] == "Finalization"
+ assert callback.history[1][TOTAL_COMPONENTS] == 2
+ assert callback.history[1][CURRENT_COMPONENT] == 2
+
+ controller.close()
+
+ ### Verify pbar is closed
+ assert controller.pbar is None
+
+ ### Invalid Test
+ def test_progress_info_controller_invalid_count_check(self):
+ controller = ProgressInfoController(
+ total_step=1,
+ progress_info_callback=CheckCallBackFunction(),
+ description='Invalid Test'
+ )
+
+ with pytest.raises(AssertionError) as err_msg:
+ controller.set_description("Preprocessing")
+ controller.set_description("Finalization")
+
+ ### Verify assertion error message
+ assert str(err_msg.value) == \
+ f"current_step: 2, exceeded total_step: 1."
+
+ ### Verify pbar is safely closed
+ assert controller.pbar is None
+
+ ### Verify callback was called 1 time
+ callback = controller.progress_info_callback
+ assert callback.count == 1
+
+ ### DebugConfig Variable Test
+ @pytest.mark.parametrize(
+ "callback_function, expected",
+ [
+ pytest.param(None, None, id="unset_callback"),
+ pytest.param(check_callback_function, Callable, id="set_callback_of_function"),
+ pytest.param(CheckCallBackFunction(), CheckCallBackFunction, id="set_callback_of_class"),
+ ],
+ )
+    def test_adding_debug_config_member_variable(self, callback_function, expected):
+ debug_config = DebugConfig(progress_info_callback=callback_function)
+
+ if expected is None:
+ assert debug_config.progress_info_callback == expected
+ else:
+ assert callable(debug_config.progress_info_callback)
diff --git a/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py b/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py
new file mode 100644
index 000000000..534044506
--- /dev/null
+++ b/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py
@@ -0,0 +1,148 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import pytest
+from unittest.mock import Mock
+
+from model_compression_toolkit.core.common.progress_config.progress_info_controller import \
+ research_progress_total
+
+
+MOCK_OBJ = Mock()
+
+
+def mock_core_config(
+ mixed_precision_config=None
+):
+ core_config = Mock()
+ core_config.mixed_precision_config = mixed_precision_config
+ core_config.is_mixed_precision_enabled = bool(mixed_precision_config)
+
+ return core_config
+
+
+def mock_mixed_precision_config(
+ use_hessian_based_scores=False
+):
+ if use_hessian_based_scores is None:
+ mixed_precision_config = None
+ else:
+ mixed_precision_config = Mock()
+ mixed_precision_config.use_hessian_based_scores = use_hessian_based_scores
+
+ return mixed_precision_config
+
+
+def mock_gptq_config(
+ hessian_weights_config=None
+):
+ gptq_config = Mock()
+ gptq_config.hessian_weights_config = hessian_weights_config
+
+ return gptq_config
+
+
+def mock_resource_utilization(
+ is_any_restricted=False
+):
+ if is_any_restricted is None:
+ resource_utilization = None
+ else:
+ resource_utilization = Mock()
+ resource_utilization.is_any_restricted.return_value = is_any_restricted
+
+ return resource_utilization
+
+
+class TestResearchProgressTotal:
+
+ ### PTQ (Single Precision)
+ @pytest.mark.parametrize(
+ "is_any_restricted, expected",
+ [
+ pytest.param(None, 4, id="no_ru_flag_ptq_sp_base"),
+ pytest.param(False, 4, id="disable_ru_flag_ptq_sp_base"),
+ ],
+ )
+ def test_ptq_sp(self, is_any_restricted, expected):
+ core_config = mock_core_config()
+ target_resource_utilization=mock_resource_utilization(is_any_restricted)
+
+ result = research_progress_total(
+ core_config=core_config,
+ target_resource_utilization=target_resource_utilization
+ )
+ assert result == expected
+
+ ### PTQ (Mixed Precision)
+ @pytest.mark.parametrize(
+ "mp_hessian_enabled, expected",
+ [
+ pytest.param(None, 5, id="unset_mp_cfg_ptq_mp"),
+ pytest.param(False, 5, id="mp_hessian_disable_ptq_mp"),
+ pytest.param(True, 6, id="mp_hessian_enable_ptq_mp"),
+ ],
+ )
+ def test_ptq_mp(self, mp_hessian_enabled, expected):
+ core_config = mock_core_config(mixed_precision_config=mock_mixed_precision_config(mp_hessian_enabled))
+ result = research_progress_total(
+ core_config=core_config,
+ target_resource_utilization=mock_resource_utilization(True),
+ )
+ assert result == expected
+
+ ### GPTQ (Single Precision)
+ @pytest.mark.parametrize(
+ "is_any_restricted, gptq_hessian_weights_config, expected",
+ [
+ pytest.param(False, None, 5, id="disable_ru_flag_gptq_sp_enable_hessian"),
+ pytest.param(False, MOCK_OBJ, 6, id="disable_ru_flag_gptq_sp_disable_hessian"),
+ pytest.param(None, None, 5, id="no_ru_flag_gptq_sp_enable_hessian"),
+ pytest.param(None, MOCK_OBJ, 6, id="no_ru_flag_gptq_sp_disable_hessian"),
+ ],
+ )
+ def test_gptq_sp(self, is_any_restricted, gptq_hessian_weights_config, expected):
+ core_config = mock_core_config()
+ gptq_config = mock_gptq_config(gptq_hessian_weights_config)
+ target_resource_utilization=mock_resource_utilization(is_any_restricted)
+
+ result = research_progress_total(core_config=core_config,
+ gptq_config=gptq_config,
+ target_resource_utilization=target_resource_utilization)
+ assert result == expected
+
+ ### GPTQ (Mixed Precision)
+ @pytest.mark.parametrize(
+ "mp_hessian_enabled, gptq_hessian_weights_config, expected",
+ [
+ pytest.param(None, None, 6, id="unset_mp_cfg_and_hessian_w_cfg_gptq_mp"),
+ pytest.param(False, None, 6, id="all_disabled_hessian_gptq_mp"),
+ pytest.param(True, None, 7, id="enabled_mp_hessian_disabled_gptq_hessian"),
+ pytest.param(None, MOCK_OBJ, 7, id="unset_mp_cfg_and_set_hessian_w_cfg_gptq_mp"),
+ pytest.param(False, MOCK_OBJ, 7, id="disabled_mp_hessian_enabled_gptq_hessian"),
+ pytest.param(True, MOCK_OBJ, 8, id="all_enabled_hessian_gptq_mp"),
+ ],
+ )
+ def test_gptq_mp(self, mp_hessian_enabled, gptq_hessian_weights_config, expected):
+ core_config = mock_core_config(mixed_precision_config=mock_mixed_precision_config(mp_hessian_enabled))
+ target_resource_utilization = mock_resource_utilization(True)
+ gptq_config = mock_gptq_config(gptq_hessian_weights_config)
+
+ result = research_progress_total(
+ core_config=core_config,
+ target_resource_utilization=target_resource_utilization,
+ gptq_config=gptq_config,
+ )
+ assert result == expected
diff --git a/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py b/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py
new file mode 100644
index 000000000..dd2310783
--- /dev/null
+++ b/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py
@@ -0,0 +1,123 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import pytest
+
+import model_compression_toolkit as mct
+
+import tensorflow as tf
+import tensorflow.keras as keras
+import numpy as np
+
+tf.config.run_functions_eagerly(True)
+
+
+class E2ETestProgressInfoCallback:
+ def __init__(self):
+ self.history = []
+
+ def __call__(self, info):
+ self.history.append(info)
+
+
+def representative_data_gen():
+ yield [np.random.randn(1, 8, 8, 3)]
+
+
+class TestKerasProgressVisualization:
+
+ def _build_test_model(self):
+ x = keras.layers.Input((8, 8, 3))
+ y = keras.layers.Conv2D(filters=8, kernel_size=3)(x)
+ y = keras.layers.BatchNormalization()(y)
+ y = keras.layers.ReLU()(y)
+ return keras.Model(inputs=x, outputs=y)
+
+ def _build_expected_prog_info(self, core_config, resource_utilization, gptq_config):
+
+ expected_str_list = ["MCT Graph Preprocessing", "Statistics Collection", "Calculate Quantization Parameters"]
+
+ if resource_utilization is not None and resource_utilization.is_any_restricted():
+ if core_config.mixed_precision_config is not None and core_config.mixed_precision_config.use_hessian_based_scores:
+ expected_str_list.append("Compute Hessian for Mixed Precision")
+ expected_str_list.append("Research Mixed Precision")
+
+ if gptq_config is not None:
+ if gptq_config.hessian_weights_config is not None:
+ expected_str_list.append("Compute Hessian for GPTQ")
+ expected_str_list.append("Train with GPTQ")
+
+ expected_str_list.append("MCT Graph Finalization")
+
+ expected_components = [
+ {
+ "completedComponents": component,
+ "totalComponents": len(expected_str_list),
+ "currentComponent": idx,
+ }
+ for idx, component in enumerate(expected_str_list, start=1)
+ ]
+
+ return expected_components
+
+ @pytest.mark.parametrize('is_enable_gptq_hessian', [False, True])
+ @pytest.mark.parametrize('is_enable_mp_hessian', [False, True])
+ @pytest.mark.parametrize('is_enable_mp', [False, True])
+ @pytest.mark.parametrize('q_method', ['ptq', 'gptq'])
+ def test_keras_progress_visualization(self, q_method, is_enable_mp, is_enable_mp_hessian, is_enable_gptq_hessian):
+ if q_method == 'ptq' and is_enable_gptq_hessian:
+ pytest.skip("Skipping because the combination 'ptq' x 'gptq_hessian' is invalid.")
+
+ float_model = self._build_test_model()
+ callback_func = E2ETestProgressInfoCallback()
+
+ tpc = mct.get_target_platform_capabilities()
+ core_config = mct.core.CoreConfig(debug_config=mct.core.DebugConfig(
+ progress_info_callback=callback_func),
+ mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(
+ num_of_images=1,
+ use_hessian_based_scores=is_enable_mp_hessian))
+ if is_enable_mp:
+ resource_utilization_data = mct.core.keras_resource_utilization_data(float_model,
+ representative_data_gen,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+ resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.9)
+ else:
+ resource_utilization = None
+
+ if q_method == 'gptq':
+ gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=3,
+ use_hessian_based_weights=is_enable_gptq_hessian,
+ use_hessian_sample_attention=is_enable_gptq_hessian)
+ else:
+ gptq_config = None
+
+
+ if q_method == 'ptq':
+ _, _ = mct.ptq.keras_post_training_quantization(in_model=float_model,
+ representative_data_gen=representative_data_gen,
+ target_resource_utilization=resource_utilization,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+ elif q_method == 'gptq':
+ _, _ = mct.gptq.keras_gradient_post_training_quantization(in_model=float_model,
+ representative_data_gen=representative_data_gen,
+ target_resource_utilization=resource_utilization,
+ gptq_config=gptq_config,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+
+ expected_history = self._build_expected_prog_info(core_config, resource_utilization, gptq_config)
+ assert callback_func.history == expected_history
diff --git a/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py b/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py
new file mode 100644
index 000000000..936a6dacd
--- /dev/null
+++ b/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py
@@ -0,0 +1,130 @@
+# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import pytest
+
+import model_compression_toolkit as mct
+
+import torch
+from torch import nn
+
+
+class E2ETestProgressInfoCallback:
+ def __init__(self):
+ self.history = []
+
+ def __call__(self, info):
+ self.history.append(info)
+
+
+def representative_data_gen():
+ yield [torch.randn(1, 3, 8, 8)]
+
+
+class TestPytorchProgressVisualization:
+
+ def _build_test_model(self):
+
+ class Model(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.conv = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
+ self.bn = nn.BatchNorm2d(8)
+ self.relu = nn.ReLU()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ return x
+
+ return Model()
+
+ def _build_expected_prog_info(self, core_config, resource_utilization, gptq_config):
+
+ expected_str_list = ["MCT Graph Preprocessing", "Statistics Collection", "Calculate Quantization Parameters"]
+
+ if resource_utilization is not None and resource_utilization.is_any_restricted():
+ if core_config.mixed_precision_config is not None and core_config.mixed_precision_config.use_hessian_based_scores:
+ expected_str_list.append("Compute Hessian for Mixed Precision")
+ expected_str_list.append("Research Mixed Precision")
+
+ if gptq_config is not None:
+ if gptq_config.hessian_weights_config is not None:
+ expected_str_list.append("Compute Hessian for GPTQ")
+ expected_str_list.append("Train with GPTQ")
+
+ expected_str_list.append("MCT Graph Finalization")
+
+ expected_components = [
+ {
+ "completedComponents": component,
+ "totalComponents": len(expected_str_list),
+ "currentComponent": idx,
+ }
+ for idx, component in enumerate(expected_str_list, start=1)
+ ]
+
+ return expected_components
+
+ @pytest.mark.parametrize('is_enable_gptq_hessian', [False, True])
+ @pytest.mark.parametrize('is_enable_mp_hessian', [False, True])
+ @pytest.mark.parametrize('is_enable_mp', [False, True])
+ @pytest.mark.parametrize('q_method', ['ptq', 'gptq'])
+ def test_pytorch_progress_visualization(self, q_method, is_enable_mp, is_enable_mp_hessian, is_enable_gptq_hessian):
+ if q_method == 'ptq' and is_enable_gptq_hessian:
+ pytest.skip("Skipping because the combination 'ptq' x 'gptq_hessian' is invalid.")
+
+ float_model = self._build_test_model()
+ callback_func = E2ETestProgressInfoCallback()
+
+ tpc = mct.get_target_platform_capabilities()
+ core_config = mct.core.CoreConfig(debug_config=mct.core.DebugConfig(
+ progress_info_callback=callback_func),
+ mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(
+ num_of_images=1,
+ use_hessian_based_scores=is_enable_mp_hessian))
+ if is_enable_mp:
+ resource_utilization_data = mct.core.pytorch_resource_utilization_data(float_model,
+ representative_data_gen,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+ resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.9)
+ else:
+ resource_utilization = None
+
+ if q_method == 'gptq':
+ gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=3,
+ use_hessian_based_weights=is_enable_gptq_hessian,
+ use_hessian_sample_attention=is_enable_gptq_hessian)
+ else:
+ gptq_config = None
+
+
+ if q_method == 'ptq':
+ _, _ = mct.ptq.pytorch_post_training_quantization(in_module=float_model,
+ representative_data_gen=representative_data_gen,
+ target_resource_utilization=resource_utilization,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+ elif q_method == 'gptq':
+ _, _ = mct.gptq.pytorch_gradient_post_training_quantization(model=float_model,
+ representative_data_gen=representative_data_gen,
+ target_resource_utilization=resource_utilization,
+ gptq_config=gptq_config,
+ core_config=core_config,
+ target_platform_capabilities=tpc)
+
+ expected_history = self._build_expected_prog_info(core_config, resource_utilization, gptq_config)
+ assert callback_func.history == expected_history